diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 4ea56fd86690a783836cce92ae46e951b12f99fa..aef980604cf674c3ddfec95d2c0bdbb7a14c7cd9 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -10,18 +10,18 @@ set(arbor_sources
     builtin_mechanisms.cpp
     cell_group_factory.cpp
     common_types_io.cpp
-    mc_cell.cpp
+    local_alloc.cpp
     event_binner.cpp
     fvm_layout.cpp
     fvm_lowered_cell_impl.cpp
     hardware/affinity.cpp
-    hardware/gpu.cpp
     hardware/memory.cpp
     hardware/node_info.cpp
     hardware/power.cpp
     io/locked_ostream.cpp
     io/serialize_hex.cpp
     lif_cell_group.cpp
+    mc_cell.cpp
     mc_cell_group.cpp
     mechcat.cpp
     memory/cuda_wrappers.cpp
@@ -43,7 +43,6 @@ set(arbor_sources
     threading/threading.cpp
     thread_private_spike_store.cpp
     util/hostname.cpp
-    util/path.cpp
     util/unwind.cpp
     version.cpp
 )
diff --git a/arbor/backends/gpu/matrix_state_interleaved.hpp b/arbor/backends/gpu/matrix_state_interleaved.hpp
index 0cb2aac84fde5f55f2158e9415c54f50122fa155..2ac1989e37c79e9c5607137f073381f90afa6e1d 100644
--- a/arbor/backends/gpu/matrix_state_interleaved.hpp
+++ b/arbor/backends/gpu/matrix_state_interleaved.hpp
@@ -2,8 +2,8 @@
 
 #include <arbor/assert.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 
-#include "math.hpp"
 #include "memory/memory.hpp"
 #include "util/span.hpp"
 #include "util/partition.hpp"
diff --git a/arbor/backends/gpu/mechanism.cpp b/arbor/backends/gpu/mechanism.cpp
index cb57c95ae07ae8af312aae225d54d01abe866f00..e545e791806f8bef9e21a8d7c48e2fcf8a962d87 100644
--- a/arbor/backends/gpu/mechanism.cpp
+++ b/arbor/backends/gpu/mechanism.cpp
@@ -8,10 +8,10 @@
 #include <arbor/arbexcept.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mechanism.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "math.hpp"
 #include "memory/memory.hpp"
 #include "util/index_into.hpp"
 #include "util/maputil.hpp"
diff --git a/arbor/backends/multicore/mechanism.cpp b/arbor/backends/multicore/mechanism.cpp
index a5b22f3e54631b3e5c75e1df5ae26185aed5635e..81c48e8f890c1ec92d2ac7f41d57e8a728267e5c 100644
--- a/arbor/backends/multicore/mechanism.cpp
+++ b/arbor/backends/multicore/mechanism.cpp
@@ -7,10 +7,10 @@
 
 #include <arbor/fvm_types.hpp>
 #include <arbor/common_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mechanism.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "math.hpp"
 #include "util/index_into.hpp"
 #include "util/maputil.hpp"
 #include "util/padded_alloc.hpp"
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index 2af16850824150d3214050d01dfe41fdf6310e80..6cb520ef3e0cd4843bd2eb23f92a350a1e5c3631 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -6,15 +6,15 @@
 #include <vector>
 
 #include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
 #include <arbor/constants.hpp>
 #include <arbor/fvm_types.hpp>
-#include <arbor/common_types.hpp>
 #include <arbor/ion.hpp>
+#include <arbor/math.hpp>
 #include <arbor/simd/simd.hpp>
 
 #include "backends/event.hpp"
 #include "io/sepval.hpp"
-#include "math.hpp"
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
 
diff --git a/arbor/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp
index cc5b741691b14ea63fbf66e90f072ae816dafd7f..dcb38168c55025516af752ae14d69e4e49895bc1 100644
--- a/arbor/backends/multicore/threshold_watcher.hpp
+++ b/arbor/backends/multicore/threshold_watcher.hpp
@@ -2,10 +2,9 @@
 
 #include <arbor/assert.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 
 #include "backends/threshold_crossing.hpp"
-#include "math.hpp"
-
 #include "multicore_common.hpp"
 
 namespace arb {
diff --git a/arbor/benchmark_cell_group.cpp b/arbor/benchmark_cell_group.cpp
index 0556045dea9a2abbd5afa1ad9c6beae64bd5e073..2da67ef0b148fc38fc0f29092df5a9552de1474b 100644
--- a/arbor/benchmark_cell_group.cpp
+++ b/arbor/benchmark_cell_group.cpp
@@ -13,9 +13,9 @@
 
 namespace arb {
 
-benchmark_cell_group::benchmark_cell_group(std::vector<cell_gid_type> gids,
+benchmark_cell_group::benchmark_cell_group(const std::vector<cell_gid_type>& gids,
                                            const recipe& rec):
-    gids_(std::move(gids))
+    gids_(gids)
 {
     cells_.reserve(gids_.size());
     for (auto gid: gids_) {
diff --git a/arbor/benchmark_cell_group.hpp b/arbor/benchmark_cell_group.hpp
index 02fee8d51758043616848e4398e149e81ad21008..f915eed0752b1b5704bae6c50f2023294a6b15f6 100644
--- a/arbor/benchmark_cell_group.hpp
+++ b/arbor/benchmark_cell_group.hpp
@@ -13,7 +13,7 @@ namespace arb {
 
 class benchmark_cell_group: public cell_group {
 public:
-    benchmark_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    benchmark_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     cell_kind get_cell_kind() const override;
 
diff --git a/arbor/cell_group.hpp b/arbor/cell_group.hpp
index 98cf20ab102d43dcce888f76408b724386733f89..a77d720eed9143a63c4e74a78cad28fc76ab9f90 100644
--- a/arbor/cell_group.hpp
+++ b/arbor/cell_group.hpp
@@ -42,9 +42,4 @@ public:
 
 using cell_group_ptr = std::unique_ptr<cell_group>;
 
-template <typename T, typename... Args>
-cell_group_ptr make_cell_group(Args&&... args) {
-    return cell_group_ptr(new T(std::forward<Args>(args)...));
-}
-
 } // namespace arb
diff --git a/arbor/cell_group_factory.cpp b/arbor/cell_group_factory.cpp
index 54e343eaf3a04e565df3949d3ac50c495033112b..d82bada037059be7f98400e815e4dd2e2eeeb93c 100644
--- a/arbor/cell_group_factory.cpp
+++ b/arbor/cell_group_factory.cpp
@@ -1,12 +1,11 @@
 #include <vector>
 
-#include <arbor/arbexcept.hpp>
 #include <arbor/common_types.hpp>
-#include <arbor/domain_decomposition.hpp>
 #include <arbor/recipe.hpp>
 
 #include "benchmark_cell_group.hpp"
 #include "cell_group.hpp"
+#include "cell_group_factory.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "lif_cell_group.hpp"
 #include "mc_cell_group.hpp"
@@ -14,23 +13,45 @@
 
 namespace arb {
 
-cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group) {
-    switch (group.kind) {
+template <typename Impl, typename... Args>
+cell_group_ptr make_cell_group(Args&&... args) {
+    return cell_group_ptr(new Impl(std::forward<Args>(args)...));
+}
+
+cell_group_factory cell_kind_implementation(cell_kind ck, backend_kind bk) {
+    using gid_vector = std::vector<cell_gid_type>;
+
+    switch (ck) {
     case cell_kind::cable1d_neuron:
-        return make_cell_group<mc_cell_group>(group.gids, rec, make_fvm_lowered_cell(group.backend));
+        return [bk](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<mc_cell_group>(gids, rec, make_fvm_lowered_cell(bk));
+        };
 
     case cell_kind::spike_source:
-        return make_cell_group<spike_source_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<spike_source_cell_group>(gids, rec);
+        };
 
     case cell_kind::lif_neuron:
-        return make_cell_group<lif_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<lif_cell_group>(gids, rec);
+        };
 
     case cell_kind::benchmark:
-        return make_cell_group<benchmark_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<benchmark_cell_group>(gids, rec);
+        };
 
-    default:
-        throw arbor_internal_error("cell_group_factory: unknown cell kind");
+    default: ;
     }
+
+    return cell_group_factory{}; // empty function => not supported
 }
 
 } // namespace arb
diff --git a/arbor/cell_group_factory.cu b/arbor/cell_group_factory.cu
deleted file mode 100644
index 21d20e7b9f18c2c1028cfe4e5391f2b463d106a9..0000000000000000000000000000000000000000
--- a/arbor/cell_group_factory.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "cell_group_factory.cpp"
diff --git a/arbor/cell_group_factory.hpp b/arbor/cell_group_factory.hpp
index 320e4ff531aad22074cee94d569bc5a9d9c60942..47c476e52c2f2d3eeb572fee22774a546bffbf39 100644
--- a/arbor/cell_group_factory.hpp
+++ b/arbor/cell_group_factory.hpp
@@ -1,13 +1,27 @@
 #pragma once
 
-#include <arbor/domain_decomposition.hpp>
+// Provides a map from cell kind and execution back-end to a cell group
+// implementation, represented by the `cell_group_factory` function wrapper below.
+//
+// An empty function implies there is no support for that cell kind on that
+// back-end.
+
+#include <functional>
+#include <vector>
+
+#include <arbor/common_types.hpp>
 #include <arbor/recipe.hpp>
 
 #include "cell_group.hpp"
 
 namespace arb {
 
-// Helper factory for building cell groups
-cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group);
+using cell_group_factory = std::function<cell_group_ptr (const std::vector<cell_gid_type>&, const recipe&)>;
+
+cell_group_factory cell_kind_implementation(cell_kind, backend_kind);
+
+inline bool cell_kind_supported(cell_kind c, backend_kind b) {
+    return static_cast<bool>(cell_kind_implementation(c, b));
+}
 
 } // namespace arb
diff --git a/arbor/fvm_compartment.hpp b/arbor/fvm_compartment.hpp
index 6361c5117a48d98112200a6bccead430cd4df4a1..0d626328764637a0ef36ba31190c08017d68aa49 100644
--- a/arbor/fvm_compartment.hpp
+++ b/arbor/fvm_compartment.hpp
@@ -4,10 +4,10 @@
 #include <utility>
 
 #include <arbor/common_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/util/compat.hpp>
 
 #include "algorithms.hpp"
-#include "math.hpp"
 #include "util/iterutil.hpp"
 #include "util/partition.hpp"
 #include "util/rangeutil.hpp"
diff --git a/arbor/hardware/affinity.cpp b/arbor/hardware/affinity.cpp
index 29ef7ee428f93041afb4912606fb8ed3cc59939b..73e2762f856e4cc4481b307cec475cd876b67e19 100644
--- a/arbor/hardware/affinity.cpp
+++ b/arbor/hardware/affinity.cpp
@@ -1,63 +1,52 @@
 #include <cstdlib>
+#include <system_error>
 #include <vector>
 
-#include <arbor/util/optional.hpp>
-
 #ifdef __linux__
 
-    #ifndef _GNU_SOURCE
-        #define _GNU_SOURCE
-    #endif
-
-    extern "C" {
-        #include <sched.h>
-    }
-
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
 #endif
 
+extern "C" {
+#include <sched.h>
+}
+
 namespace arb {
 namespace hw {
 
-#ifdef __linux__
 std::vector<int> get_affinity() {
+    std::vector<int> cores;
     cpu_set_t cpu_set_mask;
 
-    auto status = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_mask);
-
-    if(status==-1) {
-        return {};
+    int status = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_mask);
+    if (status) {
+        throw std::system_error(errno, std::generic_category());
     }
 
-    unsigned cpu_count = CPU_COUNT(&cpu_set_mask);
-
-    std::vector<int> cores;
-    for(auto i=0; i<CPU_SETSIZE && cores.size()<cpu_count; ++i) {
-        if(CPU_ISSET(i, &cpu_set_mask)) {
+    for (int i=0; i<CPU_SETSIZE; ++i) {
+        if (CPU_ISSET(i, &cpu_set_mask)) {
             cores.push_back(i);
         }
     }
 
-    if(cores.size() != cpu_count) {
-        return {};
-    }
-
     return cores;
 }
-#else
 
-// No support for non-linux systems
+} // namespace hw
+} // namespace arb
+
+#else // def __linux__
+
+// No support for non-linux systems.
+namespace arb {
+namespace hw {
+
 std::vector<int> get_affinity() {
     return {};
 }
-#endif
-
-util::optional<std::size_t> num_cores() {
-    auto cores = get_affinity();
-    if (cores.size()==0u) {
-        return util::nullopt;
-    }
-    return cores.size();
-}
 
 } // namespace hw
 } // namespace arb
+
+#endif // def __linux__
diff --git a/arbor/hardware/affinity.hpp b/arbor/hardware/affinity.hpp
index 8c5cbc59db3f3213eb5a1b6337bf83bcac14b5f1..db6c8f6bab6daacb3943ce3eb0a7ac97d884d16f 100644
--- a/arbor/hardware/affinity.hpp
+++ b/arbor/hardware/affinity.hpp
@@ -3,12 +3,10 @@
 #include <cstdint>
 #include <vector>
 
-#include <arbor/util/optional.hpp>
-
 namespace arb {
 namespace hw {
 
-// The list of cores for which the calling thread has affinity.
+// The list of logical processors for which the calling thread has affinity.
 // If calling from the main thread at application start up, before
 // attempting to change thread affinity, may produce unreliable
 // results.
@@ -19,12 +17,5 @@ namespace hw {
 // available cores.
 std::vector<int> get_affinity();
 
-// Attempts to find the number of cores available to the application
-// This is likely to give inaccurate results if the caller has already
-// been playing with thread affinity.
-//
-// Returns 0 if unable to determine the number of cores.
-util::optional<std::size_t> num_cores();
-
 } // namespace util
 } // namespace arb
diff --git a/arbor/hardware/gpu.cpp b/arbor/hardware/gpu.cpp
deleted file mode 100644
index bd5f2584acf898a1b93dc4ba2396fbe1581a6ce5..0000000000000000000000000000000000000000
--- a/arbor/hardware/gpu.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifdef ARB_HAVE_GPU
-    #include <cuda_runtime.h>
-#endif
-
-namespace arb {
-namespace hw {
-
-#ifdef ARB_HAVE_GPU
-unsigned num_gpus() {
-    int n;
-    cudaGetDeviceCount(&n);
-    return n;
-}
-#else
-unsigned num_gpus() {
-    return 0u;
-}
-#endif
-
-} // namespace hw
-} // namespace arb
diff --git a/arbor/hardware/gpu.hpp b/arbor/hardware/gpu.hpp
deleted file mode 100644
index 78b8ea807c3b772be81c344989792f0d2e85667c..0000000000000000000000000000000000000000
--- a/arbor/hardware/gpu.hpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-
-namespace arb {
-namespace hw {
-
-unsigned num_gpus();
-
-} // namespace hw
-} // namespace arb
diff --git a/arbor/hardware/node_info.cpp b/arbor/hardware/node_info.cpp
index 354a697dfb5b3aa5e1c1cfaf9b7d55b31a47b9ef..095c26b01fc8c30e52450570951366508cdf1a28 100644
--- a/arbor/hardware/node_info.cpp
+++ b/arbor/hardware/node_info.cpp
@@ -1,19 +1,52 @@
-#include <algorithm>
+#include <thread>
+
+#ifdef ARB_HAVE_GPU
+#include <cuda_runtime.h>
+#endif
+
+// TODO: once C++17 is available, use __has_include(<unistd.h>) instead.
+#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__)
+#include <unistd.h>
+#endif
 
 #include "affinity.hpp"
-#include "gpu.hpp"
 #include "node_info.hpp"
 
 namespace arb {
 namespace hw {
 
-// Return a node_info that describes the hardware resources available on this node.
-// If unable to determine the number of available cores, assumes that there is one
-// core available.
-node_info get_node_info() {
-    auto res = num_cores();
-    unsigned ncpu = res? *res: 1u;
-    return {ncpu, num_gpus()};
+
+unsigned node_gpus() {
+#ifdef ARB_HAVE_GPU
+    int n;
+    if (cudaGetDeviceCount(&n)==cudaSuccess) {
+        return (unsigned)(n);
+    }
+#endif
+
+    return 0;
+}
+
+unsigned node_processors() {
+    // Attempt to get count first from affinity information if available.
+    unsigned n = get_affinity().size();
+
+    // If no luck, try sysconf.
+#ifdef _SC_NPROCESSORS_ONLN
+    if (!n) {
+        long r = sysconf(_SC_NPROCESSORS_ONLN);
+        if (r>0) {
+            n = (unsigned)r;
+        }
+    }
+#endif
+
+    // If still zero, try the hint from the library.
+    if (!n) {
+        n = std::thread::hardware_concurrency();
+    }
+
+    return n;
 }
 
 } // namespace util
diff --git a/arbor/hardware/node_info.hpp b/arbor/hardware/node_info.hpp
index f83a2caf000c37d791ef8a477e2703d481c94cdd..0452bdd4f62d0b23b08ecb58781c2c45bb69ccf1 100644
--- a/arbor/hardware/node_info.hpp
+++ b/arbor/hardware/node_info.hpp
@@ -3,20 +3,12 @@
 namespace arb {
 namespace hw {
 
-// Information about the computational resources available on a compute node.
-// Currently a simple enumeration of the number of cpu cores and gpus, which
-// will become richer.
-struct node_info {
-    node_info() = default;
-    node_info(unsigned c, unsigned g):
-        num_cpu_cores(c), num_gpus(g)
-    {}
+// Number of GPUs detected on the node.
+unsigned node_gpus();
 
-    unsigned num_cpu_cores = 1;
-    unsigned num_gpus = 0;
-};
+// Number of visible logical processors on the node.
+// 0 => unable to determine.
+unsigned node_processors();
 
-node_info get_node_info();
-
-} // namespace util
+} // namespace hw
 } // namespace arb
diff --git a/arbor/io/exporter.hpp b/arbor/io/exporter.hpp
deleted file mode 100644
index 9370391dc84a1c6f6e3ec551bb241bc53f87afcd..0000000000000000000000000000000000000000
--- a/arbor/io/exporter.hpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include <random>
-#include <string>
-
-#include <arbor/common_types.hpp>
-#include <arbor/spike.hpp>
-
-namespace arb {
-namespace io {
-
-// interface for exporters.
-// Exposes one virtual functions:
-//    do_export(vector<type>) receiving a vector of parameters to export
-
-class exporter {
-public:
-    // Performs the export of the data
-    virtual void output(const std::vector<spike>&) = 0;
-
-    // Returns the status of the exporter
-    virtual bool good() const = 0;
-
-    virtual ~exporter() {}
-};
-
-} //communication
-} // namespace arb
diff --git a/arbor/io/exporter_spike_file.hpp b/arbor/io/exporter_spike_file.hpp
deleted file mode 100644
index 7ee80d7a3bbea417845a76da94838cefd66c1d46..0000000000000000000000000000000000000000
--- a/arbor/io/exporter_spike_file.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#pragma once
-
-#include <fstream>
-#include <iomanip>
-#include <memory>
-#include <random>
-#include <stdexcept>
-#include <vector>
-
-#include <cstring>
-#include <cstdio>
-
-#include <arbor/common_types.hpp>
-#include <arbor/spike.hpp>
-
-#include "io/exporter.hpp"
-#include "util/file.hpp"
-
-namespace arb {
-namespace io {
-
-class exporter_spike_file: public exporter {
-public:
-    // Constructor
-    // over_write if true will overwrite the specified output file (default = true)
-    // output_path  relative or absolute path
-    // file_name    will be appended with "_x" with x the rank number
-    // file_extension  a seperator will be added automatically
-    exporter_spike_file(
-        const std::string& file_name,
-        const std::string& path,
-        const std::string& file_extension,
-        int index,
-        bool over_write=true)
-    {
-        file_path_ =
-            create_output_file_path(
-                file_name, path, file_extension, index);
-
-        //test if the file exist and depending on over_write throw or delete
-        if (!over_write && util::file_exists(file_path_)) {
-            throw std::runtime_error(
-                "Tried opening file for writing but it exists and over_write is false: " + file_path_);
-        }
-
-        file_handle_.open(file_path_);
-    }
-
-    // Performs export of the spikes to file.
-    // One id and spike time with 4 decimals after the comma on a
-    // line space separated.
-    void output(const std::vector<spike>& spikes) override {
-        for (auto spike : spikes) {
-            char linebuf[45];
-            auto n =
-                std::snprintf(
-                    linebuf, sizeof(linebuf), "%u %.4f\n",
-                    unsigned{spike.source.gid}, float(spike.time));
-            file_handle_.write(linebuf, n);
-        }
-    }
-
-    bool good() const override {
-        return file_handle_.good();
-    }
-
-    // Creates an indexed filename
-    static std::string create_output_file_path(
-        const std::string& file_name,
-        const std::string& path,
-        const std::string& file_extension,
-        unsigned index)
-    {
-        return path + file_name + "_" +  std::to_string(index) + "." + file_extension;
-    }
-
-    // The name of the output path and file name.
-    // May be either relative or absolute path.
-    const std::string& file_path() const {
-        return file_path_;
-    }
-
-private:
-
-    // Handle to opened file handle
-    std::ofstream file_handle_;
-    std::string file_path_;
-};
-
-} //communication
-} // namespace arb
diff --git a/arbor/lif_cell_group.cpp b/arbor/lif_cell_group.cpp
index 7cce6cc5ed11a0e9f4451ba7608595d4374573f5..7622dab2ab8ed04724f7ee3868b7ac00a3f465c8 100644
--- a/arbor/lif_cell_group.cpp
+++ b/arbor/lif_cell_group.cpp
@@ -6,8 +6,8 @@
 using namespace arb;
 
 // Constructor containing gid of first cell in a group and a container of all cells.
-lif_cell_group::lif_cell_group(std::vector<cell_gid_type> gids, const recipe& rec):
-gids_(std::move(gids))
+lif_cell_group::lif_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec):
+    gids_(gids)
 {
     // Default to no binning of events
     set_binning_policy(binning_kind::none, 0);
diff --git a/arbor/lif_cell_group.hpp b/arbor/lif_cell_group.hpp
index 6ed49d39ba94931419e5a19237d957953cb5e49d..034f3e3452756ee69fa0487c89939fe2f39321dc 100644
--- a/arbor/lif_cell_group.hpp
+++ b/arbor/lif_cell_group.hpp
@@ -19,7 +19,7 @@ public:
     lif_cell_group() = default;
 
     // Constructor containing gid of first cell in a group and a container of all cells.
-    lif_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    lif_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     virtual cell_kind get_cell_kind() const override;
     virtual void reset() override;
diff --git a/arbor/load_balance.hpp b/arbor/load_balance.hpp
deleted file mode 100644
index 757f5a31422d673a3b781ed2b6de9ea1a98b4d3e..0000000000000000000000000000000000000000
--- a/arbor/load_balance.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include <arbor/distributed_context.hpp>
-#include <arbor/domain_decomposition.hpp>
-#include <arbor/recipe.hpp>
-
-#include "hardware/node_info.hpp"
-
-namespace arb {
-
-domain_decomposition partition_load_balance(const recipe& rec,
-                                            hw::node_info nd,
-                                            const distributed_context* ctx);
-
-} // namespace arb
diff --git a/arbor/local_alloc.cpp b/arbor/local_alloc.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92204a5f97d49f09e5b03097ffabf423a8c2c921
--- /dev/null
+++ b/arbor/local_alloc.cpp
@@ -0,0 +1,16 @@
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/threadinfo.hpp>
+
+#include "hardware/node_info.hpp"
+
+namespace arb {
+
+proc_allocation local_allocation() {
+    proc_allocation info;
+    info.num_threads = arb::num_threads();
+    info.num_gpus = arb::hw::node_gpus();
+
+    return info;
+}
+
+} // namespace arb
diff --git a/arbor/mc_cell_group.cpp b/arbor/mc_cell_group.cpp
index e03ae795d05a0cd93d6e71050edcc20323add82a..54793ddc0681af4f085a593d98dbdf4ca5e89d4e 100644
--- a/arbor/mc_cell_group.cpp
+++ b/arbor/mc_cell_group.cpp
@@ -23,8 +23,8 @@
 
 namespace arb {
 
-mc_cell_group::mc_cell_group(std::vector<cell_gid_type> gids, const recipe& rec, fvm_lowered_cell_ptr lowered):
-    gids_(std::move(gids)), lowered_(std::move(lowered))
+mc_cell_group::mc_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec, fvm_lowered_cell_ptr lowered):
+    gids_(gids), lowered_(std::move(lowered))
 {
     // Default to no binning of events
     set_binning_policy(binning_kind::none, 0);
diff --git a/arbor/mc_cell_group.hpp b/arbor/mc_cell_group.hpp
index be9599d75e5264335237be721b3d36fdffd8fe1c..47653506aa642842a127ccf8e142684dfbb79b47 100644
--- a/arbor/mc_cell_group.hpp
+++ b/arbor/mc_cell_group.hpp
@@ -30,7 +30,7 @@ class mc_cell_group: public cell_group {
 public:
     mc_cell_group() = default;
 
-    mc_cell_group(std::vector<cell_gid_type> gids, const recipe& rec, fvm_lowered_cell_ptr lowered);
+    mc_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec, fvm_lowered_cell_ptr lowered);
 
     cell_kind get_cell_kind() const override {
         return cell_kind::cable1d_neuron;
diff --git a/arbor/morphology.cpp b/arbor/morphology.cpp
index 5508f8357dd0c9f934e16702303ef532cb129e51..2243772b9cfe99f89460080cf9fd8e5b7664a349 100644
--- a/arbor/morphology.cpp
+++ b/arbor/morphology.cpp
@@ -2,8 +2,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 namespace arb {
 
diff --git a/arbor/partition_load_balance.cpp b/arbor/partition_load_balance.cpp
index 472c61ef006fea85807767b0bd75ccf13320f616..e6a80892972ebb050cffedcdf4c9cc6855e6a728 100644
--- a/arbor/partition_load_balance.cpp
+++ b/arbor/partition_load_balance.cpp
@@ -1,16 +1,20 @@
 #include <arbor/distributed_context.hpp>
 #include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/recipe.hpp>
 
-#include "hardware/node_info.hpp"
+#include "cell_group_factory.hpp"
+#include "util/maputil.hpp"
 #include "util/partition.hpp"
 #include "util/span.hpp"
 
 namespace arb {
 
-domain_decomposition partition_load_balance(const recipe& rec,
-                                            hw::node_info nd,
-                                            const distributed_context* ctx)
+domain_decomposition partition_load_balance(
+    const recipe& rec,
+    proc_allocation nd,
+    const distributed_context* ctx,
+    partition_hint_map hint_map)
 {
     struct partition_gid_domain {
         partition_gid_domain(std::vector<cell_gid_type> divs):
@@ -45,8 +49,7 @@ domain_decomposition partition_load_balance(const recipe& rec,
 
     // Local load balance
 
-    std::unordered_map<cell_kind, std::vector<cell_gid_type>>
-        kind_lists;
+    std::unordered_map<cell_kind, std::vector<cell_gid_type>> kind_lists;
     for (auto gid: make_span(gid_part[domain_id])) {
         kind_lists[rec.get_cell_kind(gid)].push_back(gid);
     }
@@ -61,6 +64,11 @@ domain_decomposition partition_load_balance(const recipe& rec,
     // the threading internals. We need support for setting the priority
     // of cell group updates according to rules such as the back end on
     // which the cell group is running.
+
+    auto has_gpu_backend = [](cell_kind c) {
+        return cell_kind_supported(c, backend_kind::gpu);
+    };
+
     std::vector<cell_kind> kinds;
     for (auto l: kind_lists) {
         kinds.push_back(cell_kind(l.first));
@@ -69,16 +77,30 @@ domain_decomposition partition_load_balance(const recipe& rec,
 
     std::vector<group_description> groups;
     for (auto k: kinds) {
-        // put all cells into a single cell group on the gpu if possible
-        if (nd.num_gpus && has_gpu_backend(k)) {
-            groups.push_back({k, std::move(kind_lists[k]), backend_kind::gpu});
+        partition_hint hint;
+        if (auto opt_hint = util::value_by_key(hint_map, k)) {
+            hint = opt_hint.value();
+        }
+
+        backend_kind backend = backend_kind::multicore;
+        std::size_t group_size = hint.cpu_group_size;
+
+        if (hint.prefer_gpu && nd.num_gpus>0 && has_gpu_backend(k)) {
+            backend = backend_kind::gpu;
+            group_size = hint.gpu_group_size;
         }
-        // otherwise place into cell groups of size 1 on the cpu cores
-        else {
-            for (auto gid: kind_lists[k]) {
-                groups.push_back({k, {gid}, backend_kind::multicore});
+
+        std::vector<cell_gid_type> group_elements;
+        for (auto gid: kind_lists[k]) {
+            group_elements.push_back(gid);
+            if (group_elements.size()>=group_size) {
+                groups.push_back({k, std::move(group_elements), backend});
+                group_elements.clear();
             }
         }
+        if (!group_elements.empty()) {
+            groups.push_back({k, std::move(group_elements), backend});
+        }
     }
 
     // calculate the number of local cells
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index 8339b0c05985b4fd2bc278bd1a4ab3dcf54df800..f0c90f77687ca7d56664527dcb52389ce7b0de33 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -105,17 +105,17 @@ private:
 
     // Apply a functional to each cell group in parallel.
     template <typename L>
-    void foreach_group(L fn) {
+    void foreach_group(L&& fn) {
         threading::parallel_for::apply(0, cell_groups_.size(),
-            [&](int i) { fn(cell_groups_[i]); });
+            [&, fn = std::forward<L>(fn)](int i) { fn(cell_groups_[i]); });
     }
 
     // Apply a functional to each cell group in parallel, supplying
     // the cell group pointer reference and index.
     template <typename L>
-    void foreach_group_index(L fn) {
+    void foreach_group_index(L&& fn) {
         threading::parallel_for::apply(0, cell_groups_.size(),
-            [&](int i) { fn(cell_groups_[i], i); });
+            [&, fn = std::forward<L>(fn)](int i) { fn(cell_groups_[i], i); });
     }
 };
 
@@ -162,7 +162,11 @@ simulation_state::simulation_state(
     // Generate the cell groups in parallel, with one task per cell group.
     cell_groups_.resize(decomp.groups.size());
     foreach_group_index(
-        [&](cell_group_ptr& group, int i) { group = cell_group_factory(rec, decomp.groups[i]); });
+        [&](cell_group_ptr& group, int i) {
+            const auto& group_info = decomp.groups[i];
+            auto factory = cell_kind_implementation(group_info.kind, group_info.backend);
+            group = factory(group_info.gids, rec);
+        });
 
     // Create event lane buffers.
     // There is one set for each epoch: current (0) and next (1).
diff --git a/arbor/spike_source_cell_group.cpp b/arbor/spike_source_cell_group.cpp
index 3859ba0c57a0903a93f6abd07d7b816c83400cba..aea7deaa04f11c641bdf96599f7b3798869b9409 100644
--- a/arbor/spike_source_cell_group.cpp
+++ b/arbor/spike_source_cell_group.cpp
@@ -11,8 +11,8 @@
 
 namespace arb {
 
-spike_source_cell_group::spike_source_cell_group(std::vector<cell_gid_type> gids, const recipe& rec):
-    gids_(std::move(gids))
+spike_source_cell_group::spike_source_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec):
+    gids_(gids)
 {
     time_sequences_.reserve(gids_.size());
     for (auto gid: gids_) {
diff --git a/arbor/spike_source_cell_group.hpp b/arbor/spike_source_cell_group.hpp
index 7169fb08bef2c887fbde6cbdf334c2ab49aa9205..1fcf8a8856ca99f1e6b998479e116647d783bdc5 100644
--- a/arbor/spike_source_cell_group.hpp
+++ b/arbor/spike_source_cell_group.hpp
@@ -15,7 +15,7 @@ namespace arb {
 
 class spike_source_cell_group: public cell_group {
 public:
-    spike_source_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    spike_source_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     cell_kind get_cell_kind() const override;
 
diff --git a/arbor/swcio.cpp b/arbor/swcio.cpp
index 2f93bef8769d52db3d3871e5fcb7abf2507c58b7..ef18c38619eb3466ecff5d83302b31d1cf4449b7 100644
--- a/arbor/swcio.cpp
+++ b/arbor/swcio.cpp
@@ -8,16 +8,15 @@
 #include <arbor/assert.hpp>
 #include <arbor/morphology.hpp>
 #include <arbor/point.hpp>
+#include <arbor/swcio.hpp>
 
 #include "algorithms.hpp"
-#include "swcio.hpp"
+#include "util/span.hpp"
 
 namespace arb {
-namespace io {
 
 // swc_record implementation
 
-
 // helper function: return error message if inconsistent, or nullptr if ok.
 const char* swc_record_error(const swc_record& r) {
     constexpr int max_type = static_cast<int>(swc_record::kind::custom);
@@ -57,7 +56,7 @@ void swc_record::assert_consistent() const {
     }
 }
 
-bool parse_record(const std::string& line, swc_record& record) {
+static bool parse_record(const std::string& line, swc_record& record) {
     std::istringstream is(line);
     swc_record r;
 
@@ -76,7 +75,7 @@ bool parse_record(const std::string& line, swc_record& record) {
     return false;
 }
 
-bool is_comment(const std::string& line) {
+static bool is_comment(const std::string& line) {
     auto pos = line.find_first_not_of(" \f\n\r\t\v");
     return pos==std::string::npos || line[pos]=='#';
 }
@@ -151,9 +150,133 @@ std::vector<swc_record> parse_swc_file(std::istream& is) {
         throw swc_error("SWC parse error", line_number);
     }
 
-    swc_canonicalize_sequence(records);
+    swc_canonicalize(records);
     return records;
 }
 
-} // namespace io
+// Convert a canonical sequence of SWC records (as produced by swc_canonicalize)
+// to a morphology: record 0 is the soma, each subsequent branch is one section.
+morphology swc_as_morphology(const std::vector<swc_record>& swc_records) {
+    morphology morph;
+    if (swc_records.empty()) {
+        return morph;
+    }
+
+    std::vector<swc_record::id_type> swc_parent_index;
+    swc_parent_index.reserve(swc_records.size());
+    for (const auto& r: swc_records) {
+        swc_parent_index.push_back(r.parent_id);
+    }
+
+    // The soma's parent is -1 in SWC files, but must be 0 here.
+    swc_parent_index[0] = 0;
+    auto branch_index = algorithms::branches(swc_parent_index); // partitions [0, #records] by branch.
+    auto parent_branch_index = algorithms::tree_reduce(swc_parent_index, branch_index);
+
+    // sanity check
+    arb_assert(parent_branch_index.size() == branch_index.size() - 1);
+
+    // Add the soma first; then the segments
+    const auto& soma = swc_records[0];
+    morph.soma = { soma.x, soma.y, soma.z, soma.r };
+
+    for (auto i: util::make_span(1, parent_branch_index.size())) {
+        auto b_start = swc_records.begin() + branch_index[i];
+        auto b_end   = swc_records.begin() + branch_index[i+1];
+
+        unsigned parent_id = parent_branch_index[i];
+        std::vector<section_point> points;
+        section_kind kind = section_kind::none;
+
+        if (parent_id != 0) {
+            // include the parent of current record if not branching from soma (read-only: no copy)
+            const auto& parent_record = swc_records[swc_parent_index[branch_index[i]]];
+            points.push_back(section_point{parent_record.x, parent_record.y, parent_record.z, parent_record.r});
+        }
+
+        for (auto b = b_start; b!=b_end; ++b) {
+            points.push_back(section_point{b->x, b->y, b->z, b->r});
+            switch (b->type) {
+            case swc_record::kind::axon:
+                kind = section_kind::axon;
+                break;
+            case swc_record::kind::dendrite:
+            case swc_record::kind::apical_dendrite:
+                kind = section_kind::dendrite;
+                break;
+            case swc_record::kind::soma:
+                kind = section_kind::soma;
+                break;
+            default: ; // stick with what we have
+            }
+        }
+
+        morph.add_section(std::move(points), parent_id, kind);
+    }
+
+    morph.assert_valid();
+    return morph;
+}
+
+// Validate a sequence of SWC records describing a single morphology, then sort and
+// renumber it in place so that ids count up from zero; throws swc_error if invalid.
+void swc_canonicalize(std::vector<swc_record>& swc_records) {
+    std::unordered_set<swc_record::id_type> ids;
+
+    std::size_t         num_trees = 0;
+    swc_record::id_type last_id   = -1;
+    bool                needsort  = false;
+
+    for (const auto& r: swc_records) {
+        r.assert_consistent();
+
+        if (r.parent_id == -1 && ++num_trees > 1) {
+            // only a single tree is allowed
+            throw swc_error("multiple trees found in SWC record sequence");
+        }
+        // insert() reports whether the id was new: one lookup instead of count() + insert().
+        if (!ids.insert(r.id).second) {
+            throw swc_error("records with duplicated ids in SWC record sequence");
+        }
+
+        if (!needsort && r.id < last_id) {
+            needsort = true;
+        }
+        last_id = r.id;
+    }
+
+    if (needsort) {
+        std::sort(swc_records.begin(), swc_records.end(),
+            [](const swc_record& a, const swc_record& b) { return a.id<b.id; });
+    }
+
+    // Renumber records if necessary.
+    std::map<swc_record::id_type, swc_record::id_type> idmap;
+    swc_record::id_type next_id = 0;
+    for (auto& r: swc_records) {
+        if (r.id != next_id) {
+            auto old_id = r.id;
+            r.id = next_id;
+            auto new_parent_id = idmap.find(r.parent_id);
+            if (new_parent_id != idmap.end()) {
+                r.parent_id = new_parent_id->second;
+            }
+
+            r.assert_consistent();
+            idmap.insert(std::make_pair(old_id, next_id));
+        }
+        ++next_id;
+    }
+
+    // Reject if branches are not contiguously numbered.
+    std::vector<swc_record::id_type> parent_list = { 0 };
+    for (std::size_t i = 1; i < swc_records.size(); ++i) {
+        parent_list.push_back(swc_records[i].parent_id);
+    }
+
+    if (!arb::algorithms::has_contiguous_compartments(parent_list)) {
+        throw swc_error("branches are not contiguously numbered", 0);
+    }
+}
+
 } // namespace arb
diff --git a/arbor/swcio.hpp b/arbor/swcio.hpp
deleted file mode 100644
index 1502811817048d92f90c8b03e7b46ba7f25dfdef..0000000000000000000000000000000000000000
--- a/arbor/swcio.hpp
+++ /dev/null
@@ -1,243 +0,0 @@
-#pragma once
-
-#include <exception>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include <arbor/assert.hpp>
-#include <arbor/morphology.hpp>
-#include <arbor/point.hpp>
-
-#include "algorithms.hpp"
-
-namespace arb {
-namespace io {
-
-class swc_record {
-public:
-    using id_type = int;
-    using coord_type = double;
-
-    // More on SWC files: http://research.mssm.edu/cnic/swc.html
-    enum class kind {
-        undefined = 0,
-        soma,
-        axon,
-        dendrite,
-        apical_dendrite,
-        fork_point,
-        end_point,
-        custom
-    };
-
-    kind type = kind::undefined; // record type
-    id_type id = 0;              // record id
-    coord_type x = 0;            // record coordinates
-    coord_type y = 0;
-    coord_type z = 0;
-    coord_type r = 0;            // record radius
-    id_type parent_id= -1;      // record parent's id
-
-    // swc records assume zero-based indexing; root's parent remains -1
-    swc_record(swc_record::kind type, int id,
-               coord_type x, coord_type y, coord_type z, coord_type r,
-               int parent_id):
-        type(type), id(id), x(x), y(y), z(z), r(r), parent_id(parent_id)
-    {}
-
-    swc_record() = default;
-    swc_record(const swc_record& other) = default;
-    swc_record& operator=(const swc_record& other) = default;
-
-    bool operator==(const swc_record& other) const {
-        return id == other.id &&
-            x == other.x &&
-            y == other.y &&
-            z == other.z &&
-            r == other.r &&
-            parent_id == other.parent_id;
-    }
-
-    friend bool operator!=(const swc_record& lhs, const swc_record& rhs) {
-        return !(lhs == rhs);
-    }
-
-    friend std::ostream& operator<<(std::ostream& os, const swc_record& record);
-
-    coord_type diameter() const {
-        return 2*r;
-    }
-
-    arb::point<coord_type> coord() const {
-        return arb::point<coord_type>(x, y, z);
-    }
-
-    arb::section_point as_section_point() const {
-        return arb::section_point{x, y, z, r};
-    }
-
-    // validity checks
-    bool is_consistent() const;
-    void assert_consistent() const; // throw swc_error if inconsistent.
-};
-
-
-class swc_error: public std::runtime_error {
-public:
-    explicit swc_error(const char* msg, std::size_t lineno = 0):
-        std::runtime_error(msg), line_number(lineno)
-    {}
-
-    explicit swc_error(const std::string& msg, std::size_t lineno = 0):
-        std::runtime_error(msg), line_number(lineno)
-    {}
-
-    std::size_t line_number;
-};
-
-// Parse one record, skipping comments and blank lines.
-std::istream& operator>>(std::istream& is, swc_record& record);
-
-// Parse and canonicalize an EOF-terminated sequence of records.
-// Throw on parsing failure.
-std::vector<swc_record> parse_swc_file(std::istream& is);
-
-// Convert a canonical (see below) sequence of SWC records to a morphology object.
-template <typename RandomAccessSequence>
-morphology swc_as_morphology(const RandomAccessSequence& swc_records) {
-    morphology morph;
-
-    std::vector<swc_record::id_type> swc_parent_index;
-    for (const auto& r: swc_records) {
-        swc_parent_index.push_back(r.parent_id);
-    }
-
-    if (swc_parent_index.empty()) {
-        return morph;
-    }
-
-    // The parent of soma must be 0, while in SWC files is -1
-    swc_parent_index[0] = 0;
-    auto branch_index = algorithms::branches(swc_parent_index); // partitions [0, #records] by branch.
-    auto parent_branch_index = algorithms::tree_reduce(swc_parent_index, branch_index);
-
-    // sanity check
-    arb_assert(parent_branch_index.size() == branch_index.size() - 1);
-
-    // Add the soma first; then the segments
-    const auto& soma = swc_records[0];
-    morph.soma = { soma.x, soma.y, soma.z, soma.r };
-
-    auto n_branches = parent_branch_index.size();
-    for (std::size_t i = 1; i < n_branches; ++i) {
-        auto b_start = std::next(swc_records.begin(), branch_index[i]);
-        auto b_end   = std::next(swc_records.begin(), branch_index[i+1]);
-
-        unsigned parent_id = parent_branch_index[i];
-        std::vector<section_point> points;
-        section_kind kind = section_kind::none;
-
-        if (parent_id != 0) {
-            // include the parent of current record if not branching from soma
-            auto parent_record = swc_records[swc_parent_index[branch_index[i]]];
-
-            points.push_back(section_point{parent_record.x, parent_record.y, parent_record.z, parent_record.r});
-        }
-
-        for (auto b = b_start; b!=b_end; ++b) {
-            points.push_back(section_point{b->x, b->y, b->z, b->r});
-
-            switch (b->type) {
-            case swc_record::kind::axon:
-                kind = section_kind::axon;
-                break;
-            case swc_record::kind::dendrite:
-            case swc_record::kind::apical_dendrite:
-                kind = section_kind::dendrite;
-                break;
-            case swc_record::kind::soma:
-                kind = section_kind::soma;
-                break;
-            default: ; // stick with what we have
-            }
-        }
-
-        morph.add_section(std::move(points), parent_id, kind);
-    }
-
-    morph.assert_valid();
-    return morph;
-}
-
-// Given a random-access mutable sequence of `swc_record` describing
-// a single morphology, check for consistency and renumber records
-// so that ids are contiguous within branches, have no gaps, and
-// are ordered with repect to parent indices.
-template <typename RandomAccessSequence>
-void swc_canonicalize_sequence(RandomAccessSequence& swc_records) {
-    std::unordered_set<swc_record::id_type> ids;
-
-    std::size_t         num_trees = 0;
-    swc_record::id_type last_id   = -1;
-    bool                needsort  = false;
-
-    for (const auto& r: swc_records) {
-        r.assert_consistent();
-
-        if (r.parent_id == -1 && ++num_trees > 1) {
-            // only a single tree is allowed
-            throw swc_error("multiple trees found in SWC record sequence");
-        }
-        if (ids.count(r.id)) {
-            throw swc_error("records with duplicated ids in SWC record sequence");
-        }
-
-        if (!needsort && r.id < last_id) {
-            needsort = true;
-        }
-
-        last_id = r.id;
-        ids.insert(r.id);
-    }
-
-    if (needsort) {
-        std::sort(std::begin(swc_records), std::end(swc_records),
-            [](const swc_record& a, const swc_record& b) { return a.id<b.id; });
-    }
-
-    // Renumber records if necessary
-    std::map<swc_record::id_type, swc_record::id_type> idmap;
-    swc_record::id_type next_id = 0;
-    for (auto& r: swc_records) {
-        if (r.id != next_id) {
-            auto old_id = r.id;
-            r.id = next_id;
-
-            auto new_parent_id = idmap.find(r.parent_id);
-            if (new_parent_id != idmap.end()) {
-                r.parent_id = new_parent_id->second;
-            }
-
-            r.assert_consistent();
-            idmap.insert(std::make_pair(old_id, next_id));
-        }
-        ++next_id;
-    }
-
-    // Reject if branches are not contiguously numbered
-    std::vector<swc_record::id_type> parent_list = { 0 };
-    for (std::size_t i = 1; i < swc_records.size(); ++i) {
-        parent_list.push_back(swc_records[i].parent_id);
-    }
-
-    if (!arb::algorithms::has_contiguous_compartments(parent_list)) {
-        throw swc_error("branches are not contiguously numbered", 0);
-    }
-}
-
-} // namespace io
-} // namespace arb
diff --git a/arbor/threading/threading.cpp b/arbor/threading/threading.cpp
index 6e2ab43edb4ac59c54bca4a39973255304e174d9..a05bf1bd69b95f6926f082028c4ad14317201ea5 100644
--- a/arbor/threading/threading.cpp
+++ b/arbor/threading/threading.cpp
@@ -5,7 +5,7 @@
 
 #include <arbor/arbexcept.hpp>
 #include <arbor/util/optional.hpp>
-#include <hardware/affinity.hpp>
+#include <hardware/node_info.hpp>
 
 #include "threading.hpp"
 #include "util/strprintf.hpp"
@@ -58,13 +58,18 @@ util::optional<size_t> get_env_num_threads() {
     return nthreads;
 }
 
-size_t num_threads_init() {
-    auto env_threads = get_env_num_threads();
-    if (!env_threads || *env_threads==0u) {
-        auto detect_threads = hw::num_cores();
-        return detect_threads? *detect_threads: 1;
+std::size_t num_threads_init() {
+    std::size_t n = 0;
+    // Take the thread count from the environment when get_env_num_threads() yields one.
+    if (auto env_threads = get_env_num_threads()) {
+        n = env_threads.value();
+    }
+    // Unset or zero: fall back to hw::node_processors() (presumably the node's processor count).
+    if (!n) {
+        n = hw::node_processors();
     }
-    return *env_threads;
+    // Never report zero threads.
+    return n? n: 1;
 }
 
 // Returns the number of threads used by the threading back end.
diff --git a/aux/CMakeLists.txt b/aux/CMakeLists.txt
index 91de62074d4251e373a14ce8f9e20d3b34a20717..d30301542bc64f71178b77dbc3ac525e18f9ecf3 100644
--- a/aux/CMakeLists.txt
+++ b/aux/CMakeLists.txt
@@ -1,7 +1,13 @@
 set(aux-sources
-    json_meter.cpp)
+
+    glob.cpp
+    ioutil.cpp
+    json_meter.cpp
+    path.cpp
+    spike_emitter.cpp
+)
 
 add_library(arbor-aux ${aux-sources})
 target_link_libraries(arbor-aux PUBLIC ext-json arbor)
-target_include_directories(arbor-aux INTERFACE .)
+target_include_directories(arbor-aux PUBLIC include)
 set_target_properties(arbor-aux PROPERTIES OUTPUT_NAME arboraux)
diff --git a/aux/glob.cpp b/aux/glob.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9b67a4721f83b712e6d3d01bfb1829b364eeae48
--- /dev/null
+++ b/aux/glob.cpp
@@ -0,0 +1,47 @@
+// POSIX headers
+extern "C" {
+#define _POSIX_C_SOURCE 2
+#include <glob.h>
+}
+
+// GLOB_TILDE and GLOB_BRACE are non-standard but convenient and common
+// flags for glob().
+
+#ifndef GLOB_TILDE
+#define GLOB_TILDE 0
+#endif
+#ifndef GLOB_BRACE
+#define GLOB_BRACE 0
+#endif
+
+#include <cerrno>
+
+#include <aux/path.hpp>
+#include <aux/scope_exit.hpp>
+
+namespace aux {
+
+// Expand `pattern` with glob(3); throws std::bad_alloc on GLOB_NOSPACE, returns no paths on other errors.
+std::vector<path> glob(const std::string& pattern) {
+    glob_t matches;
+    const int flags = GLOB_MARK | GLOB_NOCHECK | GLOB_TILDE | GLOB_BRACE;
+    const int status = ::glob(pattern.c_str(), flags, nullptr, &matches);
+    auto release_matches = on_scope_exit([&]() { ::globfree(&matches); });
+
+    if (status==GLOB_NOSPACE) {
+        throw std::bad_alloc{};
+    }
+
+    std::vector<path> paths;
+    if (status==0) {
+        // gl_pathv is a null-terminated array of the matched path strings.
+        paths.reserve(matches.gl_pathc);
+        for (char** entry = matches.gl_pathv; *entry; ++entry) {
+            paths.push_back(*entry);
+        }
+    }
+    return paths;
+}
+
+} // namespace aux
+
diff --git a/aux/include/aux/glob.hpp b/aux/include/aux/glob.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..406167ce8aeabdc00f9c22eb0815d734c085a58d
--- /dev/null
+++ b/aux/include/aux/glob.hpp
@@ -0,0 +1,14 @@
+#pragma once
+
+// glob (3) wrapper
+// TODO: emulate for not-entirely-POSIX platforms.
+
+
+#include <aux/path.hpp>
+
+namespace aux {
+
+std::vector<path> glob(const std::string& pattern);
+
+} // namespace aux
+
diff --git a/arbor/util/ioutil.hpp b/aux/include/aux/ioutil.hpp
similarity index 78%
rename from arbor/util/ioutil.hpp
rename to aux/include/aux/ioutil.hpp
index b45db228b31f59ce651d41f39d53a56395a8523e..2a9ba4ab7e0bba181518f4f26d1588c4d7db2450 100644
--- a/arbor/util/ioutil.hpp
+++ b/aux/include/aux/ioutil.hpp
@@ -1,27 +1,22 @@
 #pragma once
 
-#include <iostream>
-
-namespace arb {
-namespace util {
-
-class iosfmt_guard {
-public:
-    explicit iosfmt_guard(std::ios& stream) :
-        save_(nullptr), stream_(stream)
-    {
-        save_.copyfmt(stream_);
-    }
+// Provides:
+//
+// * mask_stream
+//
+//   Stream manipulator that enables or disables writing to a stream based on a flag.
+//
+// * open_or_throw
+//
+//   Open an fstream, throwing on error. If the 'excl' flag is set, throw a
+//   std::runtime_error if the path exists.
 
-    ~iosfmt_guard() {
-        stream_.copyfmt(save_);
-    }
+#include <iostream>
+#include <fstream>
 
-private:
-    std::ios save_;
-    std::ios& stream_;
-};
+#include <aux/path.hpp>
 
+namespace aux {
 
 template <typename charT, typename traitsT = std::char_traits<charT> >
 class basic_null_streambuf: public std::basic_streambuf<charT, traitsT> {
@@ -92,6 +87,12 @@ private:
     bool mask_;
 };
 
-} // namespace util
-} // namespace arb
+std::fstream open_or_throw(const aux::path& p, std::ios_base::openmode, bool exclusive);
+
+inline std::fstream open_or_throw(const aux::path& p, bool exclusive) {
+    // No open mode given: open for both reading and writing.
+    return open_or_throw(p, std::ios_base::in|std::ios_base::out, exclusive);
+}
+
+} // namespace aux
 
diff --git a/aux/json_meter.hpp b/aux/include/aux/json_meter.hpp
similarity index 100%
rename from aux/json_meter.hpp
rename to aux/include/aux/json_meter.hpp
diff --git a/arbor/util/path.hpp b/aux/include/aux/path.hpp
similarity index 92%
rename from arbor/util/path.hpp
rename to aux/include/aux/path.hpp
index d11cd1b73e1e44458540849760db78cf7ae71d1e..c5429cf5e6ccb6e035a3db6095b672abda6943bb 100644
--- a/arbor/util/path.hpp
+++ b/aux/include/aux/path.hpp
@@ -24,11 +24,7 @@
 #include <utility>
 #include <vector>
 
-#include "util/meta.hpp"
-#include "util/rangeutil.hpp"
-
-namespace arb {
-namespace util {
+namespace aux {
 
 class posix_path {
 public:
@@ -51,22 +47,23 @@ public:
 
     // Construct or assign from value_type string or sequence.
 
-    template <typename Source>
-    posix_path(Source&& source) { assign(std::forward<Source>(source)); }
+    posix_path(string_type source): p_(std::move(source)) {}
+
+    posix_path(const value_type* source): p_(source) {}
 
     template <typename Iter>
     posix_path(Iter b, Iter e) { assign(b, e); }
 
     template <typename Source>
-    posix_path& operator=(const Source& source) { return assign(source); }
+    posix_path& operator=(Source&& source) { return assign(std::forward<Source>(source)); }
 
     posix_path& assign(const posix_path& other) {
         p_ = other.p_;
         return *this;
     }
 
-    posix_path& assign(const string_type& source) {
-        p_ = source;
+    posix_path& assign(string_type source) {
+        p_ = std::move(source);
         return *this;
     }
 
@@ -75,12 +72,6 @@ public:
         return *this;
     }
 
-    template <typename Seq, typename = enable_if_sequence_t<Seq>>
-    posix_path& assign(const Seq& seq) {
-        util::assign(p_, seq);
-        return *this;
-    }
-
     template <typename Iter>
     posix_path& assign(Iter b, Iter e) {
         p_.assign(b, e);
@@ -349,9 +340,6 @@ private:
 namespace posix {
     file_status status(const path&, std::error_code&);
     file_status symlink_status(const path&, std::error_code&);
-
-    // POSIX glob (3) wrapper (not part of std::filesystem!).
-    std::vector<path> glob(const std::string& pattern);
 }
 
 inline file_status status(const path& p, std::error_code& ec) {
@@ -362,15 +350,11 @@ inline file_status symlink_status(const path& p, std::error_code& ec) {
     return posix::symlink_status(p, ec);
 }
 
-inline std::vector<path> glob(const std::string& pattern) {
-    return posix::glob(pattern);
-}
-
 // Wrappers for `status()`, again following std::filesystem.
 
 inline file_status status(const path& p) {
     std::error_code ec;
-    auto r = ::arb::util::posix::status(p, ec);
+    auto r = ::aux::posix::status(p, ec);
     if (ec) {
         throw filesystem_error("status()", p, ec);
     }
@@ -425,6 +409,5 @@ inline bool exists(const path& p, std::error_code& ec) {
     return exists(status(p, ec));
 }
 
-} // namespace util
-} // namespace arb
+} // namespace aux
 
diff --git a/arbor/util/scope_exit.hpp b/aux/include/aux/scope_exit.hpp
similarity index 93%
rename from arbor/util/scope_exit.hpp
rename to aux/include/aux/scope_exit.hpp
index 5f83678a8e1194033f881c18b600e208a22a3d59..4db228d8b31ea275122260094bbf848eade1ff24 100644
--- a/arbor/util/scope_exit.hpp
+++ b/aux/include/aux/scope_exit.hpp
@@ -5,8 +5,7 @@
 
 // Convenience class for RAII control of resources.
 
-namespace arb {
-namespace util {
+namespace aux {
 
 // `scope_exit` guard object will call provided functional object
 // on destruction. The provided functional object must be nothrow
@@ -48,5 +47,4 @@ scope_exit<std::decay_t<F>> on_scope_exit(F&& f) {
     return scope_exit<std::decay_t<F>>(std::forward<F>(f));
 }
 
-} // namespace util
-} // namespace arb
+} // namespace aux
diff --git a/aux/include/aux/spike_emitter.hpp b/aux/include/aux/spike_emitter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fdbfa16f16297f2896e5ee3b3fa113b5814dd571
--- /dev/null
+++ b/aux/include/aux/spike_emitter.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <functional>
+#include <iosfwd>
+#include <vector>
+
+#include <arbor/spike.hpp>
+
+namespace aux {
+
+// Functional object that writes spikes to an output stream, one spike per line.
+// It holds a reference to the stream, which must outlive the emitter.
+struct spike_emitter {
+    std::reference_wrapper<std::ostream> out;
+
+    spike_emitter(std::ostream& out);
+
+    // Write each spike in the vector as a line of text to `out`.
+    void operator()(const std::vector<arb::spike>&);
+};
+
+} // namespace aux
diff --git a/aux/include/aux/strsub.hpp b/aux/include/aux/strsub.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2dca8497b7ec894aeb8ef0587f1cde40066458e
--- /dev/null
+++ b/aux/include/aux/strsub.hpp
@@ -0,0 +1,78 @@
+#pragma once
+
+// Substitute instances of a given character (defaults to '%') in a template C
+// string with the remaining arguments, and write the result to an ostream or
+// return the result as a string.
+//
+// The special character itself can be escaped by duplicating it, e.g.
+//
+//     strsub("%%%-%%%", 30, 70)
+//
+// returns the string
+//
+//     "%30-%70"
+//
+// Escape pairs are recognised throughout the template, including the text that
+// follows the last substitution.
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace aux {
+
+// Stream-writing strsub(...):
+
+// Base case: no values remain. Write the rest of the template, collapsing each
+// doubled special character to a single one.
+inline std::ostream& strsub(std::ostream& o, char c, const char* templ) {
+    for (const char* t = templ; *t; ++t) {
+        o.put(*t);
+        if (*t==c && t[1]==c) ++t; // skip the second character of an escape pair
+    }
+    return o;
+}
+
+// Write the template up to its first unescaped special character, then `value`,
+// then handle the remainder of the template with the remaining values.
+template <typename T, typename... Tail>
+std::ostream& strsub(std::ostream& o, char c, const char* templ, T value, Tail&&... tail) {
+    const char* t = templ;
+    for (;;) {
+        while (*t && !(*t==c)) ++t;
+
+        if (t>templ) o.write(templ, t-templ);
+
+        if (!*t) return o;
+
+        if (t[1]!=c) break;
+
+        o.put(c);
+        templ = t += 2;
+    }
+
+    o << std::forward<T>(value);
+    return strsub(o, c, t+1, std::forward<Tail>(tail)...);
+}
+
+// As above, with the default special character '%'.
+template <typename... Args>
+std::ostream& strsub(std::ostream& o, const char* templ, Args&&... args) {
+    return strsub(o, '%', templ, std::forward<Args>(args)...);
+}
+
+// String-returning strsub(...) wrappers:
+
+template <typename... Args>
+std::string strsub(char c, const char* templ, Args&&... args) {
+    std::ostringstream o;
+    return strsub(o, c, templ, std::forward<Args>(args)...), o.str();
+}
+
+template <typename... Args>
+std::string strsub(const char* templ, Args&&... args) {
+    return strsub('%', templ, std::forward<Args>(args)...);
+}
+
+} // namespace aux
diff --git a/aux/tinyopt.hpp b/aux/include/aux/tinyopt.hpp
similarity index 100%
rename from aux/tinyopt.hpp
rename to aux/include/aux/tinyopt.hpp
diff --git a/aux/with_mpi.hpp b/aux/include/aux/with_mpi.hpp
similarity index 100%
rename from aux/with_mpi.hpp
rename to aux/include/aux/with_mpi.hpp
diff --git a/aux/ioutil.cpp b/aux/ioutil.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7572208bc168cd84bffe7eb0fd4f5b952b3acbc4
--- /dev/null
+++ b/aux/ioutil.cpp
@@ -0,0 +1,30 @@
+#include <fstream>
+#include <stdexcept>
+#include <utility>
+
+#include <aux/ioutil.hpp>
+#include <aux/path.hpp>
+#include <aux/strsub.hpp>
+
+namespace aux {
+
+// Open the file at path `p` with the given open mode and return the stream.
+// Throws std::runtime_error if `exclusive` is set and the path already exists,
+// or if the file cannot be opened.
+std::fstream open_or_throw(const path& p, std::ios_base::openmode mode, bool exclusive) {
+    if (exclusive && exists(p)) {
+        throw std::runtime_error(strsub("file % already exists", p));
+    }
+
+    std::fstream file;
+    file.open(p, mode);
+    if (!file) {
+        throw std::runtime_error(strsub("unable to open file %", p));
+    }
+
+    // Return the local by name: it is moved implicitly, and std::move here would
+    // only inhibit copy elision.
+    return file;
+}
+
+} // namespace aux
diff --git a/arbor/util/path.cpp b/aux/path.cpp
similarity index 66%
rename from arbor/util/path.cpp
rename to aux/path.cpp
index df571b2fe53cea978eb9a4729fd0fc0fb4cf445a..64321b23bfadb8cb7d5266e05a553cf858788035 100644
--- a/arbor/util/path.cpp
+++ b/aux/path.cpp
@@ -1,51 +1,15 @@
 // POSIX headers
 extern "C" {
-#define _POSIX_C_SOURCE 2
-#include <glob.h>
 #include <sys/stat.h>
 }
 
-// GLOB_TILDE and GLOB_BRACE are non-standard but convenient and common
-// flags for glob().
-
-#ifndef GLOB_TILDE
-#define GLOB_TILDE 0
-#endif
-#ifndef GLOB_BRACE
-#define GLOB_BRACE 0
-#endif
-
 #include <cerrno>
 
-#include <util/scope_exit.hpp>
-#include <util/path.hpp>
+#include <aux/path.hpp>
 
-namespace arb {
-namespace util {
+namespace aux {
 namespace posix {
 
-std::vector<path> glob(const std::string& pattern) {
-    std::vector<path> paths;
-    glob_t matches;
-
-    int flags = GLOB_MARK | GLOB_NOCHECK | GLOB_TILDE | GLOB_BRACE;
-    auto r = ::glob(pattern.c_str(), flags, nullptr, &matches);
-    auto glob_guard = on_scope_exit([&]() { ::globfree(&matches); });
-
-    if (r==GLOB_NOSPACE) {
-        throw std::bad_alloc{};
-    }
-    else if (r==0) {
-        // success
-        paths.reserve(matches.gl_pathc);
-        for (auto pathp = matches.gl_pathv; *pathp; ++pathp) {
-            paths.push_back(path{*pathp});
-        }
-    }
-
-    return paths;
-}
-
 namespace impl {
     file_status status(const char* p, int r, struct stat& st, std::error_code& ec) {
         if (!r) {
@@ -100,6 +64,5 @@ file_status symlink_status(const path& p, std::error_code& ec) {
 }
 
 } // namespace posix
-} // namespace util
-} // namespace arb
+} // namespace aux
 
diff --git a/aux/spike_emitter.cpp b/aux/spike_emitter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..db33121dcecd28b5e33523a0b19c079f03e55629
--- /dev/null
+++ b/aux/spike_emitter.cpp
@@ -0,0 +1,36 @@
+#include <cerrno>
+#include <cstdio>
+#include <functional>
+#include <iostream>
+#include <system_error>
+#include <vector>
+
+#include <arbor/spike.hpp>
+#include <aux/spike_emitter.hpp>
+
+namespace aux {
+
+spike_emitter::spike_emitter(std::ostream& out): out(out) {}
+
+// Write each spike as a line "<gid> <time>", with the time to four decimal places.
+// Throws std::system_error if formatting fails.
+void spike_emitter::operator()(const std::vector<arb::spike>& spikes) {
+    char line[45];
+    for (auto& s: spikes) {
+        int n = std::snprintf(line, sizeof(line), "%u %.4f",  s.source.gid, s.time);
+        if (n<0) {
+            throw std::system_error(errno, std::generic_category());
+        }
+
+        // snprintf returns the length the full text would have had; if that did not
+        // fit, only sizeof(line)-1 characters were stored, so never write more.
+        const int stored = static_cast<int>(sizeof(line))-1;
+        if (n>stored) {
+            n = stored;
+        }
+
+        out.get().write(line, n).put('\n');
+    }
+}
+
+} // namespace aux
diff --git a/example/bench/CMakeLists.txt b/example/bench/CMakeLists.txt
index 1423c792f55be187b026912f311cfe857b4326a6..8814dd310a7f8a610ebc46e60e16a6d317bb4218 100644
--- a/example/bench/CMakeLists.txt
+++ b/example/bench/CMakeLists.txt
@@ -1,6 +1,3 @@
 add_executable(bench bench.cpp recipe.cpp parameters.cpp)
 
 target_link_libraries(bench PRIVATE arbor arbor-aux ext-tclap ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(bench PRIVATE arbor-private-headers)
diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp
index 1ac4dcf19b6259a2265862451a539399939bb7d2..b1ada30747323b2b184bcbd304be94238509ee41 100644
--- a/example/bench/bench.cpp
+++ b/example/bench/bench.cpp
@@ -11,31 +11,31 @@
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/profile/profiler.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-#include "util/ioutil.hpp"
-
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/with_mpi.hpp>
 
 #include "parameters.hpp"
 #include "recipe.hpp"
 
-using namespace arb;
+namespace profile = arb::profile;
 
 int main(int argc, char** argv) {
     try {
-        distributed_context context;
-        #ifdef ARB_HAVE_MPI
-        mpi::scoped_guard guard(&argc, &argv);
+        arb::distributed_context context;
+#ifdef ARB_HAVE_MPI
+        aux::with_mpi guard(&argc, &argv);
         context = mpi_context(MPI_COMM_WORLD);
-        #endif
+#endif
         const bool is_root =  context.id()==0;
 
-        std::cout << util::mask_stream(is_root);
+        std::cout << aux::mask_stream(is_root);
 
         bench_params params = read_options(argc, argv);
 
@@ -49,8 +49,8 @@ int main(int argc, char** argv) {
         meters.checkpoint("recipe-build");
 
         // Make the domain decomposition for the model
-        auto node = arb::hw::get_node_info();
-        auto decomp = arb::partition_load_balance(recipe, node, &context);
+        auto local = arb::local_allocation();
+        auto decomp = arb::partition_load_balance(recipe, local, &context);
         meters.checkpoint("domain-decomp");
 
         // Construct the model.
@@ -73,8 +73,8 @@ int main(int argc, char** argv) {
         }
 
         // output profile and diagnostic feedback
-        auto profile = profile::profiler_summary();
-        std::cout << profile << "\n";
+        auto summary = profile::profiler_summary();
+        std::cout << summary << "\n";
 
         std::cout << "there were " << sim.num_spikes() << " spikes\n";
     }
diff --git a/example/bench/recipe.cpp b/example/bench/recipe.cpp
index e70e8060541262b8826533dc82210304a1deb681..b8dd5f3f97197a779b9191e0231eca797a61b22e 100644
--- a/example/bench/recipe.cpp
+++ b/example/bench/recipe.cpp
@@ -6,6 +6,10 @@
 
 #include "recipe.hpp"
 
+using arb::cell_gid_type;
+using arb::cell_size_type;
+using arb::cell_kind;
+
 cell_size_type bench_recipe::num_cells() const {
     return params_.num_cells;
 }
@@ -27,8 +31,8 @@ arb::util::unique_any bench_recipe::get_cell_description(cell_gid_type gid) cons
     return std::move(cell);
 }
 
-arb::cell_kind bench_recipe::get_cell_kind(arb::cell_gid_type gid) const {
-    return arb::cell_kind::benchmark;
+cell_kind bench_recipe::get_cell_kind(cell_gid_type gid) const {
+    return cell_kind::benchmark;
 }
 
 std::vector<arb::cell_connection> bench_recipe::connections_on(cell_gid_type gid) const {
diff --git a/example/bench/recipe.hpp b/example/bench/recipe.hpp
index 4c8c8102f21f10d33a0861c691a73051e03d4e78..e95f607704e9ea35cde3a181d768ced35d4aea0d 100644
--- a/example/bench/recipe.hpp
+++ b/example/bench/recipe.hpp
@@ -2,22 +2,21 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/recipe.hpp>
+#include <arbor/util/unique_any.hpp>
 
 #include "parameters.hpp"
 
-using arb::cell_kind;
-using arb::cell_gid_type;
-using arb::cell_size_type;
-
 class bench_recipe: public arb::recipe {
+private:
     bench_params params_;
+
 public:
     bench_recipe(bench_params p): params_(std::move(p)) {}
-    cell_size_type num_cells() const override;
-    arb::util::unique_any get_cell_description(cell_gid_type gid) const override;
+    arb::cell_size_type num_cells() const override;
+    arb::util::unique_any get_cell_description(arb::cell_gid_type gid) const override;
     arb::cell_kind get_cell_kind(arb::cell_gid_type gid) const override;
-    cell_size_type num_targets(cell_gid_type gid) const override;
-    cell_size_type num_sources(cell_gid_type gid) const override;
-    std::vector<arb::cell_connection> connections_on(cell_gid_type) const override;
+    arb::cell_size_type num_targets(arb::cell_gid_type gid) const override;
+    arb::cell_size_type num_sources(arb::cell_gid_type gid) const override;
+    std::vector<arb::cell_connection> connections_on(arb::cell_gid_type) const override;
 };
 
diff --git a/example/brunel/CMakeLists.txt b/example/brunel/CMakeLists.txt
index 15ed1ee7d71288e1bf8a2e4e7130058c3a60ee7c..657ec23f89b1f758d9fa71f75d2e937dc1c6470f 100644
--- a/example/brunel/CMakeLists.txt
+++ b/example/brunel/CMakeLists.txt
@@ -3,6 +3,3 @@ add_executable(brunel-miniapp
     io.cpp)
 
 target_link_libraries(brunel-miniapp PRIVATE arbor arbor-aux ext-tclap)
-
-# TODO: resolve public headers
-target_link_libraries(brunel-miniapp PRIVATE arbor-private-headers)
diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp
index ae716caf434f564230c785f0732dee72fbb5bc5f..fb1195f1cd7bf6104e66b7dad3ab524b77e55b46 100644
--- a/example/brunel/brunel_miniapp.cpp
+++ b/example/brunel/brunel_miniapp.cpp
@@ -8,8 +8,10 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/event_generator.hpp>
 #include <arbor/lif_cell.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/profile/profiler.hpp>
 #include <arbor/recipe.hpp>
@@ -17,22 +19,20 @@
 #include <arbor/threadinfo.hpp>
 #include <arbor/version.hpp>
 
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/path.hpp>
+#include <aux/spike_emitter.hpp>
+#include <aux/strsub.hpp>
 #ifdef ARB_MPI_ENABLED
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "io/exporter_spike_file.hpp"
-#include "util/ioutil.hpp"
-
-#include "partitioner.hpp"
 #include "io.hpp"
 
 using namespace arb;
 
-void banner(hw::node_info, const distributed_context*);
+void banner(proc_allocation, const distributed_context*);
 
 // Samples m unique values in interval [start, end) - gid.
 // We exclude gid because we don't want self-loops.
@@ -186,9 +186,6 @@ private:
     int seed_;
 };
 
-using util::any_cast;
-using util::make_span;
-
 int main(int argc, char** argv) {
     distributed_context context;
 
@@ -199,12 +196,10 @@ int main(int argc, char** argv) {
 #endif
         arb::profile::meter_manager meters(&context);
         meters.start();
-        std::cout << util::mask_stream(context.id()==0);
+        std::cout << aux::mask_stream(context.id()==0);
         // read parameters
         io::cl_options options = io::read_options(argc, argv, context.id()==0);
-        hw::node_info nd;
-        nd.num_cpu_cores = arb::num_threads();
-        nd.num_gpus = hw::num_gpus()>0? 1: 0;
+        proc_allocation nd = local_allocation();
         banner(nd, &context);
 
         meters.checkpoint("setup");
@@ -240,37 +235,31 @@ int main(int argc, char** argv) {
 
         brunel_recipe recipe(nexc, ninh, next, in_degree_prop, w, d, rel_inh_strength, poiss_lambda, seed);
 
-        auto register_exporter = [] (const io::cl_options& options) {
-            return std::make_unique<io::exporter_spike_file>
-                       (options.file_name, options.output_path,
-                        options.file_extension, options.over_write);
-        };
+        partition_hint_map hints;
+        hints[cell_kind::lif_neuron].cpu_group_size = group_size;
+        auto decomp = partition_load_balance(recipe, nd, &context, hints);
 
-        auto decomp = decompose(recipe, group_size, &context);
         simulation sim(recipe, decomp, &context);
 
         // Initialize the spike exporting interface
-        std::unique_ptr<io::exporter_spike_file> file_exporter;
+        std::fstream spike_out;
         if (options.spike_file_output) {
-            if (options.single_file_per_rank) {
-                file_exporter = register_exporter(options);
+            using std::ios_base;
 
-                sim.set_local_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    }
-                );
+            auto rank = context.id();
+            aux::path p = options.output_path;
+            p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension);
+
+            if (options.single_file_per_rank) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_local_spike_callback(aux::spike_emitter(spike_out));
             }
-            else if(context.id()==0) {
-                file_exporter = register_exporter(options);
-
-                sim.set_global_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    }
-                );
+            else if (rank==0) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_global_spike_callback(aux::spike_emitter(spike_out));
             }
         }
+
         meters.checkpoint("model-init");
 
         // run simulation
@@ -293,7 +282,7 @@ int main(int argc, char** argv) {
     }
     catch (io::usage_error& e) {
         // only print usage/startup errors on master
-        std::cerr << util::mask_stream(context.id()==0);
+        std::cerr << aux::mask_stream(context.id()==0);
         std::cerr << e.what() << "\n";
         return 1;
     }
@@ -304,12 +293,12 @@ int main(int argc, char** argv) {
     return 0;
 }
 
-void banner(hw::node_info nd, const distributed_context* ctx) {
+void banner(proc_allocation nd, const distributed_context* ctx) {
     std::cout << "==========================================\n";
     std::cout << "  Arbor miniapp\n";
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
-    std::cout << "  - threads     : " << nd.num_cpu_cores
+    std::cout << "  - threads     : " << nd.num_threads
               << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
diff --git a/example/brunel/io.cpp b/example/brunel/io.cpp
index 82624cd7bc9642c69d94ec8504fa118a21a1c03b..540cc559526f6181a58f53a6ffce09b8bca88513 100644
--- a/example/brunel/io.cpp
+++ b/example/brunel/io.cpp
@@ -8,6 +8,7 @@
 #include <type_traits>
 
 #include <tclap/CmdLine.h>
+
 #include <arbor/util/optional.hpp>
 
 #include "io.hpp"
@@ -32,176 +33,176 @@ namespace arb {
             return I;
         }
     }
-
-    namespace io {
-        // Override annoying parameters listed back-to-front behaviour.
-        //
-        // TCLAP argument creation _prepends_ its arguments to the internal
-        // list (_argList), where standard options --help etc. are already
-        // pre-inserted.
-        //
-        // reorder_arguments() reverses the arguments to restore ordering,
-        // and moves the standard options to the end.
-        class CustomCmdLine: public TCLAP::CmdLine {
-        public:
-            CustomCmdLine(const std::string &message, const std::string &version = "none"):
-            TCLAP::CmdLine(message, ' ', version, true)
-            {}
-
-            void reorder_arguments() {
-                _argList.reverse();
-                for (auto opt: {"help", "version", "ignore_rest"}) {
-                    auto i = std::find_if(
-                                          _argList.begin(), _argList.end(),
-                                          [&opt](TCLAP::Arg* a) { return a->getName()==opt; });
-
-                    if (i!=_argList.end()) {
-                        auto a = *i;
-                        _argList.erase(i);
-                        _argList.push_back(a);
-                    }
+}
+
+namespace io {
+    // Override TCLAP's annoying behaviour of listing parameters back-to-front.
+    //
+    // TCLAP argument creation _prepends_ its arguments to the internal
+    // list (_argList), where standard options --help etc. are already
+    // pre-inserted.
+    //
+    // reorder_arguments() reverses the arguments to restore ordering,
+    // and moves the standard options to the end.
+    class CustomCmdLine: public TCLAP::CmdLine {
+    public:
+        CustomCmdLine(const std::string &message, const std::string &version = "none"):
+        TCLAP::CmdLine(message, ' ', version, true)
+        {}
+
+        void reorder_arguments() {
+            _argList.reverse();
+            for (auto opt: {"help", "version", "ignore_rest"}) {
+                auto i = std::find_if(
+                                      _argList.begin(), _argList.end(),
+                                      [&opt](TCLAP::Arg* a) { return a->getName()==opt; });
+
+                if (i!=_argList.end()) {
+                    auto a = *i;
+                    _argList.erase(i);
+                    _argList.push_back(a);
                 }
             }
-        };
+        }
+    };
 
-        // Update an option value from command line argument if set.
-        template <
+    // Update an option value from command line argument if set.
+    template <
         typename T,
         typename Arg,
         typename = std::enable_if_t<std::is_base_of<TCLAP::Arg, Arg>::value>
-        >
-        static void update_option(T& opt, Arg& arg) {
-            if (arg.isSet()) {
-                opt = arg.getValue();
-            }
+    >
+    static void update_option(T& opt, Arg& arg) {
+        if (arg.isSet()) {
+            opt = arg.getValue();
         }
+    }
 
-        // Read options from (optional) json file and command line arguments.
-        cl_options read_options(int argc, char** argv, bool allow_write) {
-            cl_options options;
-            std::string save_file = "";
-
-            // Parse command line arguments.
-            try {
-                cl_options defopts;
-
-                CustomCmdLine cmd("nest brunel miniapp harness", "0.1");
-
-                TCLAP::ValueArg<uint32_t> nexc_arg
-                    ("n", "n-excitatory", "total number of cells in the excitatory population",
-                     false, defopts.nexc, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> ninh_arg
-                    ("m", "n-inhibitory", "total number of cells in the inhibitory population",
-                     false, defopts.ninh, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> next_arg
-                    ("e", "n-external", "total number of incoming Poisson (external) connections per cell.",
-                     false, defopts.ninh, "integer", cmd);
-
-                TCLAP::ValueArg<double> syn_prop_arg
-                    ("p", "in-degree-prop", "the proportion of connections both the excitatory and inhibitory populations that each neuron receives",
-                     false, defopts.syn_per_cell_prop, "double", cmd);
-
-                TCLAP::ValueArg<float> weight_arg
-                    ("w", "weight", "the weight of all excitatory connections",
-                     false, defopts.weight, "float", cmd);
-
-                TCLAP::ValueArg<float> delay_arg
-                    ("d", "delay", "the delay of all connections",
-                     false, defopts.delay, "float", cmd);
-
-                TCLAP::ValueArg<float> rel_inh_strength_arg
-                    ("g", "rel-inh-w", "relative strength of inhibitory synapses with respect to the excitatory ones",
-                     false, defopts.rel_inh_strength, "float", cmd);
-
-                TCLAP::ValueArg<double> poiss_lambda_arg
-                    ("l", "lambda", "Expected number of spikes from a single poisson cell per ms",
-                     false, defopts.poiss_lambda, "double", cmd);
-
-                TCLAP::ValueArg<double> tfinal_arg
-                    ("t", "tfinal", "length of the simulation period [ms]",
-                     false, defopts.tfinal, "time", cmd);
-
-                TCLAP::ValueArg<double> dt_arg
-                    ("s", "delta-t", "simulation time step [ms] (this parameter is ignored)",
-                     false, defopts.dt, "time", cmd);
-
-                TCLAP::ValueArg<uint32_t> group_size_arg
-                    ("G", "group-size", "number of cells per cell group",
-                     false, defopts.group_size, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> seed_arg
-                    ("S", "seed", "seed for poisson spike generators",
-                     false, defopts.seed, "integer", cmd);
-
-                TCLAP::SwitchArg spike_output_arg
-                    ("f","spike-file-output","save spikes to file", cmd, false);
-
-                TCLAP::SwitchArg profile_only_zero_arg
-                    ("z", "profile-only-zero", "Only output profile information for rank 0",
-                     cmd, false);
-
-                TCLAP::SwitchArg verbose_arg
-                    ("v", "verbose", "Present more verbose information to stdout", cmd, false);
-
-                cmd.reorder_arguments();
-                cmd.parse(argc, argv);
-
-                // Handle verbosity separately from other options: it is not considered part
-                // of the saved option state.
-                options.verbose = verbose_arg.getValue();
-                update_option(options.nexc, nexc_arg);
-                update_option(options.ninh, ninh_arg);
-                update_option(options.next, next_arg);
-                update_option(options.syn_per_cell_prop, syn_prop_arg);
-                update_option(options.weight, weight_arg);
-                update_option(options.delay, delay_arg);
-                update_option(options.rel_inh_strength, rel_inh_strength_arg);
-                update_option(options.poiss_lambda, poiss_lambda_arg);
-                update_option(options.tfinal, tfinal_arg);
-                update_option(options.dt, dt_arg);
-                update_option(options.group_size, group_size_arg);
-                update_option(options.seed, seed_arg);
-                update_option(options.spike_file_output, spike_output_arg);
-                update_option(options.profile_only_zero, profile_only_zero_arg);
-
-                if (options.group_size < 1) {
-                    throw usage_error("minimum of one cell per group");
-                }
-
-                if (options.rel_inh_strength <= 0 || options.rel_inh_strength > 1) {
-                    throw usage_error("relative strength of inhibitory connections must be in the interval (0, 1].");
-                }
-            }
-            catch (TCLAP::ArgException& e) {
-                throw usage_error("error parsing command line argument "+e.argId()+": "+e.error());
+    // Read options from command line arguments.
+    cl_options read_options(int argc, char** argv, bool allow_write) {
+        cl_options options;
+        std::string save_file = "";
+
+        // Parse command line arguments.
+        try {
+            cl_options defopts;
+
+            CustomCmdLine cmd("nest brunel miniapp harness", "0.1");
+
+            TCLAP::ValueArg<uint32_t> nexc_arg
+                ("n", "n-excitatory", "total number of cells in the excitatory population",
+                 false, defopts.nexc, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> ninh_arg
+                ("m", "n-inhibitory", "total number of cells in the inhibitory population",
+                 false, defopts.ninh, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> next_arg
+                ("e", "n-external", "total number of incoming Poisson (external) connections per cell.",
+                 false, defopts.next, "integer", cmd);
+
+            TCLAP::ValueArg<double> syn_prop_arg
+                ("p", "in-degree-prop", "the proportion of connections both the excitatory and inhibitory populations that each neuron receives",
+                 false, defopts.syn_per_cell_prop, "double", cmd);
+
+            TCLAP::ValueArg<float> weight_arg
+                ("w", "weight", "the weight of all excitatory connections",
+                 false, defopts.weight, "float", cmd);
+
+            TCLAP::ValueArg<float> delay_arg
+                ("d", "delay", "the delay of all connections",
+                 false, defopts.delay, "float", cmd);
+
+            TCLAP::ValueArg<float> rel_inh_strength_arg
+                ("g", "rel-inh-w", "relative strength of inhibitory synapses with respect to the excitatory ones",
+                 false, defopts.rel_inh_strength, "float", cmd);
+
+            TCLAP::ValueArg<double> poiss_lambda_arg
+                ("l", "lambda", "Expected number of spikes from a single poisson cell per ms",
+                 false, defopts.poiss_lambda, "double", cmd);
+
+            TCLAP::ValueArg<double> tfinal_arg
+                ("t", "tfinal", "length of the simulation period [ms]",
+                 false, defopts.tfinal, "time", cmd);
+
+            TCLAP::ValueArg<double> dt_arg
+                ("s", "delta-t", "simulation time step [ms] (this parameter is ignored)",
+                 false, defopts.dt, "time", cmd);
+
+            TCLAP::ValueArg<uint32_t> group_size_arg
+                ("G", "group-size", "number of cells per cell group",
+                 false, defopts.group_size, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> seed_arg
+                ("S", "seed", "seed for poisson spike generators",
+                 false, defopts.seed, "integer", cmd);
+
+            TCLAP::SwitchArg spike_output_arg
+                ("f","spike-file-output","save spikes to file", cmd, false);
+
+            TCLAP::SwitchArg profile_only_zero_arg
+                ("z", "profile-only-zero", "Only output profile information for rank 0",
+                 cmd, false);
+
+            TCLAP::SwitchArg verbose_arg
+                ("v", "verbose", "Present more verbose information to stdout", cmd, false);
+
+            cmd.reorder_arguments();
+            cmd.parse(argc, argv);
+
+            // Handle verbosity separately from other options: it is not considered part
+            // of the saved option state.
+            options.verbose = verbose_arg.getValue();
+            update_option(options.nexc, nexc_arg);
+            update_option(options.ninh, ninh_arg);
+            update_option(options.next, next_arg);
+            update_option(options.syn_per_cell_prop, syn_prop_arg);
+            update_option(options.weight, weight_arg);
+            update_option(options.delay, delay_arg);
+            update_option(options.rel_inh_strength, rel_inh_strength_arg);
+            update_option(options.poiss_lambda, poiss_lambda_arg);
+            update_option(options.tfinal, tfinal_arg);
+            update_option(options.dt, dt_arg);
+            update_option(options.group_size, group_size_arg);
+            update_option(options.seed, seed_arg);
+            update_option(options.spike_file_output, spike_output_arg);
+            update_option(options.profile_only_zero, profile_only_zero_arg);
+
+            if (options.group_size < 1) {
+                throw usage_error("minimum of one cell per group");
             }
 
-            // If verbose output requested, emit option summary.
-            if (options.verbose) {
-                std::cout << options << "\n";
+            if (options.rel_inh_strength <= 0 || options.rel_inh_strength > 1) {
+                throw usage_error("relative strength of inhibitory connections must be in the interval (0, 1].");
             }
-
-            return options;
+        }
+        catch (TCLAP::ArgException& e) {
+            throw usage_error("error parsing command line argument "+e.argId()+": "+e.error());
         }
 
-        std::ostream& operator<<(std::ostream& o, const cl_options& options) {
-            o << "simulation options:\n";
-            o << "  excitatory cells                                           : " << options.nexc << "\n";
-            o << "  inhibitory cells                                           : " << options.ninh << "\n";
-            o << "  Poisson connections per cell                               : " << options.next << "\n";
-            o << "  proportion of synapses/cell from each population           : " << options.syn_per_cell_prop << "\n";
-            o << "  weight of excitatory synapses                              : " << options.weight << "\n";
-            o << "  relative strength of inhibitory synapses                   : " << options.rel_inh_strength << "\n";
-            o << "  delay of all synapses                                      : " << options.delay << "\n";
-            o << "  expected number of spikes from a single poisson cell per ms: " << options.poiss_lambda << "\n";
-            o << "\n";
-            o << "  simulation time                                            : " << options.tfinal << "\n";
-            o << "  dt                                                         : " << options.dt << "\n";
-            o << "  group size                                                 : " << options.group_size << "\n";
-            o << "  seed                                                       : " << options.seed << "\n";
-            return o;
+        // If verbose output requested, emit option summary.
+        if (options.verbose) {
+            std::cout << options << "\n";
         }
-    } // namespace io
-} // namespace arbor
+
+        return options;
+    }
+
+    std::ostream& operator<<(std::ostream& o, const cl_options& options) {
+        o << "simulation options:\n";
+        o << "  excitatory cells                                           : " << options.nexc << "\n";
+        o << "  inhibitory cells                                           : " << options.ninh << "\n";
+        o << "  Poisson connections per cell                               : " << options.next << "\n";
+        o << "  proportion of synapses/cell from each population           : " << options.syn_per_cell_prop << "\n";
+        o << "  weight of excitatory synapses                              : " << options.weight << "\n";
+        o << "  relative strength of inhibitory synapses                   : " << options.rel_inh_strength << "\n";
+        o << "  delay of all synapses                                      : " << options.delay << "\n";
+        o << "  expected number of spikes from a single poisson cell per ms: " << options.poiss_lambda << "\n";
+        o << "\n";
+        o << "  simulation time                                            : " << options.tfinal << "\n";
+        o << "  dt                                                         : " << options.dt << "\n";
+        o << "  group size                                                 : " << options.group_size << "\n";
+        o << "  seed                                                       : " << options.seed << "\n";
+        return o;
+    }
+} // namespace io
diff --git a/example/brunel/io.hpp b/example/brunel/io.hpp
index deeedb5fd4b291f578e2f7ed1f4650aaf2c8248f..370e02acf6b8918ba5c6e43623ad500385ca4fc3 100644
--- a/example/brunel/io.hpp
+++ b/example/brunel/io.hpp
@@ -8,7 +8,6 @@
 #include <arbor/common_types.hpp>
 #include <arbor/util/optional.hpp>
 
-namespace arb {
 namespace io {
     // Holds the options for a simulation run.
     // Default constructor gives default options.
@@ -60,4 +59,3 @@ namespace io {
 
     cl_options read_options(int argc, char** argv, bool allow_write = true);
 } // namespace io
-} // namespace arbor
diff --git a/example/brunel/partitioner.hpp b/example/brunel/partitioner.hpp
deleted file mode 100644
index cd4d383d37eef21743e8d9ddc532a4cec5d7a825..0000000000000000000000000000000000000000
--- a/example/brunel/partitioner.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <arbor/distributed_context.hpp>
-#include <arbor/domain_decomposition.hpp>
-#include <arbor/recipe.hpp>
-
-#include "hardware/node_info.hpp"
-#include "util/partition.hpp"
-#include "util/span.hpp"
-#include "util/transform.hpp"
-
-namespace arb {
-    static
-    domain_decomposition decompose(const recipe& rec, const unsigned group_size, const distributed_context* ctx) {
-        struct partition_gid_domain {
-            partition_gid_domain(std::vector<cell_gid_type> divs):
-                gid_divisions(std::move(divs))
-            {}
-
-            int operator()(cell_gid_type gid) const {
-                auto gid_part = util::partition_view(gid_divisions);
-                return gid_part.index(gid);
-            }
-
-            const std::vector<cell_gid_type> gid_divisions;
-        };
-
-        cell_size_type num_global_cells = rec.num_cells();
-        unsigned num_domains = ctx->size();
-        int domain_id = ctx->id();
-
-        auto dom_size = [&](unsigned dom) -> cell_gid_type {
-            const cell_gid_type B = num_global_cells/num_domains;
-            const cell_gid_type R = num_global_cells - num_domains*B;
-            return B + (dom<R);
-        };
-
-        // Global load balance
-        std::vector<cell_gid_type> gid_divisions;
-        auto gid_part = make_partition(
-            gid_divisions, util::transform_view(util::make_span(num_domains), dom_size));
-
-        auto range = gid_part[domain_id];
-        cell_size_type num_local_cells = range.second - range.first;
-
-        unsigned num_groups = num_local_cells / group_size + (num_local_cells%group_size== 0 ? 0 : 1);
-        std::vector<group_description> groups;
-
-        // Local load balance
-        // i.e. all the groups that the current rank (domain) owns
-        for (unsigned i = 0; i < num_groups; ++i) {
-            unsigned start = i * group_size;
-            unsigned end = std::min(start + group_size, num_local_cells);
-            std::vector<cell_gid_type> group_elements;
-
-            for (unsigned j = start; j < end; ++j) {
-                group_elements.push_back(j);
-            }
-
-            groups.push_back({cell_kind::lif_neuron, std::move(group_elements), backend_kind::multicore});
-        }
-
-        domain_decomposition d;
-        d.num_domains = num_domains;
-        d.domain_id = domain_id;
-        d.num_local_cells = num_local_cells;
-        d.num_global_cells = num_global_cells;
-        d.groups = std::move(groups);
-        d.gid_domain = partition_gid_domain(std::move(gid_divisions));
-
-        return d;
-    }
-}
diff --git a/example/generators/CMakeLists.txt b/example/generators/CMakeLists.txt
index 7235b1047a6b13f8a43e1cef1f772f9b273f2ef2..bc2a70b58edc2321cd657e12d0cc89c11f677830 100644
--- a/example/generators/CMakeLists.txt
+++ b/example/generators/CMakeLists.txt
@@ -1,6 +1,3 @@
 add_executable(event-gen event_gen.cpp)
 
 target_link_libraries(event-gen PRIVATE arbor arbor-aux ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(event-gen PRIVATE arbor-private-headers)
diff --git a/example/generators/event_gen.cpp b/example/generators/event_gen.cpp
index 22607137bf164242b1996c4cc2c7b5981ca352b0..f3c577c3044ed12b8815aa772e20a03825a58c2e 100644
--- a/example/generators/event_gen.cpp
+++ b/example/generators/event_gen.cpp
@@ -14,15 +14,14 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/event_generator.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-
 using arb::cell_gid_type;
 using arb::cell_lid_type;
 using arb::cell_size_type;
@@ -135,7 +134,7 @@ int main() {
     generator_recipe recipe;
 
     // Make the domain decomposition for the model
-    auto node = arb::hw::get_node_info();
+    auto node = arb::local_allocation();
     auto decomp = arb::partition_load_balance(recipe, node, &context);
 
     // Construct the model.
diff --git a/example/miniapp/CMakeLists.txt b/example/miniapp/CMakeLists.txt
index f2d8e9bd6aa1bfd9d5ae489aa7e788a04f06be97..84480623b65fc1db4c1ffe7ef2c88bce718a43dd 100644
--- a/example/miniapp/CMakeLists.txt
+++ b/example/miniapp/CMakeLists.txt
@@ -7,6 +7,3 @@ add_executable(miniapp
 )
 
 target_link_libraries(miniapp PRIVATE arbor arbor-aux ext-tclap ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(miniapp PRIVATE arbor-private-headers)
diff --git a/example/miniapp/io.hpp b/example/miniapp/io.hpp
index f9e89f22ca377752828c242baae210c7fb2573db..8d300931d6134ea6244b60593567e6b6ab025ddb 100644
--- a/example/miniapp/io.hpp
+++ b/example/miniapp/io.hpp
@@ -9,8 +9,7 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/util/optional.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 // TODO: this shouldn't be in arb namespace
 namespace arb {
diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp
index c1095b08c8dc3929b1599f4d2cff9339c5d0eeb4..e384a01523b5309a4986f6dd8a30987cb462df71 100644
--- a/example/miniapp/miniapp.cpp
+++ b/example/miniapp/miniapp.cpp
@@ -7,6 +7,7 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/profile/profiler.hpp>
@@ -17,15 +18,14 @@
 #include <arbor/util/any.hpp>
 #include <arbor/version.hpp>
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "io/exporter_spike_file.hpp"
-#include "load_balance.hpp"
-#include "util/ioutil.hpp"
 
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/path.hpp>
+#include <aux/spike_emitter.hpp>
+#include <aux/strsub.hpp>
 #ifdef ARB_MPI_ENABLED
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
 #include "io.hpp"
@@ -36,10 +36,10 @@ using namespace arb;
 
 using util::any_cast;
 
-void banner(hw::node_info, const distributed_context*);
+void banner(proc_allocation, const distributed_context*);
 std::unique_ptr<recipe> make_recipe(const io::cl_options&, const probe_distribution&);
 sample_trace make_trace(const probe_info& probe);
-
+std::fstream& open_or_throw(std::fstream& file, const aux::path& p, bool exclusive = false);
 void report_compartment_stats(const recipe&);
 
 int main(int argc, char** argv) {
@@ -55,7 +55,7 @@ int main(int argc, char** argv) {
         profile::meter_manager meters(&context);
         meters.start();
 
-        std::cout << util::mask_stream(context.id()==0);
+        std::cout << aux::mask_stream(context.id()==0);
         // read parameters
         io::cl_options options = io::read_options(argc, argv, context.id()==0);
 
@@ -63,9 +63,8 @@ int main(int argc, char** argv) {
 
         // Use a node description that uses the number of threads used by the
         // threading back end, and 1 gpu if available.
-        hw::node_info nd;
-        nd.num_cpu_cores = arb::num_threads();
-        nd.num_gpus = hw::num_gpus()>0? 1: 0;
+        proc_allocation nd = local_allocation();
+        nd.num_gpus = nd.num_gpus>=1? 1: 0;
         banner(nd, &context);
 
         meters.checkpoint("setup");
@@ -80,13 +79,6 @@ int main(int argc, char** argv) {
             report_compartment_stats(*recipe);
         }
 
-        auto register_exporter = [] (const io::cl_options& options) {
-            return
-                std::make_unique<io::exporter_spike_file>(
-                    options.file_name, options.output_path,
-                    options.file_extension, options.over_write);
-        };
-
         auto decomp = partition_load_balance(*recipe, nd, &context);
         simulation sim(*recipe, decomp, &context);
 
@@ -121,21 +113,21 @@ int main(int argc, char** argv) {
         sim.set_binning_policy(binning_policy, options.bin_dt);
 
         // Initialize the spike exporting interface
-        std::unique_ptr<io::exporter_spike_file> file_exporter;
+        std::fstream spike_out;
         if (options.spike_file_output) {
+            using std::ios_base;
+
+            auto rank = context.id();
+            aux::path p = options.output_path;
+            p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension);
+
             if (options.single_file_per_rank) {
-                file_exporter = register_exporter(options);
-                sim.set_local_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    });
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_local_spike_callback(aux::spike_emitter(spike_out));
             }
-            else if(context.id()==0) {
-                file_exporter = register_exporter(options);
-                sim.set_global_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                       file_exporter->output(spikes);
-                    });
+            else if (rank==0) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_global_spike_callback(aux::spike_emitter(spike_out));
             }
         }
 
@@ -168,7 +160,7 @@ int main(int argc, char** argv) {
     }
     catch (io::usage_error& e) {
         // only print usage/startup errors on master
-        std::cerr << util::mask_stream(context.id()==0);
+        std::cerr << aux::mask_stream(context.id()==0);
         std::cerr << e.what() << "\n";
         return 1;
     }
@@ -179,12 +171,12 @@ int main(int argc, char** argv) {
     return 0;
 }
 
-void banner(hw::node_info nd, const distributed_context* ctx) {
+void banner(proc_allocation nd, const distributed_context* ctx) {
     std::cout << "==========================================\n";
     std::cout << "  Arbor miniapp\n";
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
-    std::cout << "  - threads     : " << nd.num_cpu_cores
+    std::cout << "  - threads     : " << nd.num_threads
               << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
@@ -258,3 +250,4 @@ void report_compartment_stats(const recipe& rec) {
 
     std::cout << "compartments/cell: min=" << ncomp_min <<"; max=" << ncomp_max << "; mean=" << (double)ncomp_total/ncell << "\n";
 }
+
diff --git a/example/miniapp/morphology_pool.cpp b/example/miniapp/morphology_pool.cpp
index 4d3937e4fc5e78058f55ab88952e0497a7cb7961..8515f20e03ad25aa9440c2bd0d9f5a75638869a7 100644
--- a/example/miniapp/morphology_pool.cpp
+++ b/example/miniapp/morphology_pool.cpp
@@ -3,9 +3,10 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
+#include <arbor/swcio.hpp>
 
-#include "swcio.hpp"
-#include "util/path.hpp"
+#include <aux/glob.hpp>
+#include <aux/path.hpp>
 
 #include "morphology_pool.hpp"
 
@@ -30,22 +31,22 @@ static morphology make_basic_y_morphology() {
 
 morphology_pool default_morphology_pool(make_basic_y_morphology());
 
-void load_swc_morphology(morphology_pool& pool, const util::path& swc_path) {
+void load_swc_morphology(morphology_pool& pool, const aux::path& swc_path) {
     std::ifstream fi;
     fi.exceptions(std::ifstream::failbit);
 
     fi.open(swc_path.c_str());
-    pool.insert(io::swc_as_morphology(io::parse_swc_file(fi)));
+    pool.insert(swc_as_morphology(parse_swc_file(fi)));
 }
 
 void load_swc_morphology_glob(morphology_pool& pool, const std::string& swc_pattern) {
     std::ifstream fi;
     fi.exceptions(std::ifstream::failbit);
 
-    auto swc_paths = util::glob(swc_pattern);
+    auto swc_paths = aux::glob(swc_pattern);
     for (const auto& p: swc_paths) {
         fi.open(p.c_str());
-        pool.insert(io::swc_as_morphology(io::parse_swc_file(fi)));
+        pool.insert(swc_as_morphology(parse_swc_file(fi)));
         pool[pool.size()-1].assert_valid();
         fi.close();
     }
diff --git a/example/miniapp/morphology_pool.hpp b/example/miniapp/morphology_pool.hpp
index a7b46230bd34635ab20cf709c630ade7242ef87f..31567c0bd99e45706ef1152fa12ca8b478437917 100644
--- a/example/miniapp/morphology_pool.hpp
+++ b/example/miniapp/morphology_pool.hpp
@@ -9,8 +9,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 namespace arb {
 
@@ -35,7 +34,7 @@ public:
 
 extern morphology_pool default_morphology_pool;
 
-void load_swc_morphology(morphology_pool& pool, const util::path& swc_path);
+void load_swc_morphology(morphology_pool& pool, const aux::path& swc_path);
 void load_swc_morphology_glob(morphology_pool& pool, const std::string& pattern);
 
 } // namespace arb
diff --git a/example/miniapp/trace.cpp b/example/miniapp/trace.cpp
index 5fc878982bcb919b0f7e6c3b13f0bbeaa06ea3a8..39db713883c110fca91f520c21bf9ad2feef2988 100644
--- a/example/miniapp/trace.cpp
+++ b/example/miniapp/trace.cpp
@@ -1,12 +1,11 @@
 #include <fstream>
+#include <iomanip>
 #include <string>
 
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
 
-#include "util/strprintf.hpp"
-
 #include "trace.hpp"
 
 using namespace arb;
@@ -23,8 +22,10 @@ void write_trace_csv(const sample_trace& trace, const std::string& prefix) {
     file << "# probe: " << trace.probe_id.index << "\n";
     file << "time_ms, " << trace.name << "_" << trace.units << "\n";
 
+    file.precision(15);
+    file << std::fixed;
     for (const auto& sample: trace.samples) {
-        file << util::strprintf("% 20.15f, % 20.15f\n", sample.t, sample.v);
+        file << std::setw(20) << sample.t << ',' << std::setw(20) << sample.v << '\n';
     }
 }
 
diff --git a/include/arbor/domain_decomposition.hpp b/include/arbor/domain_decomposition.hpp
index aecf5a83d287200718d04695328a71358c22604a..61f1710bee94c4d5b08ee2f04788fc21bd8661d1 100644
--- a/include/arbor/domain_decomposition.hpp
+++ b/include/arbor/domain_decomposition.hpp
@@ -9,14 +9,16 @@
 
 namespace arb {
 
-inline bool has_gpu_backend(cell_kind k) {
-    if (k==cell_kind::cable1d_neuron) {
-        return true;
-    }
-    return false;
-}
+/// Local resource info for domain partitioning.
+struct proc_allocation {
+    unsigned num_threads = 1;
+    unsigned num_gpus = 0;
+};
+
+/// Determine available local domain resources.
+proc_allocation local_allocation();
 
-/// Meta data for a local cell group.
+/// Metadata for a local cell group.
 struct group_description {
     /// The kind of cell in the group. All cells in a cell_group have the same type.
     const cell_kind kind;
diff --git a/include/arbor/event_generator.hpp b/include/arbor/event_generator.hpp
index e18290d7d28de9c95c1c7f74d817b579f0166122..02ad77b8d454a4f8b4e19ab48b04745ca893667c 100644
--- a/include/arbor/event_generator.hpp
+++ b/include/arbor/event_generator.hpp
@@ -1,9 +1,11 @@
 #pragma once
 
+#include <algorithm>
 #include <cstdint>
 #include <memory>
 #include <random>
 
+#include <arbor/assert.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/generic_event.hpp>
 #include <arbor/spike_event.hpp>
diff --git a/include/arbor/load_balance.hpp b/include/arbor/load_balance.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8235da030976565dbea850e18c013116a10473e9
--- /dev/null
+++ b/include/arbor/load_balance.hpp
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <cstddef>
+#include <unordered_map>
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/recipe.hpp>
+
+namespace arb {
+
+/// Per-cell-kind hints for partition_load_balance.
+struct partition_hint {
+    /// Largest std::size_t value; the default gpu_group_size.
+    constexpr static std::size_t max_size = -1;
+
+    std::size_t cpu_group_size = 1;
+    std::size_t gpu_group_size = max_size;
+    bool prefer_gpu = true;
+};
+
+/// Hints keyed by cell kind.
+using partition_hint_map = std::unordered_map<cell_kind, partition_hint>;
+
+/// Build a domain decomposition for `rec` given the local resources `nd`;
+/// `hint_map` defaults to empty.
+domain_decomposition partition_load_balance(
+    const recipe& rec,
+    proc_allocation nd,
+    const distributed_context* ctx,
+    partition_hint_map hint_map = {});
+
+} // namespace arb
diff --git a/arbor/math.hpp b/include/arbor/math.hpp
similarity index 95%
rename from arbor/math.hpp
rename to include/arbor/math.hpp
index 6f538cf058a2a807bfe6c22fd4d8c362ea57d9e4..d0b2460065de85fca09c43b0f79a3bd51f0a9970 100644
--- a/arbor/math.hpp
+++ b/include/arbor/math.hpp
@@ -114,18 +114,6 @@ C round_up(T v, U b) {
     return v-m+signum(m)*impl::abs_if_signed(b, Signed{});
 }
 
-// Return minimum of the two values
-template <typename T>
-T min(const T& lhs, const T& rhs) {
-    return lhs<rhs? lhs: rhs;
-}
-
-// Return maximum of the two values
-template <typename T>
-T max(const T& lhs, const T& rhs) {
-    return lhs<rhs? rhs: lhs;
-}
-
 // Value of x/(exp(x)-1) with care taken to handle x=0 case
 template <typename T>
 inline
diff --git a/include/arbor/swcio.hpp b/include/arbor/swcio.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..50003bd1180db7023ba1a50000a67b5e0107907b
--- /dev/null
+++ b/include/arbor/swcio.hpp
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <exception>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <arbor/assert.hpp>
+#include <arbor/arbexcept.hpp>
+#include <arbor/morphology.hpp>
+#include <arbor/point.hpp>
+
+namespace arb {
+
+struct swc_error: public arbor_exception {
+    explicit swc_error(const std::string& msg, unsigned line_number = 0):
+        arbor_exception(msg), line_number(line_number)
+    {}
+    unsigned line_number;
+};
+
+class swc_record {
+public:
+    using id_type = int;
+    using coord_type = double;
+
+    // More on SWC files: http://research.mssm.edu/cnic/swc.html
+    enum class kind {
+        undefined = 0,
+        soma,
+        axon,
+        dendrite,
+        apical_dendrite,
+        fork_point,
+        end_point,
+        custom
+    };
+
+    kind type = kind::undefined; // record type
+    id_type id = 0;              // record id
+    coord_type x = 0;            // record coordinates
+    coord_type y = 0;
+    coord_type z = 0;
+    coord_type r = 0;            // record radius
+    id_type parent_id= -1;      // record parent's id
+
+    // swc records assume zero-based indexing; root's parent remains -1
+    swc_record(swc_record::kind type, int id,
+               coord_type x, coord_type y, coord_type z, coord_type r,
+               int parent_id):
+        type(type), id(id), x(x), y(y), z(z), r(r), parent_id(parent_id)
+    {}
+
+    swc_record() = default;
+    swc_record(const swc_record& other) = default;
+    swc_record& operator=(const swc_record& other) = default;
+
+    bool operator==(const swc_record& other) const { // NOTE(review): `type` is not compared here; confirm this is intentional
+        return id == other.id &&
+            x == other.x &&
+            y == other.y &&
+            z == other.z &&
+            r == other.r &&
+            parent_id == other.parent_id;
+    }
+
+    friend bool operator!=(const swc_record& lhs, const swc_record& rhs) {
+        return !(lhs == rhs);
+    }
+
+    friend std::ostream& operator<<(std::ostream& os, const swc_record& record);
+
+    coord_type diameter() const {
+        return 2*r;
+    }
+
+    arb::point<coord_type> coord() const {
+        return arb::point<coord_type>(x, y, z);
+    }
+
+    arb::section_point as_section_point() const {
+        return arb::section_point{x, y, z, r};
+    }
+
+    // validity checks
+    bool is_consistent() const;
+    void assert_consistent() const; // throw swc_error if inconsistent.
+};
+
+
+// Parse one record, skipping comments and blank lines.
+std::istream& operator>>(std::istream& is, swc_record& record);
+
+// Parse and canonicalize an EOF-terminated sequence of records.
+// Throw on parsing failure.
+std::vector<swc_record> parse_swc_file(std::istream& is);
+
+// Convert a canonical (see below) vector of SWC records to a morphology object.
+morphology swc_as_morphology(const std::vector<swc_record>& swc_records);
+
+// Given a random-access mutable sequence (here a vector) of `swc_record`
+// describing a single morphology, check for consistency and renumber records
+// so that ids are contiguous within branches, have no gaps, and
+// are ordered with respect to parent indices.
+void swc_canonicalize(std::vector<swc_record>& swc_records);
+
+} // namespace arb
diff --git a/include/arbor/time_sequence.hpp b/include/arbor/time_sequence.hpp
index dff057c5d42eb52404ebc4d419d98576b81c263c..9d2d00291f8d25675cb639db06d9bbab210b9da3 100644
--- a/include/arbor/time_sequence.hpp
+++ b/include/arbor/time_sequence.hpp
@@ -7,11 +7,8 @@
 
 #include <arbor/common_types.hpp>
 
-#include "util/rangeutil.hpp"
-
 namespace arb {
 
-
 struct empty_time_seq {
     time_type front() { return terminal_time; }
     void pop() {}
diff --git a/lmorpho/lmorpho.cpp b/lmorpho/lmorpho.cpp
index b4f5492b6b722d031a5e0cefbf37c35ece05a196..892a732cadb0a45e08dcf1f09c7671a50795dac7 100644
--- a/lmorpho/lmorpho.cpp
+++ b/lmorpho/lmorpho.cpp
@@ -5,9 +5,9 @@
 #include <sstream>
 #include <vector>
 
-#include <tinyopt.hpp>
 #include <arbor/morphology.hpp>
 #include <arbor/util/optional.hpp>
+#include <aux/tinyopt.hpp>
 
 #include "morphio.hpp"
 #include "lsystem.hpp"
diff --git a/lmorpho/lsystem.cpp b/lmorpho/lsystem.cpp
index e2ceee43b5b6662cd75f795527c3f8df849e8762..ad3cf88ba0c359e5513dfe7fc00934982c71a8c2 100644
--- a/lmorpho/lsystem.cpp
+++ b/lmorpho/lsystem.cpp
@@ -5,7 +5,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 #include "lsystem.hpp"
 
diff --git a/lmorpho/morphio.cpp b/lmorpho/morphio.cpp
index ee319dda24b9671d67b5284c22805af53aaa299e..e556f172c54e4c7d2654a7374e9b8ac970504009 100644
--- a/lmorpho/morphio.cpp
+++ b/lmorpho/morphio.cpp
@@ -1,18 +1,18 @@
 #include <fstream>
+#include <iomanip>
 #include <iterator>
 #include <map>
+#include <ostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "swcio.hpp"
-#include "util/strprintf.hpp"
+#include <arbor/swcio.hpp>
 
 #include "morphio.hpp"
 
-using arb::io::swc_record;
-using arb::util::strprintf;
+using arb::swc_record;
 
 std::vector<swc_record> as_swc(const arb::morphology& morph);
 
@@ -25,21 +25,10 @@ multi_file::multi_file(const std::string& pattern, int digits) {
     use_stdout_ = pattern.empty() || pattern=="-";
 
     if (!concat_) {
-        std::string nfmt = digits? "%0"+std::to_string(digits)+"d": "%d";
-        std::string::size_type i = 0;
-        for (;;) {
-            auto p = pattern.find("%", i);
-
-            if (p==npos) {
-                fmt_ += pattern.substr(i);
-                break;
-            }
-            else {
-                fmt_ += pattern.substr(i, p-i);
-                fmt_ += i==0? nfmt: "%%";
-                i = p+1;
-            }
-        }
+        auto p = pattern.find("%");
+        fmt_prefix_ = pattern.substr(0, p);
+        fmt_suffix_ = pattern.substr(p+1);
+        fmt_digits_ = digits;
     }
     else {
         filename_ = pattern;
@@ -53,15 +42,28 @@ void multi_file::open(unsigned n) {
 
     if (file_.is_open()) file_.close();
 
-    std::string fname = concat_? filename_: strprintf(fmt_, n);
+    std::string fname;
+    if (concat_) {
+        fname = filename_;
+    }
+    else {
+        std::stringstream ss;
+        ss << fmt_prefix_ << std::setfill('0') << std::setw(fmt_digits_) << n << fmt_suffix_;
+        fname = ss.str();
+    }
+
     file_.open(fname);
 
     current_n_ = n;
 }
 
-// SWC transform
+static std::string short_cable_message(int id, unsigned sz) {
+    std::stringstream ss;
+    ss << "surprisingly short cable: id=" << id << ", size=" << sz;
+    return ss.str();
+}
 
-using arb::io::swc_record;
+// SWC transform
 
 // TODO: Move this functionality to arbor library.
 std::vector<swc_record> as_swc(const arb::morphology& morph) {
@@ -82,7 +84,7 @@ std::vector<swc_record> as_swc(const arb::morphology& morph) {
         const auto& points = sec.points;
         auto n = points.size();
         if (n<2) {
-            throw std::runtime_error(strprintf("surprisingly short cable: id=%d, size=%ul", sec.id, n));
+            throw std::runtime_error(short_cable_message(sec.id, n));
         }
 
         // Include first point only for dendrites segments attached to soma.
@@ -140,7 +142,7 @@ std::vector<int> as_pvector(const arb::morphology& morph, unsigned offset) {
 
         auto n = sec.points.size();
         if (n<2) {
-            throw std::runtime_error(strprintf("surprisingly short cable: id=%d, size=%ul", sec.id, n));
+            throw std::runtime_error(short_cable_message(sec.id, n));
         }
 
         for (unsigned i = 1; i<n; ++i) {
diff --git a/lmorpho/morphio.hpp b/lmorpho/morphio.hpp
index 3d31dab76dfca65a7758d2fbc30de33d04f74ad9..23a2854bead20fa2f796c4b1b340abcf9fd62dcc 100644
--- a/lmorpho/morphio.hpp
+++ b/lmorpho/morphio.hpp
@@ -14,8 +14,12 @@ private:
     std::ofstream file_;
     bool concat_ = false;
     bool use_stdout_ = false;
-    std::string fmt_;       // use if not concat_
-    std::string filename_;  // use if concat_
+    // use if not concat_:
+    std::string fmt_prefix_; // pattern text before the first '%'
+    std::string fmt_suffix_; // pattern text after the first '%'
+    int fmt_digits_ = 0;     // zero-padded field width for the file number
+    // use if concat_:
+    std::string filename_;
     unsigned current_n_ = 0;
 
 public:
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index 6a0cc330e1b4e8be24361d6d4f442fda8a9790a8..0c7a6376ffd031ec1214f84801e0b7c3f61046b5 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -131,11 +131,12 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     io::pfxstringstream out;
 
     out <<
+        "#include <algorithm>\n"
         "#include <cmath>\n"
         "#include <cstddef>\n"
         "#include <memory>\n"
         "#include <" << arb_private_header_prefix() << "backends/multicore/mechanism.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "math.hpp>\n";
+        "#include <" << arb_header_prefix() << "math.hpp>\n";
 
     opt.profile &&
         out << "#include <" << arb_header_prefix() << "profile/profiler.hpp>\n";
@@ -152,13 +153,13 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         "using value_type = base::value_type;\n"
         "using size_type = base::size_type;\n"
         "using index_type = base::index_type;\n"
+        "using ::arb::math::exprelr;\n"
         "using ::std::abs;\n"
         "using ::std::cos;\n"
         "using ::std::exp;\n"
-        "using ::arb::math::exprelr;\n"
         "using ::std::log;\n"
-        "using ::arb::math::max;\n"
-        "using ::arb::math::min;\n"
+        "using ::std::max;\n"
+        "using ::std::min;\n"
         "using ::std::pow;\n"
         "using ::std::sin;\n"
         "\n";
diff --git a/test/ubench/accumulate_functor_values.cpp b/test/ubench/accumulate_functor_values.cpp
index fdf6653b1512865921253278cd5dc01eebe371b7..37586820e14b76403ed4a294437f8febc340b502 100644
--- a/test/ubench/accumulate_functor_values.cpp
+++ b/test/ubench/accumulate_functor_values.cpp
@@ -10,8 +10,8 @@
 
 #include <benchmark/benchmark.h>
 
-#include <util/span.hpp>
-#include <util/transform.hpp>
+#include "util/span.hpp"
+#include "util/transform.hpp"
 
 #define NOINLINE __attribute__((noinline))
 
diff --git a/test/ubench/default_construct.cpp b/test/ubench/default_construct.cpp
index dc0614e23d5aa1ff937919108b53ebd54f927730..8dbb770ad9d1dfc85c5cfe0937b2f921b3cff4dd 100644
--- a/test/ubench/default_construct.cpp
+++ b/test/ubench/default_construct.cpp
@@ -8,7 +8,7 @@
 
 #include <benchmark/benchmark.h>
 
-#include <util/span.hpp>
+#include "util/span.hpp"
 
 using arb::util::make_span;
 
diff --git a/test/ubench/event_binning.cpp b/test/ubench/event_binning.cpp
index 1c33cfa737a08d0a4814c9e772a2ebbde68eaa90..85e78db5dddbab4d176e8bc87396c214c6b3c357 100644
--- a/test/ubench/event_binning.cpp
+++ b/test/ubench/event_binning.cpp
@@ -7,12 +7,13 @@
 #include <unordered_map>
 #include <vector>
 
+#include <benchmark/benchmark.h>
+
 #include <arbor/spike_event.hpp>
 
 #include "event_queue.hpp"
 #include "backends/event.hpp"
 
-#include <benchmark/benchmark.h>
 
 using namespace arb;
 
diff --git a/test/ubench/event_setup.cpp b/test/ubench/event_setup.cpp
index 9931e8bac0a220c834448fff74ea65218a37b452..f9677b4de9f2f346e536bd33588a139105d07eb6 100644
--- a/test/ubench/event_setup.cpp
+++ b/test/ubench/event_setup.cpp
@@ -10,14 +10,15 @@
 // TODO: The staged_events output is a vector of spike_event, not
 // a deliverable event.
 
+#include <algorithm>
 #include <random>
 #include <vector>
 
-#include <event_queue.hpp>
-#include <backends/event.hpp>
-
 #include <benchmark/benchmark.h>
 
+#include "event_queue.hpp"
+#include "backends/event.hpp"
+
 using namespace arb;
 
 std::vector<spike_event> generate_inputs(size_t ncells, size_t ev_per_cell) {
diff --git a/test/ubench/mech_vec.cpp b/test/ubench/mech_vec.cpp
index 6f61d726014ae21df60c556ad4f897e2c822f22f..32e596974c243902c6845ea1a6e0ec7df0e14325 100644
--- a/test/ubench/mech_vec.cpp
+++ b/test/ubench/mech_vec.cpp
@@ -6,9 +6,9 @@
 
 #include <arbor/mc_cell.hpp>
 
-#include <backends/multicore/fvm.hpp>
-#include <benchmark/benchmark.h>
-#include <fvm_lowered_cell_impl.hpp>
+#include "backends/multicore/fvm.hpp"
+#include <benchmark/benchmark.h>
+#include "fvm_lowered_cell_impl.hpp"
 
 using namespace arb;
 
diff --git a/test/unit-distributed/CMakeLists.txt b/test/unit-distributed/CMakeLists.txt
index fdd9b77f8d4b3c6e78082a507f4bb4df123185c3..6e5030f05dd3eaa19f97d8c15519063bce4bfa88 100644
--- a/test/unit-distributed/CMakeLists.txt
+++ b/test/unit-distributed/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(unit-distributed_sources
     distributed_listener.cpp
     test_domain_decomposition.cpp
-    test_exporter_spike_file.cpp
     test_communicator.cpp
     test_mpi.cpp
 
diff --git a/test/unit-distributed/test.cpp b/test/unit-distributed/test.cpp
index 152afa347fc8278667473e3f1a3adefbbded1ded..f5be0bdd5e72c38735e53858dbd8f0a09030902f 100644
--- a/test/unit-distributed/test.cpp
+++ b/test/unit-distributed/test.cpp
@@ -7,16 +7,14 @@
 
 #include <arbor/distributed_context.hpp>
 
-#include <tinyopt.hpp>
-#include <communication/communicator.hpp>
-#include <util/ioutil.hpp>
-
-#include "distributed_listener.hpp"
-
+#include <aux/ioutil.hpp>
+#include <aux/tinyopt.hpp>
 #ifdef TEST_MPI
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
+#include "distributed_listener.hpp"
+
 using namespace arb;
 
 distributed_context g_context;
@@ -28,9 +26,6 @@ const char* usage_str =
 "  -h, --help          Display usage information and exit\n";
 
 int main(int argc, char **argv) {
-    // We need to set the communicator policy at the top level
-    // this allows us to build multiple communicators in the tests
-
 #ifdef TEST_MPI
     with_mpi guard(argc, argv, false);
     g_context = mpi_context(MPI_COMM_WORLD);
diff --git a/test/unit-distributed/test_communicator.cpp b/test/unit-distributed/test_communicator.cpp
index 3c3daf176c6de3a1aacfe0bf2a902d79152f0a22..6e2907d2464f2af9bd05fbeacde0ad3bd32d61ac 100644
--- a/test/unit-distributed/test_communicator.cpp
+++ b/test/unit-distributed/test_communicator.cpp
@@ -5,11 +5,11 @@
 #include <vector>
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/spike_event.hpp>
 
 #include "communication/communicator.hpp"
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
 #include "util/filter.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
@@ -371,7 +371,7 @@ TEST(communicator, ring)
     auto R = ring_recipe(n_global);
     // use a node decomposition that reflects the resources available
     // on the node that the test is running on, including gpus.
-    const auto D = partition_load_balance(R, hw::node_info(), &g_context);
+    const auto D = partition_load_balance(R, local_allocation(), &g_context);
     auto C = communicator(R, D, &g_context);
 
     // every cell fires
@@ -466,7 +466,7 @@ TEST(communicator, all2all)
     auto R = all2all_recipe(n_global);
     // use a node decomposition that reflects the resources available
     // on the node that the test is running on, including gpus.
-    const auto D = partition_load_balance(R, hw::node_info(), &g_context);
+    const auto D = partition_load_balance(R, local_allocation(), &g_context);
     auto C = communicator(R, D, &g_context);
 
     // every cell fires
diff --git a/test/unit-distributed/test_domain_decomposition.cpp b/test/unit-distributed/test_domain_decomposition.cpp
index 685cb75e981f2f9a03c58ce505372b2ed2d46b8e..bade370f688a74f66fe3b1eef4f1a23c7cb42d5f 100644
--- a/test/unit-distributed/test_domain_decomposition.cpp
+++ b/test/unit-distributed/test_domain_decomposition.cpp
@@ -8,10 +8,10 @@
 #include <vector>
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 
-#include <communication/communicator.hpp>
-#include <hardware/node_info.hpp>
-#include <load_balance.hpp>
+#include "util/span.hpp"
 
 #include "../simple_recipes.hpp"
 #include "test.hpp"
@@ -72,7 +72,7 @@ TEST(domain_decomposition, homogeneous_population) {
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         // 10 cells per domain
         unsigned n_local = 10;
@@ -103,7 +103,7 @@ TEST(domain_decomposition, homogeneous_population) {
     }
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that all cells will be placed on gpu in a single group.
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         // 10 cells per domain
         unsigned n_local = 10;
@@ -141,7 +141,7 @@ TEST(domain_decomposition, heterogeneous_population) {
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         // 10 cells per domain
         const unsigned n_local = 10;
diff --git a/test/unit-distributed/test_exporter_spike_file.cpp b/test/unit-distributed/test_exporter_spike_file.cpp
deleted file mode 100644
index 00d26e5788e0aba5b98a875cf0592e3e5b4edcac..0000000000000000000000000000000000000000
--- a/test/unit-distributed/test_exporter_spike_file.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#include "../gtest.h"
-#include "test.hpp"
-
-#include <cstdio>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include <arbor/distributed_context.hpp>
-#include <arbor/spike.hpp>
-
-#include <communication/communicator.hpp>
-#include <io/exporter_spike_file.hpp>
-
-class exporter_spike_file_fixture : public ::testing::Test {
-protected:
-    using exporter_type = arb::io::exporter_spike_file;
-
-    std::string file_name_;
-    std::string path_;
-    std::string extension_;
-    unsigned index_;
-
-    exporter_spike_file_fixture() :
-        file_name_("spikes_exporter_spike_file_fixture"),
-        path_("./"),
-        extension_("gdf"),
-        index_(g_context.id())
-    {}
-
-    std::string get_standard_file_name() {
-        return exporter_type::create_output_file_path(file_name_, path_, extension_, index_);
-    }
-
-    void SetUp() {
-        // code here will execute just before the test ensues 
-    }
-
-    void TearDown() {
-        // delete the start create file
-        std::remove(get_standard_file_name().c_str());
-    }
-
-    ~exporter_spike_file_fixture()
-    {}
-};
-
-TEST_F(exporter_spike_file_fixture, constructor) {
-    // Create an exporter, and overwrite if neccesary.
-    exporter_type exporter(file_name_, path_, extension_, index_, true);
-
-    // Assert that the output file exists
-    {
-        std::ifstream f(get_standard_file_name());
-        ASSERT_TRUE(f.good());
-    }
-
-    // Create a new exporter with overwrite false. This should throw, because an
-    // outut file with the same name is in use by exporter.
-    try {
-        exporter_type exporter1(file_name_, path_, extension_, index_, false);
-        FAIL() << "expected a file already exists error";
-    }
-    catch (const std::runtime_error& err) {
-        EXPECT_EQ(
-            err.what(),
-            "Tried opening file for writing but it exists and over_write is false: " +
-            get_standard_file_name()
-        );
-    }
-    catch (...) {
-        FAIL() << "expected a file already exists error";
-    }
-}
-
-TEST_F(exporter_spike_file_fixture, create_output_file_path) {
-    // Create some random paths, no need for fancy tests here
-    std::string produced_filename =
-        exporter_type::create_output_file_path("spikes", "./", "gdf", 0);
-    EXPECT_STREQ(produced_filename.c_str(), "./spikes_0.gdf");
-
-    produced_filename =
-        exporter_type::create_output_file_path("a_name", "../../", "txt", 5);
-    EXPECT_STREQ(produced_filename.c_str(), "../../a_name_5.txt");
-}
-
-TEST_F(exporter_spike_file_fixture, do_export) {
-    {
-        exporter_type exporter(file_name_, path_, extension_, g_context.id());
-
-        // Create some spikes
-        std::vector<arb::spike> spikes;
-        spikes.push_back({ { 0, 0 }, 0.0 });
-        spikes.push_back({ { 0, 0 }, 0.1 });
-        spikes.push_back({ { 1, 0 }, 1.0 });
-        spikes.push_back({ { 1, 0 }, 1.1 });
-
-        // now do the export
-        exporter.output(spikes);
-    }
-
-    // Test if we have spikes in the file?
-    std::ifstream f(get_standard_file_name());
-    EXPECT_TRUE(f.good());
-
-    std::string line;
-
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "0 0.0000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "0 0.1000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "1 1.0000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "1 1.1000");
-}
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 887ffe53f8248f9ac32d5925b80640ce7913bcf1..5c2d5e1859e970863d93bafd5716bf818098d281 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -49,7 +49,6 @@ set(unit_sources
     test_mechcat.cpp
     test_merge_events.cpp
     test_multi_event_stream.cpp
-    test_nop.cpp
     test_optional.cpp
     test_mechinfo.cpp
     test_padded.cpp
@@ -67,6 +66,7 @@ set(unit_sources
     test_span.cpp
     test_spikes.cpp
     test_spike_store.cpp
+    test_spike_emitter.cpp
     test_stats.cpp
     test_strprintf.cpp
     test_swcio.cpp
@@ -106,5 +106,5 @@ endif()
 add_executable(unit ${unit_sources})
 target_compile_options(unit PRIVATE ${CXXOPT_ARCH})
 target_compile_definitions(unit PRIVATE "-DDATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"")
-target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers)
+target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers arbor-aux)
 
diff --git a/test/unit/test_compartments.cpp b/test/unit/test_compartments.cpp
index 13196d732490a7277d8b2a951d3f9ab37c447cdd..a86ef3e65f0850894e1af0ed7134abcbc29bb4a0 100644
--- a/test/unit/test_compartments.cpp
+++ b/test/unit/test_compartments.cpp
@@ -3,9 +3,10 @@
 
 #include "../gtest.h"
 
+#include <arbor/math.hpp>
+
 #include "algorithms.hpp"
 #include "fvm_compartment.hpp"
-#include "math.hpp"
 #include "util/span.hpp"
 #include "util/transform.hpp"
 
diff --git a/test/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp
index dc27a4337cff92f565b995890506e4a8495954f3..effb760b0563accc8d88ce60924d211bb977bc1a 100644
--- a/test/unit/test_domain_decomposition.cpp
+++ b/test/unit/test_domain_decomposition.cpp
@@ -4,9 +4,8 @@
 
 #include <arbor/distributed_context.hpp>
 #include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
 #include "util/span.hpp"
 
 #include "../simple_recipes.hpp"
@@ -55,7 +54,7 @@ TEST(domain_decomposition, homogenous_population)
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         unsigned num_cells = 10;
         const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, &context);
@@ -81,7 +80,7 @@ TEST(domain_decomposition, homogenous_population)
     }
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that all cells will be placed on gpu in a single group.
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         unsigned num_cells = 10;
         const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, &context);
@@ -115,7 +114,7 @@ TEST(domain_decomposition, heterogenous_population)
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         unsigned num_cells = 10;
         auto R = hetero_recipe(num_cells);
@@ -153,7 +152,7 @@ TEST(domain_decomposition, heterogenous_population)
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that calble cells are on gpu in a single group, and
         // rff cells are on cpu in cell groups of size 1
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         unsigned num_cells = 10;
         auto R = hetero_recipe(num_cells);
@@ -189,3 +188,43 @@ TEST(domain_decomposition, heterogenous_population)
         EXPECT_EQ(num_cells, ncells);
     }
 }
+
+TEST(domain_decomposition, hints) {
+    // Per-cell-kind partition hints (cpu group size, gpu/cpu preference)
+    // must be reflected in the groups built by partition_load_balance.
+    using gid_groups = std::vector<std::vector<cell_gid_type>>;
+
+    partition_hint_map hints;
+    auto& cable_hint = hints[cell_kind::cable1d_neuron];
+    cable_hint.cpu_group_size = 3;
+    cable_hint.prefer_gpu = false;
+    hints[cell_kind::spike_source].cpu_group_size = 4;
+
+    // 16 threads, 1 gpu.
+    distributed_context context;
+    proc_allocation nd{16, 1};
+    domain_decomposition D =
+        partition_load_balance(hetero_recipe(20), nd, &context, hints);
+
+    gid_groups expected_c1d_groups = {{0, 2, 4}, {6, 8, 10}, {12, 14, 16}, {18}};
+    gid_groups expected_ss_groups = {{1, 3, 5, 7}, {9, 11, 13, 15}, {17, 19}};
+
+    gid_groups c1d_groups, ss_groups;
+    for (auto& g: D.groups) {
+        EXPECT_TRUE(g.kind==cell_kind::cable1d_neuron || g.kind==cell_kind::spike_source);
+
+        switch (g.kind) {
+        case cell_kind::cable1d_neuron:
+            c1d_groups.push_back(g.gids);
+            break;
+        case cell_kind::spike_source:
+            ss_groups.push_back(g.gids);
+            break;
+        default:
+            break;
+        }
+    }
+
+    EXPECT_EQ(expected_c1d_groups, c1d_groups);
+    EXPECT_EQ(expected_ss_groups, ss_groups);
+}
diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
index 8e3e311dda2e6b5f28bbee096f8ea0d6fd477636..d81e37457c03359ad5cd0137bd81eeb7aa3c74c3 100644
--- a/test/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -3,10 +3,10 @@
 
 #include <arbor/util/optional.hpp>
 #include <arbor/mechcat.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mc_cell.hpp>
 
 #include "fvm_layout.hpp"
-#include "math.hpp"
 #include "util/maputil.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index 0bb0461a6d875dc10ab510e885b1c8c50f590e3a..f81069f60fd03991318613543b731876c94f1b3b 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -5,7 +5,10 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/load_balance.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/mc_segment.hpp>
 #include <arbor/recipe.hpp>
@@ -18,8 +21,6 @@
 #include "backends/multicore/mechanism.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "fvm_lowered_cell_impl.hpp"
-#include "load_balance.hpp"
-#include "math.hpp"
 #include "sampler_map.hpp"
 #include "util/meta.hpp"
 #include "util/maputil.hpp"
@@ -328,7 +329,7 @@ TEST(fvm_lowered, derived_mechs) {
         float times[] = {10.f, 20.f};
 
         distributed_context context;
-        auto decomp = partition_load_balance(rec, hw::node_info{1u, 0u}, &context);
+        auto decomp = partition_load_balance(rec, proc_allocation{1, 0}, &context);
         simulation sim(rec, decomp, &context);
         sim.add_sampler(all_probes, explicit_schedule(times), sampler);
         sim.run(30.0, 1.f/1024);
diff --git a/test/unit/test_lif_cell_group.cpp b/test/unit/test_lif_cell_group.cpp
index c2753e520245563707ac0ae0c92aadac0de73442..450226b08b52422fe74042f26ab556587c7fde9f 100644
--- a/test/unit/test_lif_cell_group.cpp
+++ b/test/unit/test_lif_cell_group.cpp
@@ -1,17 +1,15 @@
 #include "../gtest.h"
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/lif_cell.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/threadinfo.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/spike_source_cell.hpp>
 
-#include "cell_group_factory.hpp"
-#include "hardware/node_info.hpp"
 #include "lif_cell_group.hpp"
-#include "load_balance.hpp"
-#include "threading/threading.hpp"
 
 using namespace arb;
 // Simple ring network of LIF neurons.
@@ -154,13 +152,11 @@ TEST(lif_cell_group, recipe)
 }
 
 TEST(lif_cell_group, spikes) {
-    distributed_context context;
-
     // make two lif cells
     path_recipe recipe(2, 1000, 0.1);
 
-    hw::node_info nd;
-    nd.num_cpu_cores = arb::num_threads();
+    distributed_context context;
+    proc_allocation nd = local_allocation();
 
     auto decomp = partition_load_balance(recipe, nd, &context);
     simulation sim(recipe, decomp, &context);
@@ -195,13 +191,12 @@ TEST(lif_cell_group, ring)
     double weight = 1000;
     double delay = 1;
 
-    hw::node_info nd;
-    nd.num_cpu_cores = threading::num_threads();
-
     // Total simulation time.
     time_type simulation_time = 100;
 
     distributed_context context;
+    proc_allocation nd = local_allocation();
+
     auto recipe = ring_recipe(num_lif_cells, weight, delay);
     auto decomp = partition_load_balance(recipe, nd, &context);
 
diff --git a/test/unit/test_mask_stream.cpp b/test/unit/test_mask_stream.cpp
index a14de4d361435fc490cfe416faad4101c79f6c6d..bdc7cd8d46f5d3e179589ee580f55bf198410a49 100644
--- a/test/unit/test_mask_stream.cpp
+++ b/test/unit/test_mask_stream.cpp
@@ -1,10 +1,10 @@
-#include "../gtest.h"
-
 #include <sstream>
 
-#include <util/ioutil.hpp>
+#include "../gtest.h"
+
+#include <aux/ioutil.hpp>
 
-using namespace arb::util;
+using aux::mask_stream;
 
 TEST(mask_stream,nomask) {
     // expect mask_stream(true) on a new stream not to change rdbuf.
diff --git a/test/unit/test_math.cpp b/test/unit/test_math.cpp
index 7d0d777a80a915b37093d7ec67b556cdd88fb072..5c6c369587b5f7de90d19b391d330d959738c90f 100644
--- a/test/unit/test_math.cpp
+++ b/test/unit/test_math.cpp
@@ -3,7 +3,7 @@
 
 #include "../gtest.h"
 
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 using namespace arb::math;
 
@@ -377,31 +377,3 @@ TEST(math, exprelr) {
     }
 }
 
-TEST(math, minmax) {
-    constexpr double inf = std::numeric_limits<double>::infinity();
-
-    struct X {
-        double lhs;
-        double rhs;
-        double expected_min;
-        double expected_max;
-    };
-
-    std::vector<X> inputs = {
-        {  0,    1,    0,   1},
-        { -1,    1,   -1,   1},
-        { 42,   42,   42,  42},
-        {inf, -inf, -inf, inf},
-        {  0,  inf,    0, inf},
-        {  0, -inf, -inf,   0},
-    };
-
-    for (auto x: inputs) {
-        // Call min and max with arguments in both possible orders.
-        EXPECT_EQ(min(x.lhs, x.rhs), x.expected_min);
-        EXPECT_EQ(min(x.rhs, x.lhs), x.expected_min);
-        EXPECT_EQ(max(x.lhs, x.rhs), x.expected_max);
-        EXPECT_EQ(max(x.rhs, x.lhs), x.expected_max);
-    }
-}
-
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index aa5d17e75eb648fbb032921f505e155fbd0d80b2..c977f88b9870c5ff419aafc7dd30821fabcb6693 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -3,11 +3,12 @@
 
 #include "../gtest.h"
 
-#include <math.hpp>
-#include <matrix.hpp>
-#include <backends/multicore/fvm.hpp>
-#include <util/rangeutil.hpp>
-#include <util/span.hpp>
+#include <arbor/math.hpp>
+
+#include "matrix.hpp"
+#include "backends/multicore/fvm.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
 
 #include "common.hpp"
 
diff --git a/test/unit/test_matrix.cu b/test/unit/test_matrix.cu
index a24863ac557a05b81058de3aea7515be572bd004..b01fff9228b415afb73ccc0f33bf8fada0a7296c 100644
--- a/test/unit/test_matrix.cu
+++ b/test/unit/test_matrix.cu
@@ -2,21 +2,23 @@
 #include <random>
 #include <vector>
 
-#include "../gtest.h"
-#include "common.hpp"
+#include <cuda.h>
 
-#include <algorithms.hpp>
-#include <math.hpp>
-#include <matrix.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
+#include <arbor/math.hpp>
 
-#include <backends/gpu/cuda_common.hpp>
-#include <backends/gpu/matrix_state_flat.hpp>
-#include <backends/gpu/matrix_state_interleaved.hpp>
-#include <backends/gpu/matrix_interleave.hpp>
+#include "algorithms.hpp"
+#include "matrix.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
+
+#include "backends/gpu/cuda_common.hpp"
+#include "backends/gpu/matrix_state_flat.hpp"
+#include "backends/gpu/matrix_state_interleaved.hpp"
+#include "backends/gpu/matrix_interleave.hpp"
+
+#include "../gtest.h"
+#include "common.hpp"
 
-#include <cuda.h>
 
 using namespace arb;
 
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index 4b4959bee0470c165efe7419afe1d8dcc4167f22..273906860fc28b35a16bfa0993d5c49aeb42da5e 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -2,17 +2,19 @@
 #include <random>
 #include <vector>
 
+#include <arbor/math.hpp>
+
+#include "algorithms.hpp"
+#include "matrix.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
+
+#include "backends/gpu/fvm.hpp"
+#include "backends/multicore/fvm.hpp"
+
 #include "../gtest.h"
 #include "common.hpp"
 
-#include <algorithms.hpp>
-#include <math.hpp>
-#include <matrix.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
-
-#include <backends/gpu/fvm.hpp>
-#include <backends/multicore/fvm.hpp>
 
 using namespace arb;
 
diff --git a/test/unit/test_mc_cell.cpp b/test/unit/test_mc_cell.cpp
index 67858182fe0b2086cd8dadbbab994e42ce69081a..683f133e43fb599d2cb6fc092ccee4994343f3ee 100644
--- a/test/unit/test_mc_cell.cpp
+++ b/test/unit/test_mc_cell.cpp
@@ -1,8 +1,8 @@
 #include "../gtest.h"
 
 #include <arbor/mc_cell.hpp>
+#include <arbor/math.hpp>
 
-#include "math.hpp"
 #include "tree.hpp"
 
 using namespace arb;
diff --git a/test/unit/test_merge_events.cpp b/test/unit/test_merge_events.cpp
index 03c58a6a57adba4fb20c6b513a843467df2bfce0..b906c02ea611fc3087cfdffa04ad6f5a253b9ccd 100644
--- a/test/unit/test_merge_events.cpp
+++ b/test/unit/test_merge_events.cpp
@@ -3,6 +3,8 @@
 #include <event_queue.hpp>
 #include <merge_events.hpp>
 
+#include "util/rangeutil.hpp"
+
 using namespace arb;
 
 std::vector<event_generator> empty_gens;
diff --git a/test/unit/test_nop.cpp b/test/unit/test_nop.cpp
deleted file mode 100644
index 63a8857f0fb085bcc6a0c534d3c6a7fa562c532d..0000000000000000000000000000000000000000
--- a/test/unit/test_nop.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#include "../gtest.h"
-#include "util/nop.hpp"
-
-using namespace arb::util;
-
-TEST(nop, void_fn) {
-    std::function<void ()> f(nop_function);
-
-    EXPECT_TRUE(f);
-    f(); // should do nothing
-
-    bool flag = false;
-    f = [&]() { flag = true; };
-    f();
-    EXPECT_TRUE(flag);
-
-    flag = false;
-    f = nop_function;
-    f();
-    EXPECT_FALSE(flag);
-
-    // with some arguments
-    std::function<void (int, int)> g(nop_function);
-    EXPECT_TRUE(g);
-    g(2, 3); // should do nothing
-
-    int sum = 0;
-    g = [&](int a, int b) { sum = a+b; };
-    g(2, 3);
-    EXPECT_EQ(5, sum);
-
-    sum = 0;
-    g = nop_function;
-    g(2, 3);
-    EXPECT_EQ(0, sum);
-}
-
-struct check_default {
-    int value = 100;
-
-    check_default() = default;
-    explicit check_default(int n): value(n) {}
-};
-
-TEST(nop, default_return_fn) {
-    std::function<check_default ()> f(nop_function);
-
-    EXPECT_TRUE(f);
-    auto result = f();
-    EXPECT_EQ(result.value, 100);
-
-    f = []() { return check_default(17); };
-    result = f();
-    EXPECT_EQ(result.value, 17);
-
-    f = nop_function;
-    result = f();
-    EXPECT_EQ(result.value, 100);
-
-    std::function<check_default (double, double)> g(nop_function);
-
-    EXPECT_TRUE(g);
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 100);
-
-    g = [](double x, double y) { return check_default{(int)(x*y)}; };
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 2);
-
-    g = nop_function;
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 100);
-
-}
-
diff --git a/test/unit/test_partition.cpp b/test/unit/test_partition.cpp
index e2f5e5d9e757170bba38938238cec79b7e4d504a..d8c160c498eecad5197b2a10f002c583afe33a3e 100644
--- a/test/unit/test_partition.cpp
+++ b/test/unit/test_partition.cpp
@@ -7,8 +7,7 @@
 
 #include <arbor/assert.hpp>
 
-#include <util/nop.hpp>
-#include <util/partition.hpp>
+#include "util/partition.hpp"
 
 using namespace arb;
 
diff --git a/test/unit/test_path.cpp b/test/unit/test_path.cpp
index 4fc98e58827dddedb643146ef9c9b385f243eacd..d359dc2e5b9c4be6c1363348a88045419d62df24 100644
--- a/test/unit/test_path.cpp
+++ b/test/unit/test_path.cpp
@@ -5,9 +5,9 @@
 #include <string>
 #include <vector>
 
-#include <util/path.hpp>
+#include <aux/path.hpp>
 
-using namespace arb::util;
+using namespace aux;
 
 TEST(path, posix_ctor) {
     // test constructor ans assignment overloads with sample character sequences.
@@ -41,14 +41,12 @@ TEST(path, posix_ctor) {
     EXPECT_EQ(str_cs, (p=p2).native());
     EXPECT_EQ(str_cs, (p=cs).native());
     EXPECT_EQ(str_cs, (p=str_cs).native());
-    EXPECT_EQ(str_cs, (p=vec_cs).native());
     EXPECT_EQ(str_cs, (p=std::move(p7)).native());
 
     // test assign overloads (and ref return values)
     EXPECT_EQ(str_cs, p.assign(p2).native());
     EXPECT_EQ(str_cs, p.assign(cs).native());
     EXPECT_EQ(str_cs, p.assign(str_cs).native());
-    EXPECT_EQ(str_cs, p.assign(vec_cs).native());
     EXPECT_EQ(str_cs, p.assign(vec_cs.begin(), vec_cs.end()).native());
 }
 
@@ -318,7 +316,7 @@ TEST(path, posix_status_perms) {
     perms expected = perms::owner_read|perms::owner_write|perms::group_read|perms::group_write|perms::others_read|perms::others_write;
     EXPECT_EQ(expected, null_perm);
 
-    // Expect / to be have exec flag set for everyonr
+    // Expect / to have exec flag set for everyone
     perms root_perm = status("/").permissions();
     EXPECT_NE(perms::none, root_perm&perms::owner_exec);
     EXPECT_NE(perms::none, root_perm&perms::group_exec);
diff --git a/test/unit/test_segment.cpp b/test/unit/test_segment.cpp
index de33809dde89a6c35a53d9e6fbd1b4e97df78dae..128f629bb83e8ce3395e5d98c53947539f621fa1 100644
--- a/test/unit/test_segment.cpp
+++ b/test/unit/test_segment.cpp
@@ -2,10 +2,9 @@
 
 #include "../gtest.h"
 
+#include <arbor/math.hpp>
 #include <arbor/mc_segment.hpp>
 
-#include "math.hpp"
-
 using namespace arb;
 
 TEST(mc_segment, kinfs) {
diff --git a/test/unit/test_spike_emitter.cpp b/test/unit/test_spike_emitter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..60af2a92cbeb3a1a75085999348453147c8f24ee
--- /dev/null
+++ b/test/unit/test_spike_emitter.cpp
@@ -0,0 +1,30 @@
+#include "../gtest.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <arbor/spike.hpp>
+#include <aux/spike_emitter.hpp>
+
+TEST(spike_emitter, formatting) {
+    // One line per spike is expected: first source field, then time with four decimals.
+    const std::string expected =
+        "0 0.0000\n"
+        "0 0.1000\n"
+        "1 1.0000\n"
+        "1 1.1000\n";
+
+    std::vector<arb::spike> spikes = {
+        { { 0, 0 }, 0.0 },
+        { { 0, 0 }, 0.1 },
+        { { 1, 0 }, 1.0 },
+        { { 1, 0 }, 1.1 }
+    };
+
+    std::stringstream out;
+    auto emit = aux::spike_emitter(out);
+    emit(spikes);
+
+    EXPECT_EQ(expected, out.str());
+}
diff --git a/test/unit/test_swcio.cpp b/test/unit/test_swcio.cpp
index 85ad98d6246d124607db2c349e98eb4d1e8f2e3b..172439e6707f82322240f73ec77c11b660bace7b 100644
--- a/test/unit/test_swcio.cpp
+++ b/test/unit/test_swcio.cpp
@@ -9,10 +9,10 @@
 
 #include <arbor/mc_cell.hpp>
 #include <arbor/morphology.hpp>
+#include <arbor/swcio.hpp>
 
 #include "../gtest.h"
 
-#include "swcio.hpp"
 
 // Path to data directory can be overriden at compile time.
 #if !defined(DATADIR)
@@ -22,8 +22,8 @@
 using namespace arb;
 
 // SWC tests
-void expect_record_equals(const io::swc_record& expected,
-                          const io::swc_record& actual)
+void expect_record_equals(const swc_record& expected,
+                          const swc_record& actual)
 {
     EXPECT_EQ(expected.id, actual.id);
     EXPECT_EQ(expected.type, actual.type);
@@ -36,8 +36,6 @@ void expect_record_equals(const io::swc_record& expected,
 
 TEST(swc_record, construction)
 {
-    using namespace arb::io;
-
     {
         // force an invalid type
         swc_record::kind invalid_type = static_cast<swc_record::kind>(100);
@@ -105,8 +103,6 @@ TEST(swc_record, construction)
 
 TEST(swc_parser, invalid_input_istream)
 {
-    using namespace arb::io;
-
     {
         // check incomplete lines; missing parent
         std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n");
@@ -127,8 +123,6 @@ TEST(swc_parser, invalid_input_istream)
 
 TEST(swc_parser, invalid_input_parse)
 {
-    using namespace arb::io;
-
     {
         // check incomplete lines; missing parent
         std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n");
@@ -168,8 +162,6 @@ TEST(swc_parser, invalid_input_parse)
 
 TEST(swc_parser, valid_input)
 {
-    using namespace arb::io;
-
     {
         // check empty file; no record may be parsed
         swc_record record, record_orig;
@@ -304,8 +296,6 @@ TEST(swc_parser, valid_input)
 
 TEST(swc_parser, from_allen_db)
 {
-    using namespace arb::io;
-
     std::string datadir{DATADIR};
     auto fname = datadir + "/example.swc";
     std::ifstream fid(fname);
@@ -323,8 +313,6 @@ TEST(swc_parser, from_allen_db)
 
 TEST(swc_parser, input_cleaning)
 {
-    using namespace arb::io;
-
     {
         // Check duplicates
         std::stringstream is;
@@ -391,8 +379,6 @@ TEST(swc_parser, input_cleaning)
 
 TEST(swc_parser, raw)
 {
-    using namespace arb::io;
-
     {
         // Check valid usage
         std::stringstream is;
@@ -469,7 +455,7 @@ TEST(swc_io, cell_construction) {
     };
 
     // swc -> morphology
-    auto morph = io::swc_as_morphology(io::parse_swc_file(is));
+    auto morph = swc_as_morphology(parse_swc_file(is));
 
     mc_cell cell = make_mc_cell(morph, true);
     EXPECT_TRUE(cell.has_soma());
@@ -561,7 +547,7 @@ TEST(swc_parser, from_file_ball_and_stick) {
     }
 
     // read the file as morhpology
-    auto bas_morph = io::swc_as_morphology(io::parse_swc_file(fid));
+    auto bas_morph = swc_as_morphology(parse_swc_file(fid));
 
     // compare with expected morphology
     morphology expected;
diff --git a/test/validation/CMakeLists.txt b/test/validation/CMakeLists.txt
index 740ce9ad33d62dc1ac5255f25744bad19725acaa..8826d8e611e952f3ddae3fe154308c8fc1e18386 100644
--- a/test/validation/CMakeLists.txt
+++ b/test/validation/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(validation_sources
     # unit tests
     validate_ball_and_stick.cpp
-    validate_compartment_policy.cpp
     validate_soma.cpp
     validate_kinetic.cpp
     validate_synapses.cpp
@@ -17,7 +16,6 @@ set(validation_sources
 add_executable(validate ${validation_sources})
 target_compile_definitions(validate PRIVATE "ARB_DATADIR=\"${ARB_VALIDATION_DATA_DIR}\"")
 target_link_libraries(validate PRIVATE gtest arbor arbor-aux ext-json)
-target_link_libraries(validate PRIVATE arbor-private-headers) # temporary
 
 if(ARB_BUILD_VALIDATION_DATA)
     add_dependencies(validate validation_data)
diff --git a/test/validation/convergence_test.hpp b/test/validation/convergence_test.hpp
index 459e788f85efabb4f438092ebc5869d87e1206b5..25742cdb762f58d2e18524428dd27d52c027e060 100644
--- a/test/validation/convergence_test.hpp
+++ b/test/validation/convergence_test.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <iterator>
 #include <vector>
 
 #include <nlohmann/json.hpp>
@@ -8,9 +9,7 @@
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/schedule.hpp>
-
-#include "util/filter.hpp"
-#include "util/rangeutil.hpp"
+#include <aux/path.hpp>
 
 #include "../gtest.h"
 
@@ -53,19 +52,27 @@ public:
         run_validation_(false),
         meta_(meta)
     {
-        util::assign(probe_labels_, probe_labels);
+        using std::begin;
+        using std::end;
+
+        probe_labels_.assign(begin(probe_labels), end(probe_labels));
     }
 
     // Allow free access to JSON meta data attached to saved traces.
     nlohmann::json& metadata() { return meta_; }
 
-    void load_reference_data(const util::path& ref_path) {
+    void load_reference_data(const aux::path& ref_path) {
         run_validation_ = false;
         try {
             ref_data_ = g_trace_io.load_traces(ref_path);
 
-            run_validation_ = util::all_of(probe_labels_,
-                [&](const probe_label& pl) { return ref_data_.count(pl.label)>0; });
+            run_validation_ = true;
+            for (const auto& pl: probe_labels_) {
+                if (!(ref_data_.count(pl.label)>0)) {
+                    run_validation_ = false;
+                    break;
+                }
+            }
 
             EXPECT_TRUE(run_validation_);
         }
@@ -123,16 +130,27 @@ public:
     void report() {
         if (run_validation_ && g_trace_io.verbose()) {
             // reorder to group by id
-            util::stable_sort_by(conv_tbl_, [](const conv_entry<Param>& e) { return e.id; });
+            std::stable_sort(conv_tbl_.begin(), conv_tbl_.end(),
+                [](const auto& a, const auto& b) { return a.id<b.id; });  // stable sort: entries with equal id keep their relative order
+
             report_conv_table(std::cout, conv_tbl_, param_name_);
         }
     }
 
     void assert_all_convergence() const {
+        std::vector<conv_entry<Param>> with_label;  // scratch buffer, reused for every probe label
+
         for (const auto& pl: probe_labels_) {
             SCOPED_TRACE(pl.label);
-            assert_convergence(util::filter(conv_tbl_,
-                        [&](const conv_entry<Param>& e) { return e.id==pl.label; }));
+
+            with_label.clear();  // drop the entries collected for the previous label
+            for (const auto& e: conv_tbl_) {
+                if (e.id==pl.label) {
+                    with_label.push_back(e);
+                }
+            }
+
+            assert_convergence(with_label);
         }
     }
 };
@@ -151,7 +169,7 @@ inline std::vector<float> stimulus_ends(const mc_cell& c) {
         ts.push_back(t1);
     }
 
-    util::sort(ts);
+    std::sort(ts.begin(), ts.end());
     return ts;
 }
 
diff --git a/test/validation/interpolate.hpp b/test/validation/interpolate.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..54d5da0a7c7e30822b682dca2a2d5b5dbdb6381c
--- /dev/null
+++ b/test/validation/interpolate.hpp
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <cmath>
+#include <iterator>
+#include <type_traits>
+
+// Linear interpolation a + u*(b-a), evaluated with two fused multiply-adds.
+template <typename T, typename U>
+inline T lerp(T a, T b, U u) {
+    return std::fma(u, b, std::fma(-u, a, a));
+}
+
+// Piece-wise linear interpolation across a sequence of points (u_i, x_i),
+// monotonically increasing in u. Returns NAN converted to the value type for
+// an empty sequence, the first x value if u precedes the first point, and the
+// last x value if u is at or beyond the last point.
+//
+// Parameters get_u and get_x provide the accessors for the point sequence;
+// consider moving to structured bindings in C++17 instead.
+template <typename U, typename Seq, typename GetU, typename GetX>
+auto pw_linear_interpolate(U u, const Seq& seq, GetU get_u, GetX get_x) {
+    using std::begin;
+    using std::end;
+    // Decay so that an accessor returning a reference still yields a value type.
+    using value_type = std::decay_t<decltype(get_x(*begin(seq)))>;
+
+    auto i = begin(seq);
+    auto e = end(seq);
+    if (i==e) {
+        return value_type(NAN);
+    }
+
+    auto u0 = get_u(*i);
+    auto x0 = get_x(*i);
+    if (u<u0) {
+        return x0;
+    }
+
+    while (++i!=e) {
+        auto u1 = get_u(*i);
+        auto x1 = get_x(*i);
+        if (u<u1) {
+            // Here u0 <= u < u1, so the divisor u1-u0 is non-zero.
+            return lerp(x0, x1, (u-u0)/(u1-u0));
+        }
+        u0 = u1;
+        x0 = x1;
+    }
+    return x0;
+}
diff --git a/test/validation/trace_analysis.cpp b/test/validation/trace_analysis.cpp
index 6a9cd813b7040de88486aaa48f64d717d89ffc7d..ccf71a4284842d46bf70cf553800c4390b389bc2 100644
--- a/test/validation/trace_analysis.cpp
+++ b/test/validation/trace_analysis.cpp
@@ -6,13 +6,11 @@
 
 #include "../gtest.h"
 
-#include <arbor/util/optional.hpp>
+#include <arbor/math.hpp>
 #include <arbor/simple_sampler.hpp>
+#include <arbor/util/optional.hpp>
 
-#include "math.hpp"
-#include "util/partition.hpp"
-#include "util/rangeutil.hpp"
-
+#include "interpolate.hpp"
 #include "trace_analysis.hpp"
 
 namespace arb {
@@ -21,20 +19,9 @@ struct trace_interpolant {
     trace_interpolant(const trace_data<double>& trace): trace_(trace) {}
 
     double operator()(float t) const {
-        if (trace_.empty()) return std::nan("");
-
-        auto tx = times(trace_);
-        auto vx = values(trace_);
-
-        // special case for end points
-        if (t<tx.front()) return vx.front();
-        if (t>=tx.back()) return vx.back();
-
-        auto part = util::partition_view(tx);
-        auto i = part.index(t);
-        arb_assert(i != part.npos);
-        auto p = part[i];
-        return math::lerp(vx[i], vx[i+1], (t-p.first)/(p.second-p.first));
+        return pw_linear_interpolate(t, trace_,
+            [](auto& entry) { return entry.t; },  // abscissa: the entry's t field
+            [](auto& entry) { return entry.v; });  // ordinate: the entry's v field
     }
 
     const trace_data<double>& trace_;
@@ -43,9 +30,12 @@ struct trace_interpolant {
 double linf_distance(const trace_data<double>& u, const trace_data<double>& r) {
     trace_interpolant f{r};
 
-    return util::max_value(
-            util::transform_view(u,
-                [&](trace_entry<double> x) { return std::abs(x.v-f(x.t)); }));
+    double linf = 0;  // also the result when u is empty
+    for (auto entry: u) {
+        linf = std::max(linf, std::abs(entry.v-f(entry.t)));  // f evaluates the interpolant of r at entry.t
+    }
+
+    return linf;
 }
 
 // Compute linf distance as above, but excluding sample points that lie
@@ -72,14 +62,14 @@ double linf_distance(const trace_data<double>& u, const trace_data<double>& ref,
         // include points up to and including uj-2, and then proceed from point uj+1,
         // excluding the two points closest to the discontinuity.
 
-        if (uj>1+ui) {
-            util::append(reduced, util::subrange_view(u, ui, uj-1));
+        for (unsigned k = ui; k+1<uj; ++k) {
+            reduced.push_back(u[k]);
         }
         ui = uj+1;
     }
 
-    if (ui<nu) {
-        util::append(reduced, util::subrange_view(u, ui, nu));
+    for (auto k = ui; k<nu; ++k) {
+        reduced.push_back(u[k]);
     }
 
     return linf_distance(reduced, ref);
@@ -89,21 +79,18 @@ std::vector<trace_peak> local_maxima(const trace_data<double>& u) {
     std::vector<trace_peak> peaks;
     if (u.size()<2) return peaks;
 
-    auto tx = times(u);
-    auto vx = values(u);
-
-    int s_prev = math::signum(vx[1]-vx[0]);
+    int s_prev = math::signum(u[1].v-u[0].v);
     std::size_t i_start = 0;
 
     for (std::size_t i = 2; i<u.size()-1; ++i) {
-        int s = math::signum(vx[i]-vx[i-1]);
+        int s = math::signum(u[i].v-u[i-1].v);
         if (s_prev==1 && s==-1) {
             // found peak between i_start and i,
             // observerd peak value at i-1.
-            float t0 = tx[i_start];
-            float t1 = tx[i];
+            float t0 = u[i_start].t;
+            float t1 = u[i].t;
 
-            peaks.push_back({(t0+t1)/2, vx[i-1], (t1-t0)/2});
+            peaks.push_back({(t0+t1)/2, u[i-1].v, (t1-t0)/2});
         }
 
         if (s!=0) {
diff --git a/test/validation/trace_analysis.hpp b/test/validation/trace_analysis.hpp
index e6ff032d74fc30fef9bac49ee594ea7ab26c678f..92b8d04811f5666bd7ec5d932b0b7544944b3545 100644
--- a/test/validation/trace_analysis.hpp
+++ b/test/validation/trace_analysis.hpp
@@ -8,24 +8,12 @@
 #include <arbor/simple_sampler.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "util/rangeutil.hpp"
+#include "util.hpp"
 
 namespace arb {
 
 /* Trace data comparison */
 
-// Extract time or value data from trace.
-
-template <typename V>
-inline auto times(const trace_data<V>& trace) {
-   return util::transform_view(trace, [](auto& x) { return x.t; });
-}
-
-template <typename V>
-inline auto values(const trace_data<V>& trace) {
-   return util::transform_view(trace, [](auto& x) { return x.v; });
-}
-
 // Compute max |v_i - f(t_i)| where (t, v) is the 
 // first trace `u` and f is the piece-wise linear interpolant
 // of the second trace `r`.
@@ -77,7 +65,7 @@ using conv_data = std::vector<conv_entry<Param>>;
 
 template <typename ConvEntrySeq>
 void assert_convergence(const ConvEntrySeq& cs) {
-    if (util::empty(cs)) return;
+    if (size(cs)==0) return;
 
     auto tbound = [](trace_peak p) { return std::abs(p.t)+p.t_err; };
     float peak_dt_bound = INFINITY;
diff --git a/test/validation/util.hpp b/test/validation/util.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..af02639276c7c58ec8ec0ed7215efd8455c28d78
--- /dev/null
+++ b/test/validation/util.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+// Simple helper utilities for validation tests.
+
+#include <sstream>
+#include <string>
+
+#include <arbor/common_types.hpp>
+
+// Element count of a built-in array, taken from its type.
+template <typename Elem, std::size_t Count>
+constexpr std::size_t size(Elem (&)[Count]) noexcept { return Count; }
+
+// Element count of anything providing a size() member function.
+template <typename Container>
+constexpr std::size_t size(const Container& c) { return c.size(); }
+
+inline std::string to_string(arb::backend_kind kind) {
+    std::stringstream text;  // filled through the stream insertion operator for backend_kind
+    text << kind;
+    return text.str();
+}
diff --git a/test/validation/validate.cpp b/test/validation/validate.cpp
index dec1edaf77cc17a1f8d6510da213bb39285dc80d..cac0ac3d1938199f1884422de45bedf33877704a 100644
--- a/test/validation/validate.cpp
+++ b/test/validation/validate.cpp
@@ -4,7 +4,7 @@
 #include <string>
 #include <exception>
 
-#include <tinyopt.hpp>
+#include <aux/tinyopt.hpp>
 
 #include "../gtest.h"
 
diff --git a/test/validation/validate_ball_and_stick.cpp b/test/validation/validate_ball_and_stick.cpp
index 6f736ff871a39b3a8cd268fe30fa9dfd7cf28a26..afc769fed3923027f2f75c64bbade0ccb11479e2 100644
--- a/test/validation/validate_ball_and_stick.cpp
+++ b/test/validation/validate_ball_and_stick.cpp
@@ -3,22 +3,20 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
-
-#include "load_balance.hpp"
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "util/meta.hpp"
-#include "util/path.hpp"
-#include "util/strprintf.hpp"
+#include <aux/path.hpp>
 
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
+
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 #include "../gtest.h"
@@ -33,7 +31,7 @@ struct probe_point {
 template <typename ProbePointSeq>
 void run_ncomp_convergence_test(
     const char* model_name,
-    const util::path& ref_data_path,
+    const aux::path& ref_data_path,
     backend_kind backend,
     const mc_cell& c,
     ProbePointSeq& probe_points,
@@ -51,12 +49,12 @@ void run_ncomp_convergence_test(
         {"dt", dt},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     auto exclude = stimulus_ends(c);
 
-    auto n_probe = util::size(probe_points);
+    auto n_probe = size(probe_points);
     std::vector<probe_label> plabels;
     plabels.reserve(n_probe);
     for (unsigned i = 0; i<n_probe; ++i) {
@@ -66,6 +64,10 @@ void run_ncomp_convergence_test(
     convergence_test_runner<int> runner("ncomp", plabels, meta);
     runner.load_reference_data(ref_data_path);
 
+    distributed_context context;
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         for (auto& seg: c.segments()) {
             if (!seg->is_soma()) {
@@ -77,8 +79,6 @@ void run_ncomp_convergence_test(
             rec.add_probe(0, 0, cell_probe_address{p.where, cell_probe_address::membrane_voltage});
         }
 
-        distributed_context context;
-        hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
         auto decomp = partition_load_balance(rec, nd, &context);
         simulation sim(rec, decomp, &context);
 
@@ -196,35 +196,35 @@ void validate_ball_and_squiggle(arb::backend_kind backend) {
 
 TEST(ball_and_stick, neuron_ref) {
     validate_ball_and_stick(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_ball_and_stick(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_taper, neuron_ref) {
     validate_ball_and_taper(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_ball_and_taper(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_3stick, neuron_ref) {
     validate_ball_and_3stick(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_ball_and_3stick(backend_kind::gpu);
     }
 }
 
 TEST(rallpack1, numeric_ref) {
     validate_rallpack1(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_rallpack1(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_squiggle, neuron_ref) {
     validate_ball_and_squiggle(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_ball_and_squiggle(backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_compartment_policy.cpp b/test/validation/validate_compartment_policy.cpp
deleted file mode 100644
index 7307c459cdda933bb0ca3a85e642e30f52238625..0000000000000000000000000000000000000000
--- a/test/validation/validate_compartment_policy.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-#include <fstream>
-#include <utility>
-
-#include <nlohmann/json.hpp>
-
-#include <arbor/common_types.hpp>
-#include <arbor/mc_cell.hpp>
-#include <arbor/recipe.hpp>
-#include <arbor/simple_sampler.hpp>
-#include <arbor/simulation.hpp>
-
-#include "util/rangeutil.hpp"
-
-#include "../gtest.h"
-
-#include "../common_cells.hpp"
-#include "../simple_recipes.hpp"
-
-#include "trace_analysis.hpp"
-#include "validation_data.hpp"
-
-using namespace arb;
-
-#if 0
-// *Temporarily* disabled: compartment division policy
-// will be moved to backend policy class.
-
-/*
- * Expect dendtrites composed of a simple frustrum to give
- * essentially identical results no matter the compartment
- * division policy.
- */
-
-template <typename CompPolicy>
-std::vector<trace_data> run_simulation(const cell& c, float sample_dt, float t_end, float dt) {
-    simulation<fvm::fvm_multicell<double, cell_local_size_type, div_compartment_by_ends>> m{singleton_recipe(c)};
-
-    const auto& probes = m.probes();
-    std::size_t n_probes = probes.size();
-    std::vector<simple_sampler> samplers(n_probes, sample_dt);
-
-    for (unsigned i = 0; i<n_probes; ++i) {
-        m.attach_sampler(probes[i].id, samplers[i].sampler<>());
-    }
-
-    m.run(t_end, dt);
-    std::vector<trace_data> traces;
-    for (auto& s: samplers) {
-        traces.push_back(std::move(s.trace));
-    }
-    return traces;
-}
-
-
-void run_test(cell&& c) {
-    add_common_voltage_probes(c);
-
-    float sample_dt = .025;
-    float t_end = 100;
-    float dt = 0.001;
-
-    auto traces_by_ends = run_simulation<div_compartment_by_ends>(c, sample_dt, t_end, dt);
-    auto traces_sampler = run_simulation<div_compartment_sampler>(c, sample_dt, t_end, dt);
-    auto traces_integrator = run_simulation<div_compartment_integrator>(c, sample_dt, t_end, dt);
-
-    auto n_trace = traces_by_ends.size();
-    ASSERT_GT(n_trace, 0);
-    ASSERT_EQ(n_trace, traces_sampler.size());
-    ASSERT_EQ(n_trace, traces_integrator.size());
-
-    for (unsigned i = 0; i<n_trace; ++i) {
-        auto& t1 = traces_by_ends[i];
-        auto& t2 = traces_sampler[i];
-        auto& t3 = traces_integrator[i];
-
-        // expect all traces to be (close to) the same
-        double epsilon = 1e-6;
-        double tol = epsilon*util::max_value(
-            util::transform_view(values(t1), [](double x) { return std::abs(x); }));
-        EXPECT_GE(tol, linf_distance(t1, t2));
-        EXPECT_GE(tol, linf_distance(t2, t3));
-        EXPECT_GE(tol, linf_distance(t3, t1));
-    }
-}
-
-TEST(compartment_policy, validate_ball_and_stick) {
-    SCOPED_TRACE("ball_and_stick");
-    run_test(make_cell_ball_and_stick());
-}
-
-TEST(compartment_policy, validate_ball_and_3stick) {
-    SCOPED_TRACE("ball_and_3stick");
-    run_test(make_cell_ball_and_3stick());
-}
-
-TEST(compartment_policy, validate_ball_and_taper) {
-    SCOPED_TRACE("ball_and_taper");
-    run_test(make_cell_ball_and_taper());
-}
-
-#endif
diff --git a/test/validation/validate_kinetic.cpp b/test/validation/validate_kinetic.cpp
index dd335976a727ecde1b7ad366962037e9acbd958f..3bd9f796c139ef4b15b72d2c6d51d1fc8074ca1e 100644
--- a/test/validation/validate_kinetic.cpp
+++ b/test/validation/validate_kinetic.cpp
@@ -5,22 +5,19 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "load_balance.hpp"
-#include "util/rangeutil.hpp"
-#include "util/strprintf.hpp"
-
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 void run_kinetic_dt(
@@ -41,13 +38,15 @@ void run_kinetic_dt(
     probe_label plabels[1] = {{"soma.mid", {0u, 0u}}};
 
     meta["sim"] = "arbor";
-    meta["backend_kind"] = util::to_string(backend);
+    meta["backend_kind"] = to_string(backend);
 
     convergence_test_runner<float> runner("dt", plabels, meta);
     runner.load_reference_data(ref_file);
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     auto decomp = partition_load_balance(rec, nd, &context);
     simulation sim(rec, decomp, &context);
 
@@ -113,14 +112,14 @@ using namespace arb;
 
 TEST(kinetic, kin1_numeric_ref) {
     validate_kinetic_kin1(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_kinetic_kin1(arb::backend_kind::gpu);
     }
 }
 
 TEST(kinetic, kinlva_numeric_ref) {
     validate_kinetic_kinlva(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_kinetic_kinlva(arb::backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_soma.cpp b/test/validation/validate_soma.cpp
index 90534c4c54e8987326cd7561b0ef00c8aa97c172..6c1b17bd9520473bc1b7c6048ff7e9a6e1a5671a 100644
--- a/test/validation/validate_soma.cpp
+++ b/test/validation/validate_soma.cpp
@@ -1,22 +1,19 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-#include "util/rangeutil.hpp"
-#include "util/strprintf.hpp"
-
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 #include "../gtest.h"
@@ -33,7 +30,9 @@ void validate_soma(backend_kind backend) {
     probe_label plabels[1] = {{"soma.mid", {0u, 0u}}};
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     auto decomp = partition_load_balance(rec, nd, &context);
     simulation sim(rec, decomp, &context);
 
@@ -42,7 +41,7 @@ void validate_soma(backend_kind backend) {
         {"model", "soma"},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     convergence_test_runner<float> runner("dt", plabels, meta);
@@ -70,7 +69,7 @@ end:
 
 TEST(soma, numeric_ref) {
     validate_soma(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {  // GPU pass only when the local allocation has a nonzero GPU count
         validate_soma(backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_synapses.cpp b/test/validation/validate_synapses.cpp
index b987b718bb5fdf9892a899c1923034b94fa0ed77..354dab944f2c8ca512b83fe42855e3e33c278157 100644
--- a/test/validation/validate_synapses.cpp
+++ b/test/validation/validate_synapses.cpp
@@ -1,15 +1,13 @@
 #include <nlohmann/json.hpp>
 
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
+#include <aux/path.hpp>
 
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "load_balance.hpp"
-#include "util/path.hpp"
-#include "util/strprintf.hpp"
 
 #include "../gtest.h"
 
@@ -18,13 +16,14 @@
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 using namespace arb;
 
 void run_synapse_test(
     const char* syn_type,
-    const util::path& ref_data_path,
+    const aux::path& ref_data_path,
     backend_kind backend,
     float t_end=70.f,
     float dt=0.001)
@@ -35,7 +34,7 @@ void run_synapse_test(
         {"model", syn_type},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     mc_cell c = make_cell_ball_and_stick(false); // no stimuli
@@ -63,7 +62,9 @@ void run_synapse_test(
     runner.load_reference_data(ref_data_path);
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         c.cable(1)->set_compartments(ncomp);
 
@@ -89,7 +90,7 @@ void run_synapse_test(
 TEST(simple_synapse, expsyn_neuron_ref) {
     SCOPED_TRACE("expsyn-multicore");
     run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         SCOPED_TRACE("expsyn-gpu");
         run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::gpu);
     }
@@ -98,7 +99,7 @@ TEST(simple_synapse, expsyn_neuron_ref) {
 TEST(simple_synapse, exp2syn_neuron_ref) {
     SCOPED_TRACE("exp2syn-multicore");
     run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         SCOPED_TRACE("exp2syn-gpu");
         run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::gpu);
     }
diff --git a/test/validation/validation_data.cpp b/test/validation/validation_data.cpp
index 2722c909708341358c1caa26377fc23585c5d0a0..cf5963c45e628e04817534970368be23ab7d381b 100644
--- a/test/validation/validation_data.cpp
+++ b/test/validation/validation_data.cpp
@@ -7,8 +7,7 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/simple_sampler.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 #include "trace_analysis.hpp"
 #include "validation_data.hpp"
@@ -21,11 +20,11 @@ trace_io g_trace_io;
 #define ARB_DATADIR ""
 #endif
 
-util::path trace_io::find_datadir() {
+aux::path trace_io::find_datadir() {
     // If environment variable is set, use that in preference.
 
     if (const char* env_path = std::getenv("ARB_DATADIR")) {
-        return util::path(env_path);
+        return env_path;
     }
 
     // Otherwise try compile-time path ARB_DATADIR and hard-coded
@@ -40,13 +39,13 @@ util::path trace_io::find_datadir() {
 
     std::error_code ec;
     for (auto p: paths) {
-        if (util::is_directory(p, ec)) {
-            return util::path(p);
+        if (aux::is_directory(p, ec)) {
+            return p;
         }
     }
 
     // Otherwise set to empty path, and rely on command-line option.
-    return util::path();
+    return "";
 }
 
 void trace_io::save_trace(const std::string& label, const trace_data<double>& data, const nlohmann::json& meta) {
@@ -54,13 +53,16 @@ void trace_io::save_trace(const std::string& label, const trace_data<double>& da
 }
 
 void trace_io::save_trace(const std::string& abscissa, const std::string& label, const trace_data<double>& data, const nlohmann::json& meta) {
-    using namespace arb;
+    using nlohmann::json;
 
-    nlohmann::json j = meta;
-    j["data"] = {
-        {abscissa, times(data)},
-        {label, values(data)}
-    };
+    json j = meta;  // start from the caller's metadata, then attach the samples under "data"
+    json& times = (j["data"][abscissa] = json::array());  // begin as [] so an empty trace
+    json& values = (j["data"][label] = json::array());    // serialises as [] rather than null
+
+    for (const auto& e: data) {  // split each sample into parallel time/value arrays
+        times.push_back(e.t);
+        values.push_back(e.v);
+    }
 
     jtraces_ += std::move(j);
 }
@@ -95,8 +97,8 @@ static void parse_trace_json(const nlohmann::json& j, std::map<std::string, trac
     }
 }
 
-std::map<std::string, trace_data<double>> trace_io::load_traces(const util::path& name) {
-    util::path file  = datadir_/name;
+std::map<std::string, trace_data<double>> trace_io::load_traces(const aux::path& name) {
+    aux::path file  = datadir_/name;
     std::ifstream fid(file);
     if (!fid) {
         throw std::runtime_error("unable to load validation data: "+file.native());
diff --git a/test/validation/validation_data.hpp b/test/validation/validation_data.hpp
index 1317738f1ae6891f2900564a9ed18f47b2ccd25f..620f79eb2657c85d5c3fde9140faabb32057d6a3 100644
--- a/test/validation/validation_data.hpp
+++ b/test/validation/validation_data.hpp
@@ -8,8 +8,7 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/simple_sampler.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 namespace arb {
 
@@ -47,7 +46,7 @@ public:
 
     void save_trace(const std::string& label, const trace_data<double>& data, const nlohmann::json& meta);
     void save_trace(const std::string& abscissa, const std::string& label, const trace_data<double>& data, const nlohmann::json& meta);
-    std::map<std::string, trace_data<double>> load_traces(const util::path& name);
+    std::map<std::string, trace_data<double>> load_traces(const aux::path& name);
 
     // common flags, options set by driver
 
@@ -63,9 +62,9 @@ public:
     void set_sample_dt(float dt) { sample_dt_ = dt; }
     float sample_dt() const { return sample_dt_; }
 
-    void set_datadir(const util::path& dir) { datadir_ = dir; }
+    void set_datadir(const aux::path& dir) { datadir_ = dir; }  // base directory that load_traces() prepends to the file name
 
-    void set_output(const util::path& file) {
+    void set_output(const aux::path& file) {
         out_.open(file);
         if (!out_) {
             throw std::runtime_error("unable to open file for writing");
@@ -81,7 +80,7 @@ public:
     }
 
 private:
-    util::path datadir_;
+    aux::path datadir_;
     std::ofstream out_;
     nlohmann::json jtraces_ = nlohmann::json::array();
     bool verbose_flag_ = false;
@@ -94,7 +93,7 @@ private:
     // starting with ARB_DATADIR preprocessor define if defined and
     // if the directory exists, or else try './validation/data'
     // and '../validation/data'.
-    static util::path find_datadir();
+    static aux::path find_datadir();
 };
 
 extern trace_io g_trace_io;