From d6af0c4d96422a6a3d59341439d338a5d818404f Mon Sep 17 00:00:00 2001
From: Sam Yates <yates@cscs.ch>
Date: Fri, 13 Jul 2018 14:58:53 +0200
Subject: [PATCH] Feature/lib install target part 4 (#531)

All example code and validation tests no longer require access to private include directories. This provides the minimal requirement for an installable target.

Note that it is still not possible to separately build mechanisms from NMODL with just the public includes, and there is not yet any package configuration file creation for use with CMake or pkg-config.

* Replace `hw::node_info` with `proc_allocation`, describing local resources for the purposes of domain decomposition.
* Group processor counting and gpu counting implementation under `node_info.cpp`.
* Remove `domain_decomposition` dependency from `cell_group_factory.hpp` so we can use the latter to test for backend support for a cell kind.
* Add `arb::cell_kind_implementation()` which performs the mapping from cell kind and backend kind to a `cell_group_ptr`-producing function (this will then become the site for custom cell group kind mapping support in future work).
* Move headers for aux library into `aux/include/aux`, so that there is less ambiguity in header location within test and example code.
* Simplify the affinity scanning code a little.
* Add `sysconf()` and `std::thread::hardware_concurrency()` fallbacks for processor count determination.
* Move `util::mask_stream`, `util::path`, `util::scope_exit` into aux header.
* Remove `util::nop`.
* Remove `io::exporter` and `io::exporter_spike_file`; replace with simpler interface in `aux::spike_emitter`.
* Move `load_balance.hpp`, `math.hpp`, `swcio.hpp` into public includes.
* Move path, glob and scope_exit utilities to aux lib.
* Move cell group size-based partitioning functionality into partition_load_balance, through the use of new `partition_hint` structure.
* Remove brunel example specific partitioner.
* Move brunel example option code out of `::arb` namespace.
* Move swc routines out of `arb::io` and into `arb::`.
* Specialize public swc-routines to take a vector of swc_record objects instead of an arbitrary sequence; place implementations into swcio.cpp.
* Remove dependency on `util/strprintf.hpp` from lmorpho.
* Remove `arb::math::min`, `arb::math::max`.
* Remove range utility dependence from validation tests: add piece-wise linear interpolator in test/validation/interpolate.hpp; remove `times` and `values` adaptors for simple trace data; add test/validation/util.hpp with generic size function and a `to_string` for `backend_kind`.
* Add `aux::open_or_throw` helper function to replicate no-overwrite spike output behaviour in example applications.
* Add `aux::strsub` simple string substitution function for use in examples, so as to avoid a `util::strprintf` or `util::pprintf` dependency.
* Remove arbor private header dependencies from examples and validation tests.
---
 arbor/CMakeLists.txt                          |   5 +-
 .../backends/gpu/matrix_state_interleaved.hpp |   2 +-
 arbor/backends/gpu/mechanism.cpp              |   2 +-
 arbor/backends/multicore/mechanism.cpp        |   2 +-
 arbor/backends/multicore/shared_state.cpp     |   4 +-
 .../backends/multicore/threshold_watcher.hpp  |   3 +-
 arbor/benchmark_cell_group.cpp                |   4 +-
 arbor/benchmark_cell_group.hpp                |   2 +-
 arbor/cell_group.hpp                          |   5 -
 arbor/cell_group_factory.cpp                  |  41 ++-
 arbor/cell_group_factory.cu                   |   1 -
 arbor/cell_group_factory.hpp                  |  20 +-
 arbor/fvm_compartment.hpp                     |   2 +-
 arbor/hardware/affinity.cpp                   |  59 ++--
 arbor/hardware/affinity.hpp                   |  11 +-
 arbor/hardware/gpu.cpp                        |  21 --
 arbor/hardware/gpu.hpp                        |   9 -
 arbor/hardware/node_info.cpp                  |  51 ++-
 arbor/hardware/node_info.hpp                  |  20 +-
 arbor/io/exporter.hpp                         |  28 --
 arbor/io/exporter_spike_file.hpp              |  91 -----
 arbor/lif_cell_group.cpp                      |   4 +-
 arbor/lif_cell_group.hpp                      |   2 +-
 arbor/load_balance.hpp                        |  15 -
 arbor/local_alloc.cpp                         |  16 +
 arbor/mc_cell_group.cpp                       |   4 +-
 arbor/mc_cell_group.hpp                       |   2 +-
 arbor/morphology.cpp                          |   3 +-
 arbor/partition_load_balance.cpp              |  48 ++-
 arbor/simulation.cpp                          |  14 +-
 arbor/spike_source_cell_group.cpp             |   4 +-
 arbor/spike_source_cell_group.hpp             |   2 +-
 arbor/swcio.cpp                               | 137 +++++++-
 arbor/swcio.hpp                               | 243 -------------
 arbor/threading/threading.cpp                 |  19 +-
 aux/CMakeLists.txt                            |  10 +-
 aux/glob.cpp                                  |  47 +++
 aux/include/aux/glob.hpp                      |  14 +
 {arbor/util => aux/include/aux}/ioutil.hpp    |  43 +--
 aux/{ => include/aux}/json_meter.hpp          |   0
 {arbor/util => aux/include/aux}/path.hpp      |  35 +-
 .../util => aux/include/aux}/scope_exit.hpp   |   6 +-
 aux/include/aux/spike_emitter.hpp             |  16 +
 aux/include/aux/strsub.hpp                    |  66 ++++
 aux/{ => include/aux}/tinyopt.hpp             |   0
 aux/{ => include/aux}/with_mpi.hpp            |   0
 aux/ioutil.cpp                                |  25 ++
 {arbor/util => aux}/path.cpp                  |  41 +--
 aux/spike_emitter.cpp                         |  23 ++
 example/bench/CMakeLists.txt                  |   3 -
 example/bench/bench.cpp                       |  30 +-
 example/bench/recipe.cpp                      |   8 +-
 example/bench/recipe.hpp                      |  17 +-
 example/brunel/CMakeLists.txt                 |   3 -
 example/brunel/brunel_miniapp.cpp             |  71 ++--
 example/brunel/io.cpp                         | 319 +++++++++---------
 example/brunel/io.hpp                         |   2 -
 example/brunel/partitioner.hpp                |  71 ----
 example/generators/CMakeLists.txt             |   3 -
 example/generators/event_gen.cpp              |   7 +-
 example/miniapp/CMakeLists.txt                |   3 -
 example/miniapp/io.hpp                        |   3 +-
 example/miniapp/miniapp.cpp                   |  63 ++--
 example/miniapp/morphology_pool.cpp           |  13 +-
 example/miniapp/morphology_pool.hpp           |   5 +-
 example/miniapp/trace.cpp                     |   7 +-
 include/arbor/domain_decomposition.hpp        |  16 +-
 include/arbor/event_generator.hpp             |   2 +
 include/arbor/load_balance.hpp                |  25 ++
 {arbor => include/arbor}/math.hpp             |  12 -
 include/arbor/swcio.hpp                       | 110 ++++++
 include/arbor/time_sequence.hpp               |   3 -
 lmorpho/lmorpho.cpp                           |   2 +-
 lmorpho/lsystem.cpp                           |   2 +-
 lmorpho/morphio.cpp                           |  52 +--
 lmorpho/morphio.hpp                           |   8 +-
 modcc/printer/cprinter.cpp                    |   9 +-
 test/ubench/accumulate_functor_values.cpp     |   4 +-
 test/ubench/default_construct.cpp             |   2 +-
 test/ubench/event_binning.cpp                 |   3 +-
 test/ubench/event_setup.cpp                   |   7 +-
 test/ubench/mech_vec.cpp                      |   6 +-
 test/unit-distributed/CMakeLists.txt          |   1 -
 test/unit-distributed/test.cpp                |  15 +-
 test/unit-distributed/test_communicator.cpp   |   8 +-
 .../test_domain_decomposition.cpp             |  12 +-
 .../test_exporter_spike_file.cpp              | 117 -------
 test/unit/CMakeLists.txt                      |   4 +-
 test/unit/test_compartments.cpp               |   3 +-
 test/unit/test_domain_decomposition.cpp       |  51 ++-
 test/unit/test_fvm_layout.cpp                 |   2 +-
 test/unit/test_fvm_lowered.cpp                |   7 +-
 test/unit/test_lif_cell_group.cpp             |  17 +-
 test/unit/test_mask_stream.cpp                |   8 +-
 test/unit/test_math.cpp                       |  30 +-
 test/unit/test_matrix.cpp                     |  11 +-
 test/unit/test_matrix.cu                      |  26 +-
 test/unit/test_matrix_cpuvsgpu.cpp            |  18 +-
 test/unit/test_mc_cell.cpp                    |   2 +-
 test/unit/test_merge_events.cpp               |   2 +
 test/unit/test_nop.cpp                        |  75 ----
 test/unit/test_partition.cpp                  |   3 +-
 test/unit/test_path.cpp                       |   8 +-
 test/unit/test_segment.cpp                    |   3 +-
 test/unit/test_spike_emitter.cpp              |  30 ++
 test/unit/test_swcio.cpp                      |  24 +-
 test/validation/CMakeLists.txt                |   2 -
 test/validation/convergence_test.hpp          |  40 ++-
 test/validation/interpolate.hpp               |  50 +++
 test/validation/trace_analysis.cpp            |  55 ++-
 test/validation/trace_analysis.hpp            |  16 +-
 test/validation/util.hpp                      |  22 ++
 test/validation/validate.cpp                  |   2 +-
 test/validation/validate_ball_and_stick.cpp   |  34 +-
 .../validate_compartment_policy.cpp           | 101 ------
 test/validation/validate_kinetic.cpp          |  19 +-
 test/validation/validate_soma.cpp             |  17 +-
 test/validation/validate_synapses.cpp         |  21 +-
 test/validation/validation_data.cpp           |  32 +-
 test/validation/validation_data.hpp           |  13 +-
 120 files changed, 1387 insertions(+), 1627 deletions(-)
 delete mode 100644 arbor/cell_group_factory.cu
 delete mode 100644 arbor/hardware/gpu.cpp
 delete mode 100644 arbor/hardware/gpu.hpp
 delete mode 100644 arbor/io/exporter.hpp
 delete mode 100644 arbor/io/exporter_spike_file.hpp
 delete mode 100644 arbor/load_balance.hpp
 create mode 100644 arbor/local_alloc.cpp
 delete mode 100644 arbor/swcio.hpp
 create mode 100644 aux/glob.cpp
 create mode 100644 aux/include/aux/glob.hpp
 rename {arbor/util => aux/include/aux}/ioutil.hpp (78%)
 rename aux/{ => include/aux}/json_meter.hpp (100%)
 rename {arbor/util => aux/include/aux}/path.hpp (92%)
 rename {arbor/util => aux/include/aux}/scope_exit.hpp (93%)
 create mode 100644 aux/include/aux/spike_emitter.hpp
 create mode 100644 aux/include/aux/strsub.hpp
 rename aux/{ => include/aux}/tinyopt.hpp (100%)
 rename aux/{ => include/aux}/with_mpi.hpp (100%)
 create mode 100644 aux/ioutil.cpp
 rename {arbor/util => aux}/path.cpp (66%)
 create mode 100644 aux/spike_emitter.cpp
 delete mode 100644 example/brunel/partitioner.hpp
 create mode 100644 include/arbor/load_balance.hpp
 rename {arbor => include/arbor}/math.hpp (95%)
 create mode 100644 include/arbor/swcio.hpp
 delete mode 100644 test/unit-distributed/test_exporter_spike_file.cpp
 delete mode 100644 test/unit/test_nop.cpp
 create mode 100644 test/unit/test_spike_emitter.cpp
 create mode 100644 test/validation/interpolate.hpp
 create mode 100644 test/validation/util.hpp
 delete mode 100644 test/validation/validate_compartment_policy.cpp

diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 4ea56fd8..aef98060 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -10,18 +10,18 @@ set(arbor_sources
     builtin_mechanisms.cpp
     cell_group_factory.cpp
     common_types_io.cpp
-    mc_cell.cpp
+    local_alloc.cpp
     event_binner.cpp
     fvm_layout.cpp
     fvm_lowered_cell_impl.cpp
     hardware/affinity.cpp
-    hardware/gpu.cpp
     hardware/memory.cpp
     hardware/node_info.cpp
     hardware/power.cpp
     io/locked_ostream.cpp
     io/serialize_hex.cpp
     lif_cell_group.cpp
+    mc_cell.cpp
     mc_cell_group.cpp
     mechcat.cpp
     memory/cuda_wrappers.cpp
@@ -43,7 +43,6 @@ set(arbor_sources
     threading/threading.cpp
     thread_private_spike_store.cpp
     util/hostname.cpp
-    util/path.cpp
     util/unwind.cpp
     version.cpp
 )
diff --git a/arbor/backends/gpu/matrix_state_interleaved.hpp b/arbor/backends/gpu/matrix_state_interleaved.hpp
index 0cb2aac8..2ac1989e 100644
--- a/arbor/backends/gpu/matrix_state_interleaved.hpp
+++ b/arbor/backends/gpu/matrix_state_interleaved.hpp
@@ -2,8 +2,8 @@
 
 #include <arbor/assert.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 
-#include "math.hpp"
 #include "memory/memory.hpp"
 #include "util/span.hpp"
 #include "util/partition.hpp"
diff --git a/arbor/backends/gpu/mechanism.cpp b/arbor/backends/gpu/mechanism.cpp
index cb57c95a..e545e791 100644
--- a/arbor/backends/gpu/mechanism.cpp
+++ b/arbor/backends/gpu/mechanism.cpp
@@ -8,10 +8,10 @@
 #include <arbor/arbexcept.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mechanism.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "math.hpp"
 #include "memory/memory.hpp"
 #include "util/index_into.hpp"
 #include "util/maputil.hpp"
diff --git a/arbor/backends/multicore/mechanism.cpp b/arbor/backends/multicore/mechanism.cpp
index a5b22f3e..81c48e8f 100644
--- a/arbor/backends/multicore/mechanism.cpp
+++ b/arbor/backends/multicore/mechanism.cpp
@@ -7,10 +7,10 @@
 
 #include <arbor/fvm_types.hpp>
 #include <arbor/common_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mechanism.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "math.hpp"
 #include "util/index_into.hpp"
 #include "util/maputil.hpp"
 #include "util/padded_alloc.hpp"
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index 2af16850..6cb520ef 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -6,15 +6,15 @@
 #include <vector>
 
 #include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
 #include <arbor/constants.hpp>
 #include <arbor/fvm_types.hpp>
-#include <arbor/common_types.hpp>
 #include <arbor/ion.hpp>
+#include <arbor/math.hpp>
 #include <arbor/simd/simd.hpp>
 
 #include "backends/event.hpp"
 #include "io/sepval.hpp"
-#include "math.hpp"
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
 
diff --git a/arbor/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp
index cc5b7416..dcb38168 100644
--- a/arbor/backends/multicore/threshold_watcher.hpp
+++ b/arbor/backends/multicore/threshold_watcher.hpp
@@ -2,10 +2,9 @@
 
 #include <arbor/assert.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 
 #include "backends/threshold_crossing.hpp"
-#include "math.hpp"
-
 #include "multicore_common.hpp"
 
 namespace arb {
diff --git a/arbor/benchmark_cell_group.cpp b/arbor/benchmark_cell_group.cpp
index 0556045d..2da67ef0 100644
--- a/arbor/benchmark_cell_group.cpp
+++ b/arbor/benchmark_cell_group.cpp
@@ -13,9 +13,9 @@
 
 namespace arb {
 
-benchmark_cell_group::benchmark_cell_group(std::vector<cell_gid_type> gids,
+benchmark_cell_group::benchmark_cell_group(const std::vector<cell_gid_type>& gids,
                                            const recipe& rec):
-    gids_(std::move(gids))
+    gids_(gids)
 {
     cells_.reserve(gids_.size());
     for (auto gid: gids_) {
diff --git a/arbor/benchmark_cell_group.hpp b/arbor/benchmark_cell_group.hpp
index 02fee8d5..f915eed0 100644
--- a/arbor/benchmark_cell_group.hpp
+++ b/arbor/benchmark_cell_group.hpp
@@ -13,7 +13,7 @@ namespace arb {
 
 class benchmark_cell_group: public cell_group {
 public:
-    benchmark_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    benchmark_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     cell_kind get_cell_kind() const override;
 
diff --git a/arbor/cell_group.hpp b/arbor/cell_group.hpp
index 98cf20ab..a77d720e 100644
--- a/arbor/cell_group.hpp
+++ b/arbor/cell_group.hpp
@@ -42,9 +42,4 @@ public:
 
 using cell_group_ptr = std::unique_ptr<cell_group>;
 
-template <typename T, typename... Args>
-cell_group_ptr make_cell_group(Args&&... args) {
-    return cell_group_ptr(new T(std::forward<Args>(args)...));
-}
-
 } // namespace arb
diff --git a/arbor/cell_group_factory.cpp b/arbor/cell_group_factory.cpp
index 54e343ea..d82bada0 100644
--- a/arbor/cell_group_factory.cpp
+++ b/arbor/cell_group_factory.cpp
@@ -1,12 +1,11 @@
 #include <vector>
 
-#include <arbor/arbexcept.hpp>
 #include <arbor/common_types.hpp>
-#include <arbor/domain_decomposition.hpp>
 #include <arbor/recipe.hpp>
 
 #include "benchmark_cell_group.hpp"
 #include "cell_group.hpp"
+#include "cell_group_factory.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "lif_cell_group.hpp"
 #include "mc_cell_group.hpp"
@@ -14,23 +13,45 @@
 
 namespace arb {
 
-cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group) {
-    switch (group.kind) {
+template <typename Impl, typename... Args>
+cell_group_ptr make_cell_group(Args&&... args) {
+    return cell_group_ptr(new Impl(std::forward<Args>(args)...));
+}
+
+cell_group_factory cell_kind_implementation(cell_kind ck, backend_kind bk) {
+    using gid_vector = std::vector<cell_gid_type>;
+
+    switch (ck) {
     case cell_kind::cable1d_neuron:
-        return make_cell_group<mc_cell_group>(group.gids, rec, make_fvm_lowered_cell(group.backend));
+        return [bk](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<mc_cell_group>(gids, rec, make_fvm_lowered_cell(bk));
+        };
 
     case cell_kind::spike_source:
-        return make_cell_group<spike_source_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<spike_source_cell_group>(gids, rec);
+        };
 
     case cell_kind::lif_neuron:
-        return make_cell_group<lif_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<lif_cell_group>(gids, rec);
+        };
 
     case cell_kind::benchmark:
-        return make_cell_group<benchmark_cell_group>(group.gids, rec);
+        if (bk!=backend_kind::multicore) break;
+
+        return [](const gid_vector& gids, const recipe& rec) {
+            return make_cell_group<benchmark_cell_group>(gids, rec);
+        };
 
-    default:
-        throw arbor_internal_error("cell_group_factory: unknown cell kind");
+    default: ;
     }
+
+    return cell_group_factory{}; // empty function => not supported
 }
 
 } // namespace arb
diff --git a/arbor/cell_group_factory.cu b/arbor/cell_group_factory.cu
deleted file mode 100644
index 21d20e7b..00000000
--- a/arbor/cell_group_factory.cu
+++ /dev/null
@@ -1 +0,0 @@
-#include "cell_group_factory.cpp"
diff --git a/arbor/cell_group_factory.hpp b/arbor/cell_group_factory.hpp
index 320e4ff5..47c476e5 100644
--- a/arbor/cell_group_factory.hpp
+++ b/arbor/cell_group_factory.hpp
@@ -1,13 +1,27 @@
 #pragma once
 
-#include <arbor/domain_decomposition.hpp>
+// Provide a map from cell group kinds and execution back-end to implementation,
+// as represented by a `cell_group_factory` function wrapper below.
+//
+// An empty function implies there is no support for that cell kind on that
+// back-end.
+
+#include <functional>
+#include <vector>
+
+#include <arbor/common_types.hpp>
 #include <arbor/recipe.hpp>
 
 #include "cell_group.hpp"
 
 namespace arb {
 
-// Helper factory for building cell groups
-cell_group_ptr cell_group_factory(const recipe& rec, const group_description& group);
+using cell_group_factory = std::function<cell_group_ptr (const std::vector<cell_gid_type>&, const recipe&)>;
+
+cell_group_factory cell_kind_implementation(cell_kind, backend_kind);
+
+inline bool cell_kind_supported(cell_kind c, backend_kind b) {
+    return static_cast<bool>(cell_kind_implementation(c, b));
+}
 
 } // namespace arb
diff --git a/arbor/fvm_compartment.hpp b/arbor/fvm_compartment.hpp
index 6361c511..0d626328 100644
--- a/arbor/fvm_compartment.hpp
+++ b/arbor/fvm_compartment.hpp
@@ -4,10 +4,10 @@
 #include <utility>
 
 #include <arbor/common_types.hpp>
+#include <arbor/math.hpp>
 #include <arbor/util/compat.hpp>
 
 #include "algorithms.hpp"
-#include "math.hpp"
 #include "util/iterutil.hpp"
 #include "util/partition.hpp"
 #include "util/rangeutil.hpp"
diff --git a/arbor/hardware/affinity.cpp b/arbor/hardware/affinity.cpp
index 29ef7ee4..73e2762f 100644
--- a/arbor/hardware/affinity.cpp
+++ b/arbor/hardware/affinity.cpp
@@ -1,63 +1,52 @@
 #include <cstdlib>
+#include <system_error>
 #include <vector>
 
-#include <arbor/util/optional.hpp>
-
 #ifdef __linux__
 
-    #ifndef _GNU_SOURCE
-        #define _GNU_SOURCE
-    #endif
-
-    extern "C" {
-        #include <sched.h>
-    }
-
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
 #endif
 
+extern "C" {
+#include <sched.h>
+}
+
 namespace arb {
 namespace hw {
 
-#ifdef __linux__
 std::vector<int> get_affinity() {
+    std::vector<int> cores;
     cpu_set_t cpu_set_mask;
 
-    auto status = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_mask);
-
-    if(status==-1) {
-        return {};
+    int status = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_mask);
+    if (status) {
+        throw std::system_error(errno, std::generic_category());
     }
 
-    unsigned cpu_count = CPU_COUNT(&cpu_set_mask);
-
-    std::vector<int> cores;
-    for(auto i=0; i<CPU_SETSIZE && cores.size()<cpu_count; ++i) {
-        if(CPU_ISSET(i, &cpu_set_mask)) {
+    for (int i=0; i<CPU_SETSIZE; ++i) {
+        if (CPU_ISSET(i, &cpu_set_mask)) {
             cores.push_back(i);
         }
     }
 
-    if(cores.size() != cpu_count) {
-        return {};
-    }
-
     return cores;
 }
-#else
 
-// No support for non-linux systems
+} // namespace hw
+} // namespace arb
+
+#else // def __linux__
+
+// No support for non-linux systems.
+namespace arb {
+namespace hw {
+
 std::vector<int> get_affinity() {
     return {};
 }
-#endif
-
-util::optional<std::size_t> num_cores() {
-    auto cores = get_affinity();
-    if (cores.size()==0u) {
-        return util::nullopt;
-    }
-    return cores.size();
-}
 
 } // namespace hw
 } // namespace arb
+
+#endif // def __linux__
diff --git a/arbor/hardware/affinity.hpp b/arbor/hardware/affinity.hpp
index 8c5cbc59..db6c8f6b 100644
--- a/arbor/hardware/affinity.hpp
+++ b/arbor/hardware/affinity.hpp
@@ -3,12 +3,10 @@
 #include <cstdint>
 #include <vector>
 
-#include <arbor/util/optional.hpp>
-
 namespace arb {
 namespace hw {
 
-// The list of cores for which the calling thread has affinity.
+// The list of logical processors for which the calling thread has affinity.
 // If calling from the main thread at application start up, before
 // attempting to change thread affinity, may produce unreliable
 // results.
@@ -19,12 +17,5 @@ namespace hw {
 // available cores.
 std::vector<int> get_affinity();
 
-// Attempts to find the number of cores available to the application
-// This is likely to give inaccurate results if the caller has already
-// been playing with thread affinity.
-//
-// Returns 0 if unable to determine the number of cores.
-util::optional<std::size_t> num_cores();
-
 } // namespace util
 } // namespace arb
diff --git a/arbor/hardware/gpu.cpp b/arbor/hardware/gpu.cpp
deleted file mode 100644
index bd5f2584..00000000
--- a/arbor/hardware/gpu.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifdef ARB_HAVE_GPU
-    #include <cuda_runtime.h>
-#endif
-
-namespace arb {
-namespace hw {
-
-#ifdef ARB_HAVE_GPU
-unsigned num_gpus() {
-    int n;
-    cudaGetDeviceCount(&n);
-    return n;
-}
-#else
-unsigned num_gpus() {
-    return 0u;
-}
-#endif
-
-} // namespace hw
-} // namespace arb
diff --git a/arbor/hardware/gpu.hpp b/arbor/hardware/gpu.hpp
deleted file mode 100644
index 78b8ea80..00000000
--- a/arbor/hardware/gpu.hpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-
-namespace arb {
-namespace hw {
-
-unsigned num_gpus();
-
-} // namespace hw
-} // namespace arb
diff --git a/arbor/hardware/node_info.cpp b/arbor/hardware/node_info.cpp
index 354a697d..095c26b0 100644
--- a/arbor/hardware/node_info.cpp
+++ b/arbor/hardware/node_info.cpp
@@ -1,19 +1,52 @@
-#include <algorithm>
+#include <thread>
+
+#ifdef ARB_HAVE_GPU
+#include <cuda_runtime.h>
+#endif
+
+// TODO: C++17 use __has_include(<unistd.h>)
+#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__)
+#include <unistd.h>
+#endif
 
 #include "affinity.hpp"
-#include "gpu.hpp"
 #include "node_info.hpp"
 
 namespace arb {
 namespace hw {
 
-// Return a node_info that describes the hardware resources available on this node.
-// If unable to determine the number of available cores, assumes that there is one
-// core available.
-node_info get_node_info() {
-    auto res = num_cores();
-    unsigned ncpu = res? *res: 1u;
-    return {ncpu, num_gpus()};
+
+unsigned node_gpus() {
+#ifdef ARB_HAVE_GPU
+    int n;
+    if (cudaGetDeviceCount(&n)==cudaSuccess) {
+        return (unsigned)(n);
+    }
+#endif
+
+    return 0;
+}
+
+unsigned node_processors() {
+    // Attempt to get count first from affinity information if available.
+    unsigned n = get_affinity().size();
+
+    // If no luck, try sysconf.
+#ifdef _SC_NPROCESSORS_ONLN
+    if (!n) {
+        long r = sysconf(_SC_NPROCESSORS_ONLN);
+        if (r>0) {
+            n = (unsigned)r;
+        }
+    }
+#endif
+
+    // If still zero, try the hint from the library.
+    if (!n) {
+        n = std::thread::hardware_concurrency();
+    }
+
+    return n;
 }
 
 } // namespace util
diff --git a/arbor/hardware/node_info.hpp b/arbor/hardware/node_info.hpp
index f83a2caf..0452bdd4 100644
--- a/arbor/hardware/node_info.hpp
+++ b/arbor/hardware/node_info.hpp
@@ -3,20 +3,12 @@
 namespace arb {
 namespace hw {
 
-// Information about the computational resources available on a compute node.
-// Currently a simple enumeration of the number of cpu cores and gpus, which
-// will become richer.
-struct node_info {
-    node_info() = default;
-    node_info(unsigned c, unsigned g):
-        num_cpu_cores(c), num_gpus(g)
-    {}
+// Number of GPUs detected on the node.
+unsigned node_gpus();
 
-    unsigned num_cpu_cores = 1;
-    unsigned num_gpus = 0;
-};
+// Number of visible logical processors on the node.
+// 0 => unable to determine.
+unsigned node_processors();
 
-node_info get_node_info();
-
-} // namespace util
+} // namespace hw
 } // namespace arb
diff --git a/arbor/io/exporter.hpp b/arbor/io/exporter.hpp
deleted file mode 100644
index 9370391d..00000000
--- a/arbor/io/exporter.hpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include <random>
-#include <string>
-
-#include <arbor/common_types.hpp>
-#include <arbor/spike.hpp>
-
-namespace arb {
-namespace io {
-
-// interface for exporters.
-// Exposes one virtual functions:
-//    do_export(vector<type>) receiving a vector of parameters to export
-
-class exporter {
-public:
-    // Performs the export of the data
-    virtual void output(const std::vector<spike>&) = 0;
-
-    // Returns the status of the exporter
-    virtual bool good() const = 0;
-
-    virtual ~exporter() {}
-};
-
-} //communication
-} // namespace arb
diff --git a/arbor/io/exporter_spike_file.hpp b/arbor/io/exporter_spike_file.hpp
deleted file mode 100644
index 7ee80d7a..00000000
--- a/arbor/io/exporter_spike_file.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#pragma once
-
-#include <fstream>
-#include <iomanip>
-#include <memory>
-#include <random>
-#include <stdexcept>
-#include <vector>
-
-#include <cstring>
-#include <cstdio>
-
-#include <arbor/common_types.hpp>
-#include <arbor/spike.hpp>
-
-#include "io/exporter.hpp"
-#include "util/file.hpp"
-
-namespace arb {
-namespace io {
-
-class exporter_spike_file: public exporter {
-public:
-    // Constructor
-    // over_write if true will overwrite the specified output file (default = true)
-    // output_path  relative or absolute path
-    // file_name    will be appended with "_x" with x the rank number
-    // file_extension  a seperator will be added automatically
-    exporter_spike_file(
-        const std::string& file_name,
-        const std::string& path,
-        const std::string& file_extension,
-        int index,
-        bool over_write=true)
-    {
-        file_path_ =
-            create_output_file_path(
-                file_name, path, file_extension, index);
-
-        //test if the file exist and depending on over_write throw or delete
-        if (!over_write && util::file_exists(file_path_)) {
-            throw std::runtime_error(
-                "Tried opening file for writing but it exists and over_write is false: " + file_path_);
-        }
-
-        file_handle_.open(file_path_);
-    }
-
-    // Performs export of the spikes to file.
-    // One id and spike time with 4 decimals after the comma on a
-    // line space separated.
-    void output(const std::vector<spike>& spikes) override {
-        for (auto spike : spikes) {
-            char linebuf[45];
-            auto n =
-                std::snprintf(
-                    linebuf, sizeof(linebuf), "%u %.4f\n",
-                    unsigned{spike.source.gid}, float(spike.time));
-            file_handle_.write(linebuf, n);
-        }
-    }
-
-    bool good() const override {
-        return file_handle_.good();
-    }
-
-    // Creates an indexed filename
-    static std::string create_output_file_path(
-        const std::string& file_name,
-        const std::string& path,
-        const std::string& file_extension,
-        unsigned index)
-    {
-        return path + file_name + "_" +  std::to_string(index) + "." + file_extension;
-    }
-
-    // The name of the output path and file name.
-    // May be either relative or absolute path.
-    const std::string& file_path() const {
-        return file_path_;
-    }
-
-private:
-
-    // Handle to opened file handle
-    std::ofstream file_handle_;
-    std::string file_path_;
-};
-
-} //communication
-} // namespace arb
diff --git a/arbor/lif_cell_group.cpp b/arbor/lif_cell_group.cpp
index 7cce6cc5..7622dab2 100644
--- a/arbor/lif_cell_group.cpp
+++ b/arbor/lif_cell_group.cpp
@@ -6,8 +6,8 @@
 using namespace arb;
 
 // Constructor containing gid of first cell in a group and a container of all cells.
-lif_cell_group::lif_cell_group(std::vector<cell_gid_type> gids, const recipe& rec):
-gids_(std::move(gids))
+lif_cell_group::lif_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec):
+    gids_(gids)
 {
     // Default to no binning of events
     set_binning_policy(binning_kind::none, 0);
diff --git a/arbor/lif_cell_group.hpp b/arbor/lif_cell_group.hpp
index 6ed49d39..034f3e34 100644
--- a/arbor/lif_cell_group.hpp
+++ b/arbor/lif_cell_group.hpp
@@ -19,7 +19,7 @@ public:
     lif_cell_group() = default;
 
     // Constructor containing gid of first cell in a group and a container of all cells.
-    lif_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    lif_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     virtual cell_kind get_cell_kind() const override;
     virtual void reset() override;
diff --git a/arbor/load_balance.hpp b/arbor/load_balance.hpp
deleted file mode 100644
index 757f5a31..00000000
--- a/arbor/load_balance.hpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include <arbor/distributed_context.hpp>
-#include <arbor/domain_decomposition.hpp>
-#include <arbor/recipe.hpp>
-
-#include "hardware/node_info.hpp"
-
-namespace arb {
-
-domain_decomposition partition_load_balance(const recipe& rec,
-                                            hw::node_info nd,
-                                            const distributed_context* ctx);
-
-} // namespace arb
diff --git a/arbor/local_alloc.cpp b/arbor/local_alloc.cpp
new file mode 100644
index 00000000..92204a5f
--- /dev/null
+++ b/arbor/local_alloc.cpp
@@ -0,0 +1,16 @@
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/threadinfo.hpp>
+
+#include "hardware/node_info.hpp"
+
+namespace arb {
+
+proc_allocation local_allocation() { // Fill a proc_allocation from arb::num_threads() and hw::node_gpus().
+    proc_allocation info;
+    info.num_threads = arb::num_threads(); // thread count as reported by arb::num_threads()
+    info.num_gpus = arb::hw::node_gpus(); // GPU count as reported by hw::node_gpus()
+
+    return info;
+}
+
+} // namespace arb
diff --git a/arbor/mc_cell_group.cpp b/arbor/mc_cell_group.cpp
index e03ae795..54793ddc 100644
--- a/arbor/mc_cell_group.cpp
+++ b/arbor/mc_cell_group.cpp
@@ -23,8 +23,8 @@
 
 namespace arb {
 
-mc_cell_group::mc_cell_group(std::vector<cell_gid_type> gids, const recipe& rec, fvm_lowered_cell_ptr lowered):
-    gids_(std::move(gids)), lowered_(std::move(lowered))
+mc_cell_group::mc_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec, fvm_lowered_cell_ptr lowered):
+    gids_(gids), lowered_(std::move(lowered))
 {
     // Default to no binning of events
     set_binning_policy(binning_kind::none, 0);
diff --git a/arbor/mc_cell_group.hpp b/arbor/mc_cell_group.hpp
index be9599d7..47653506 100644
--- a/arbor/mc_cell_group.hpp
+++ b/arbor/mc_cell_group.hpp
@@ -30,7 +30,7 @@ class mc_cell_group: public cell_group {
 public:
     mc_cell_group() = default;
 
-    mc_cell_group(std::vector<cell_gid_type> gids, const recipe& rec, fvm_lowered_cell_ptr lowered);
+    mc_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec, fvm_lowered_cell_ptr lowered);
 
     cell_kind get_cell_kind() const override {
         return cell_kind::cable1d_neuron;
diff --git a/arbor/morphology.cpp b/arbor/morphology.cpp
index 5508f835..2243772b 100644
--- a/arbor/morphology.cpp
+++ b/arbor/morphology.cpp
@@ -2,8 +2,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 namespace arb {
 
diff --git a/arbor/partition_load_balance.cpp b/arbor/partition_load_balance.cpp
index 472c61ef..e6a80892 100644
--- a/arbor/partition_load_balance.cpp
+++ b/arbor/partition_load_balance.cpp
@@ -1,16 +1,20 @@
 #include <arbor/distributed_context.hpp>
 #include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/recipe.hpp>
 
-#include "hardware/node_info.hpp"
+#include "cell_group_factory.hpp"
+#include "util/maputil.hpp"
 #include "util/partition.hpp"
 #include "util/span.hpp"
 
 namespace arb {
 
-domain_decomposition partition_load_balance(const recipe& rec,
-                                            hw::node_info nd,
-                                            const distributed_context* ctx)
+domain_decomposition partition_load_balance(
+    const recipe& rec,
+    proc_allocation nd,
+    const distributed_context* ctx,
+    partition_hint_map hint_map)
 {
     struct partition_gid_domain {
         partition_gid_domain(std::vector<cell_gid_type> divs):
@@ -45,8 +49,7 @@ domain_decomposition partition_load_balance(const recipe& rec,
 
     // Local load balance
 
-    std::unordered_map<cell_kind, std::vector<cell_gid_type>>
-        kind_lists;
+    std::unordered_map<cell_kind, std::vector<cell_gid_type>> kind_lists;
     for (auto gid: make_span(gid_part[domain_id])) {
         kind_lists[rec.get_cell_kind(gid)].push_back(gid);
     }
@@ -61,6 +64,11 @@ domain_decomposition partition_load_balance(const recipe& rec,
     // the threading internals. We need support for setting the priority
     // of cell group updates according to rules such as the back end on
     // which the cell group is running.
+
+    auto has_gpu_backend = [](cell_kind c) {
+        return cell_kind_supported(c, backend_kind::gpu);
+    };
+
     std::vector<cell_kind> kinds;
     for (auto l: kind_lists) {
         kinds.push_back(cell_kind(l.first));
@@ -69,16 +77,30 @@ domain_decomposition partition_load_balance(const recipe& rec,
 
     std::vector<group_description> groups;
     for (auto k: kinds) {
-        // put all cells into a single cell group on the gpu if possible
-        if (nd.num_gpus && has_gpu_backend(k)) {
-            groups.push_back({k, std::move(kind_lists[k]), backend_kind::gpu});
+        partition_hint hint;
+        if (auto opt_hint = util::value_by_key(hint_map, k)) {
+            hint = opt_hint.value();
+        }
+
+        backend_kind backend = backend_kind::multicore;
+        std::size_t group_size = hint.cpu_group_size;
+
+        if (hint.prefer_gpu && nd.num_gpus>0 && has_gpu_backend(k)) {
+            backend = backend_kind::gpu;
+            group_size = hint.gpu_group_size;
         }
-        // otherwise place into cell groups of size 1 on the cpu cores
-        else {
-            for (auto gid: kind_lists[k]) {
-                groups.push_back({k, {gid}, backend_kind::multicore});
+
+        std::vector<cell_gid_type> group_elements;
+        for (auto gid: kind_lists[k]) {
+            group_elements.push_back(gid);
+            if (group_elements.size()>=group_size) {
+                groups.push_back({k, std::move(group_elements), backend});
+                group_elements.clear();
             }
         }
+        if (!group_elements.empty()) {
+            groups.push_back({k, std::move(group_elements), backend});
+        }
     }
 
     // calculate the number of local cells
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index 8339b0c0..f0c90f77 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -105,17 +105,17 @@ private:
 
     // Apply a functional to each cell group in parallel.
     template <typename L>
-    void foreach_group(L fn) {
+    void foreach_group(L&& fn) {
         threading::parallel_for::apply(0, cell_groups_.size(),
-            [&](int i) { fn(cell_groups_[i]); });
+            [&, fn = std::forward<L>(fn)](int i) { fn(cell_groups_[i]); });
     }
 
     // Apply a functional to each cell group in parallel, supplying
     // the cell group pointer reference and index.
     template <typename L>
-    void foreach_group_index(L fn) {
+    void foreach_group_index(L&& fn) {
         threading::parallel_for::apply(0, cell_groups_.size(),
-            [&](int i) { fn(cell_groups_[i], i); });
+            [&, fn = std::forward<L>(fn)](int i) { fn(cell_groups_[i], i); });
     }
 };
 
@@ -162,7 +162,11 @@ simulation_state::simulation_state(
     // Generate the cell groups in parallel, with one task per cell group.
     cell_groups_.resize(decomp.groups.size());
     foreach_group_index(
-        [&](cell_group_ptr& group, int i) { group = cell_group_factory(rec, decomp.groups[i]); });
+        [&](cell_group_ptr& group, int i) {
+            const auto& group_info = decomp.groups[i];
+            auto factory = cell_kind_implementation(group_info.kind, group_info.backend);
+            group = factory(group_info.gids, rec);
+        });
 
     // Create event lane buffers.
     // There is one set for each epoch: current (0) and next (1).
diff --git a/arbor/spike_source_cell_group.cpp b/arbor/spike_source_cell_group.cpp
index 3859ba0c..aea7deaa 100644
--- a/arbor/spike_source_cell_group.cpp
+++ b/arbor/spike_source_cell_group.cpp
@@ -11,8 +11,8 @@
 
 namespace arb {
 
-spike_source_cell_group::spike_source_cell_group(std::vector<cell_gid_type> gids, const recipe& rec):
-    gids_(std::move(gids))
+spike_source_cell_group::spike_source_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec):
+    gids_(gids)
 {
     time_sequences_.reserve(gids_.size());
     for (auto gid: gids_) {
diff --git a/arbor/spike_source_cell_group.hpp b/arbor/spike_source_cell_group.hpp
index 7169fb08..1fcf8a88 100644
--- a/arbor/spike_source_cell_group.hpp
+++ b/arbor/spike_source_cell_group.hpp
@@ -15,7 +15,7 @@ namespace arb {
 
 class spike_source_cell_group: public cell_group {
 public:
-    spike_source_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
+    spike_source_cell_group(const std::vector<cell_gid_type>& gids, const recipe& rec);
 
     cell_kind get_cell_kind() const override;
 
diff --git a/arbor/swcio.cpp b/arbor/swcio.cpp
index 2f93bef8..ef18c386 100644
--- a/arbor/swcio.cpp
+++ b/arbor/swcio.cpp
@@ -8,16 +8,15 @@
 #include <arbor/assert.hpp>
 #include <arbor/morphology.hpp>
 #include <arbor/point.hpp>
+#include <arbor/swcio.hpp>
 
 #include "algorithms.hpp"
-#include "swcio.hpp"
+#include "util/span.hpp"
 
 namespace arb {
-namespace io {
 
 // swc_record implementation
 
-
 // helper function: return error message if inconsistent, or nullptr if ok.
 const char* swc_record_error(const swc_record& r) {
     constexpr int max_type = static_cast<int>(swc_record::kind::custom);
@@ -57,7 +56,7 @@ void swc_record::assert_consistent() const {
     }
 }
 
-bool parse_record(const std::string& line, swc_record& record) {
+static bool parse_record(const std::string& line, swc_record& record) {
     std::istringstream is(line);
     swc_record r;
 
@@ -76,7 +75,7 @@ bool parse_record(const std::string& line, swc_record& record) {
     return false;
 }
 
-bool is_comment(const std::string& line) {
+static bool is_comment(const std::string& line) {
     auto pos = line.find_first_not_of(" \f\n\r\t\v");
     return pos==std::string::npos || line[pos]=='#';
 }
@@ -151,9 +150,133 @@ std::vector<swc_record> parse_swc_file(std::istream& is) {
         throw swc_error("SWC parse error", line_number);
     }
 
-    swc_canonicalize_sequence(records);
+    swc_canonicalize(records);
     return records;
 }
 
-} // namespace io
+morphology swc_as_morphology(const std::vector<swc_record>& swc_records) { // Build a morphology from SWC records; record 0 is used as the soma.
+    morphology morph;
+
+    std::vector<swc_record::id_type> swc_parent_index;
+    for (const auto& r: swc_records) {
+        swc_parent_index.push_back(r.parent_id);
+    }
+
+    if (swc_parent_index.empty()) { // no records: return the default-constructed morphology
+        return morph;
+    }
+
+    // The parent of soma must be 0, while in SWC files is -1
+    swc_parent_index[0] = 0;
+    auto branch_index = algorithms::branches(swc_parent_index); // partitions [0, #records] by branch.
+    auto parent_branch_index = algorithms::tree_reduce(swc_parent_index, branch_index);
+
+    // sanity check
+    arb_assert(parent_branch_index.size() == branch_index.size() - 1);
+
+    // Add the soma first; then the segments
+    const auto& soma = swc_records[0];
+    morph.soma = { soma.x, soma.y, soma.z, soma.r };
+
+    for (auto i: util::make_span(1, parent_branch_index.size())) { // branch 0 is skipped: the soma was handled above
+        auto b_start = swc_records.begin() + branch_index[i];
+        auto b_end   = swc_records.begin() + branch_index[i+1];
+
+        unsigned parent_id = parent_branch_index[i]; // index of the parent branch, passed on to add_section()
+        std::vector<section_point> points;
+        section_kind kind = section_kind::none; // ends up as the kind of the last record in the branch with a mapped type
+
+        if (parent_id != 0) {
+            // include the parent of current record if not branching from soma
+            auto parent_record = swc_records[swc_parent_index[branch_index[i]]];
+
+            points.push_back(section_point{parent_record.x, parent_record.y, parent_record.z, parent_record.r});
+        }
+
+        for (auto b = b_start; b!=b_end; ++b) {
+            points.push_back(section_point{b->x, b->y, b->z, b->r});
+
+            switch (b->type) {
+            case swc_record::kind::axon:
+                kind = section_kind::axon;
+                break;
+            case swc_record::kind::dendrite:
+            case swc_record::kind::apical_dendrite:
+                kind = section_kind::dendrite;
+                break;
+            case swc_record::kind::soma:
+                kind = section_kind::soma;
+                break;
+            default: ; // stick with what we have
+            }
+        }
+
+        morph.add_section(std::move(points), parent_id, kind);
+    }
+
+    morph.assert_valid();
+    return morph;
+}
+
+void swc_canonicalize(std::vector<swc_record>& swc_records) { // Check the records and renumber them in place; throws swc_error on rejected input.
+    std::unordered_set<swc_record::id_type> ids;
+
+    std::size_t         num_trees = 0;
+    swc_record::id_type last_id   = -1;
+    bool                needsort  = false;
+
+    for (const auto& r: swc_records) {
+        r.assert_consistent();
+
+        if (r.parent_id == -1 && ++num_trees > 1) {
+            // only a single tree is allowed
+            throw swc_error("multiple trees found in SWC record sequence");
+        }
+        if (ids.count(r.id)) {
+            throw swc_error("records with duplicated ids in SWC record sequence");
+        }
+
+        if (!needsort && r.id < last_id) { // an id smaller than its predecessor: the sequence is not in ascending id order
+            needsort = true;
+        }
+
+        last_id = r.id;
+        ids.insert(r.id);
+    }
+
+    if (needsort) {
+        std::sort(swc_records.begin(), swc_records.end(),
+            [](const swc_record& a, const swc_record& b) { return a.id<b.id; });
+    }
+
+    // Renumber records if necessary.
+    std::map<swc_record::id_type, swc_record::id_type> idmap; // old id -> new id, for renumbered records only
+    swc_record::id_type next_id = 0;
+    for (auto& r: swc_records) {
+        if (r.id != next_id) { // id differs from the record's position in the sequence
+            auto old_id = r.id;
+            r.id = next_id;
+
+            auto new_parent_id = idmap.find(r.parent_id);
+            if (new_parent_id != idmap.end()) { // parent was renumbered earlier: follow it to its new id
+                r.parent_id = new_parent_id->second;
+            }
+
+            r.assert_consistent();
+            idmap.insert(std::make_pair(old_id, next_id));
+        }
+        ++next_id;
+    }
+
+    // Reject if branches are not contiguously numbered.
+    std::vector<swc_record::id_type> parent_list = { 0 }; // the first record's parent is entered as 0 here
+    for (std::size_t i = 1; i < swc_records.size(); ++i) {
+        parent_list.push_back(swc_records[i].parent_id);
+    }
+
+    if (!arb::algorithms::has_contiguous_compartments(parent_list)) {
+        throw swc_error("branches are not contiguously numbered", 0);
+    }
+}
+
 } // namespace arb
diff --git a/arbor/swcio.hpp b/arbor/swcio.hpp
deleted file mode 100644
index 15028118..00000000
--- a/arbor/swcio.hpp
+++ /dev/null
@@ -1,243 +0,0 @@
-#pragma once
-
-#include <exception>
-#include <iostream>
-#include <iterator>
-#include <map>
-#include <string>
-#include <unordered_set>
-#include <vector>
-
-#include <arbor/assert.hpp>
-#include <arbor/morphology.hpp>
-#include <arbor/point.hpp>
-
-#include "algorithms.hpp"
-
-namespace arb {
-namespace io {
-
-class swc_record {
-public:
-    using id_type = int;
-    using coord_type = double;
-
-    // More on SWC files: http://research.mssm.edu/cnic/swc.html
-    enum class kind {
-        undefined = 0,
-        soma,
-        axon,
-        dendrite,
-        apical_dendrite,
-        fork_point,
-        end_point,
-        custom
-    };
-
-    kind type = kind::undefined; // record type
-    id_type id = 0;              // record id
-    coord_type x = 0;            // record coordinates
-    coord_type y = 0;
-    coord_type z = 0;
-    coord_type r = 0;            // record radius
-    id_type parent_id= -1;      // record parent's id
-
-    // swc records assume zero-based indexing; root's parent remains -1
-    swc_record(swc_record::kind type, int id,
-               coord_type x, coord_type y, coord_type z, coord_type r,
-               int parent_id):
-        type(type), id(id), x(x), y(y), z(z), r(r), parent_id(parent_id)
-    {}
-
-    swc_record() = default;
-    swc_record(const swc_record& other) = default;
-    swc_record& operator=(const swc_record& other) = default;
-
-    bool operator==(const swc_record& other) const {
-        return id == other.id &&
-            x == other.x &&
-            y == other.y &&
-            z == other.z &&
-            r == other.r &&
-            parent_id == other.parent_id;
-    }
-
-    friend bool operator!=(const swc_record& lhs, const swc_record& rhs) {
-        return !(lhs == rhs);
-    }
-
-    friend std::ostream& operator<<(std::ostream& os, const swc_record& record);
-
-    coord_type diameter() const {
-        return 2*r;
-    }
-
-    arb::point<coord_type> coord() const {
-        return arb::point<coord_type>(x, y, z);
-    }
-
-    arb::section_point as_section_point() const {
-        return arb::section_point{x, y, z, r};
-    }
-
-    // validity checks
-    bool is_consistent() const;
-    void assert_consistent() const; // throw swc_error if inconsistent.
-};
-
-
-class swc_error: public std::runtime_error {
-public:
-    explicit swc_error(const char* msg, std::size_t lineno = 0):
-        std::runtime_error(msg), line_number(lineno)
-    {}
-
-    explicit swc_error(const std::string& msg, std::size_t lineno = 0):
-        std::runtime_error(msg), line_number(lineno)
-    {}
-
-    std::size_t line_number;
-};
-
-// Parse one record, skipping comments and blank lines.
-std::istream& operator>>(std::istream& is, swc_record& record);
-
-// Parse and canonicalize an EOF-terminated sequence of records.
-// Throw on parsing failure.
-std::vector<swc_record> parse_swc_file(std::istream& is);
-
-// Convert a canonical (see below) sequence of SWC records to a morphology object.
-template <typename RandomAccessSequence>
-morphology swc_as_morphology(const RandomAccessSequence& swc_records) {
-    morphology morph;
-
-    std::vector<swc_record::id_type> swc_parent_index;
-    for (const auto& r: swc_records) {
-        swc_parent_index.push_back(r.parent_id);
-    }
-
-    if (swc_parent_index.empty()) {
-        return morph;
-    }
-
-    // The parent of soma must be 0, while in SWC files is -1
-    swc_parent_index[0] = 0;
-    auto branch_index = algorithms::branches(swc_parent_index); // partitions [0, #records] by branch.
-    auto parent_branch_index = algorithms::tree_reduce(swc_parent_index, branch_index);
-
-    // sanity check
-    arb_assert(parent_branch_index.size() == branch_index.size() - 1);
-
-    // Add the soma first; then the segments
-    const auto& soma = swc_records[0];
-    morph.soma = { soma.x, soma.y, soma.z, soma.r };
-
-    auto n_branches = parent_branch_index.size();
-    for (std::size_t i = 1; i < n_branches; ++i) {
-        auto b_start = std::next(swc_records.begin(), branch_index[i]);
-        auto b_end   = std::next(swc_records.begin(), branch_index[i+1]);
-
-        unsigned parent_id = parent_branch_index[i];
-        std::vector<section_point> points;
-        section_kind kind = section_kind::none;
-
-        if (parent_id != 0) {
-            // include the parent of current record if not branching from soma
-            auto parent_record = swc_records[swc_parent_index[branch_index[i]]];
-
-            points.push_back(section_point{parent_record.x, parent_record.y, parent_record.z, parent_record.r});
-        }
-
-        for (auto b = b_start; b!=b_end; ++b) {
-            points.push_back(section_point{b->x, b->y, b->z, b->r});
-
-            switch (b->type) {
-            case swc_record::kind::axon:
-                kind = section_kind::axon;
-                break;
-            case swc_record::kind::dendrite:
-            case swc_record::kind::apical_dendrite:
-                kind = section_kind::dendrite;
-                break;
-            case swc_record::kind::soma:
-                kind = section_kind::soma;
-                break;
-            default: ; // stick with what we have
-            }
-        }
-
-        morph.add_section(std::move(points), parent_id, kind);
-    }
-
-    morph.assert_valid();
-    return morph;
-}
-
-// Given a random-access mutable sequence of `swc_record` describing
-// a single morphology, check for consistency and renumber records
-// so that ids are contiguous within branches, have no gaps, and
-// are ordered with repect to parent indices.
-template <typename RandomAccessSequence>
-void swc_canonicalize_sequence(RandomAccessSequence& swc_records) {
-    std::unordered_set<swc_record::id_type> ids;
-
-    std::size_t         num_trees = 0;
-    swc_record::id_type last_id   = -1;
-    bool                needsort  = false;
-
-    for (const auto& r: swc_records) {
-        r.assert_consistent();
-
-        if (r.parent_id == -1 && ++num_trees > 1) {
-            // only a single tree is allowed
-            throw swc_error("multiple trees found in SWC record sequence");
-        }
-        if (ids.count(r.id)) {
-            throw swc_error("records with duplicated ids in SWC record sequence");
-        }
-
-        if (!needsort && r.id < last_id) {
-            needsort = true;
-        }
-
-        last_id = r.id;
-        ids.insert(r.id);
-    }
-
-    if (needsort) {
-        std::sort(std::begin(swc_records), std::end(swc_records),
-            [](const swc_record& a, const swc_record& b) { return a.id<b.id; });
-    }
-
-    // Renumber records if necessary
-    std::map<swc_record::id_type, swc_record::id_type> idmap;
-    swc_record::id_type next_id = 0;
-    for (auto& r: swc_records) {
-        if (r.id != next_id) {
-            auto old_id = r.id;
-            r.id = next_id;
-
-            auto new_parent_id = idmap.find(r.parent_id);
-            if (new_parent_id != idmap.end()) {
-                r.parent_id = new_parent_id->second;
-            }
-
-            r.assert_consistent();
-            idmap.insert(std::make_pair(old_id, next_id));
-        }
-        ++next_id;
-    }
-
-    // Reject if branches are not contiguously numbered
-    std::vector<swc_record::id_type> parent_list = { 0 };
-    for (std::size_t i = 1; i < swc_records.size(); ++i) {
-        parent_list.push_back(swc_records[i].parent_id);
-    }
-
-    if (!arb::algorithms::has_contiguous_compartments(parent_list)) {
-        throw swc_error("branches are not contiguously numbered", 0);
-    }
-}
-
-} // namespace io
-} // namespace arb
diff --git a/arbor/threading/threading.cpp b/arbor/threading/threading.cpp
index 6e2ab43e..a05bf1bd 100644
--- a/arbor/threading/threading.cpp
+++ b/arbor/threading/threading.cpp
@@ -5,7 +5,7 @@
 
 #include <arbor/arbexcept.hpp>
 #include <arbor/util/optional.hpp>
-#include <hardware/affinity.hpp>
+#include <hardware/node_info.hpp>
 
 #include "threading.hpp"
 #include "util/strprintf.hpp"
@@ -58,13 +58,18 @@ util::optional<size_t> get_env_num_threads() {
     return nthreads;
 }
 
-size_t num_threads_init() {
-    auto env_threads = get_env_num_threads();
-    if (!env_threads || *env_threads==0u) {
-        auto detect_threads = hw::num_cores();
-        return detect_threads? *detect_threads: 1;
+std::size_t num_threads_init() {
+    std::size_t n = 0;
+
+    if (auto env_threads = get_env_num_threads()) {
+        n = env_threads.value();
+    }
+
+    if (!n) {
+        n = hw::node_processors();
     }
-    return *env_threads;
+
+    return n? n: 1;
 }
 
 // Returns the number of threads used by the threading back end.
diff --git a/aux/CMakeLists.txt b/aux/CMakeLists.txt
index 91de6207..d3030154 100644
--- a/aux/CMakeLists.txt
+++ b/aux/CMakeLists.txt
@@ -1,7 +1,13 @@
 set(aux-sources
-    json_meter.cpp)
+
+    glob.cpp
+    ioutil.cpp
+    json_meter.cpp
+    path.cpp
+    spike_emitter.cpp
+)
 
 add_library(arbor-aux ${aux-sources})
 target_link_libraries(arbor-aux PUBLIC ext-json arbor)
-target_include_directories(arbor-aux INTERFACE .)
+target_include_directories(arbor-aux PUBLIC include)
 set_target_properties(arbor-aux PROPERTIES OUTPUT_NAME arboraux)
diff --git a/aux/glob.cpp b/aux/glob.cpp
new file mode 100644
index 00000000..9b67a472
--- /dev/null
+++ b/aux/glob.cpp
@@ -0,0 +1,59 @@
+// POSIX headers
+extern "C" {
+#define _POSIX_C_SOURCE 2
+#include <glob.h>
+}
+
+// GLOB_TILDE and GLOB_BRACE are non-standard but convenient and common
+// flags for glob().
+
+#ifndef GLOB_TILDE
+#define GLOB_TILDE 0
+#endif
+#ifndef GLOB_BRACE
+#define GLOB_BRACE 0
+#endif
+
+#include <cerrno>
+#include <new>
+#include <string>
+#include <vector>
+
+#include <aux/path.hpp>
+#include <aux/scope_exit.hpp>
+
+namespace aux {
+
+// Expand `pattern` with POSIX glob(3) and return the resulting paths.
+//
+// GLOB_MARK: matched directories are returned with a trailing slash.
+// GLOB_NOCHECK: a pattern with no matches is returned as the sole result.
+// GLOB_TILDE, GLOB_BRACE: tilde and brace expansion, where the platform provides them.
+//
+// Throws std::bad_alloc if glob() reports GLOB_NOSPACE; any other
+// failure yields an empty vector.
+std::vector<path> glob(const std::string& pattern) {
+    std::vector<path> paths;
+    glob_t matches;
+
+    int flags = GLOB_MARK | GLOB_NOCHECK | GLOB_TILDE | GLOB_BRACE;
+    auto r = ::glob(pattern.c_str(), flags, nullptr, &matches);
+    // Release glob()'s storage on every exit path, including the throw below.
+    auto glob_guard = on_scope_exit([&]() { ::globfree(&matches); });
+
+    if (r==GLOB_NOSPACE) {
+        throw std::bad_alloc{};
+    }
+    else if (r==0) {
+        // success
+        paths.reserve(matches.gl_pathc);
+        for (auto pathp = matches.gl_pathv; *pathp; ++pathp) {
+            paths.push_back(*pathp);
+        }
+    }
+
+    return paths;
+}
+
+} // namespace aux
+
diff --git a/aux/include/aux/glob.hpp b/aux/include/aux/glob.hpp
new file mode 100644
index 00000000..406167ce
--- /dev/null
+++ b/aux/include/aux/glob.hpp
@@ -0,0 +1,19 @@
+#pragma once
+
+// glob (3) wrapper
+// TODO: emulate for not-entirely-POSIX platforms.
+
+#include <string>
+#include <vector>
+
+#include <aux/path.hpp>
+
+namespace aux {
+
+// Expand `pattern` with glob(3) and return the resulting paths. A pattern
+// with no matches is returned as the only element. Throws std::bad_alloc
+// if glob() runs out of memory.
+std::vector<path> glob(const std::string& pattern);
+
+} // namespace aux
+
diff --git a/arbor/util/ioutil.hpp b/aux/include/aux/ioutil.hpp
similarity index 78%
rename from arbor/util/ioutil.hpp
rename to aux/include/aux/ioutil.hpp
index b45db228..2a9ba4ab 100644
--- a/arbor/util/ioutil.hpp
+++ b/aux/include/aux/ioutil.hpp
@@ -1,27 +1,22 @@
 #pragma once
 
-#include <iostream>
-
-namespace arb {
-namespace util {
-
-class iosfmt_guard {
-public:
-    explicit iosfmt_guard(std::ios& stream) :
-        save_(nullptr), stream_(stream)
-    {
-        save_.copyfmt(stream_);
-    }
+// Provides:
+//
+// * mask_stream
+//
+//   Stream manipulator that enables or disables writing to a stream based on a flag.
+//
+// * open_or_throw
+//
+//   Open an fstream, throwing on error. If the 'excl' flag is set, throw a
+//   std::runtime_error if the path exists.
 
-    ~iosfmt_guard() {
-        stream_.copyfmt(save_);
-    }
+#include <iostream>
+#include <fstream>
 
-private:
-    std::ios save_;
-    std::ios& stream_;
-};
+#include <aux/path.hpp>
 
+namespace aux {
 
 template <typename charT, typename traitsT = std::char_traits<charT> >
 class basic_null_streambuf: public std::basic_streambuf<charT, traitsT> {
@@ -92,6 +87,12 @@ private:
     bool mask_;
 };
 
-} // namespace util
-} // namespace arb
+std::fstream open_or_throw(const aux::path& p, std::ios_base::openmode, bool exclusive);
+
+inline std::fstream open_or_throw(const aux::path& p, bool exclusive) {
+    using std::ios_base;
+    return open_or_throw(p, ios_base::in|ios_base::out, exclusive);
+}
+
+} // namespace aux
 
diff --git a/aux/json_meter.hpp b/aux/include/aux/json_meter.hpp
similarity index 100%
rename from aux/json_meter.hpp
rename to aux/include/aux/json_meter.hpp
diff --git a/arbor/util/path.hpp b/aux/include/aux/path.hpp
similarity index 92%
rename from arbor/util/path.hpp
rename to aux/include/aux/path.hpp
index d11cd1b7..c5429cf5 100644
--- a/arbor/util/path.hpp
+++ b/aux/include/aux/path.hpp
@@ -24,11 +24,7 @@
 #include <utility>
 #include <vector>
 
-#include "util/meta.hpp"
-#include "util/rangeutil.hpp"
-
-namespace arb {
-namespace util {
+namespace aux {
 
 class posix_path {
 public:
@@ -51,22 +47,23 @@ public:
 
     // Construct or assign from value_type string or sequence.
 
-    template <typename Source>
-    posix_path(Source&& source) { assign(std::forward<Source>(source)); }
+    posix_path(string_type source): p_(std::move(source)) {}
+
+    posix_path(const value_type* source): p_(source) {}
 
     template <typename Iter>
     posix_path(Iter b, Iter e) { assign(b, e); }
 
     template <typename Source>
-    posix_path& operator=(const Source& source) { return assign(source); }
+    posix_path& operator=(Source&& source) { return assign(std::forward<Source>(source)); }
 
     posix_path& assign(const posix_path& other) {
         p_ = other.p_;
         return *this;
     }
 
-    posix_path& assign(const string_type& source) {
-        p_ = source;
+    posix_path& assign(string_type source) {
+        p_ = std::move(source);
         return *this;
     }
 
@@ -75,12 +72,6 @@ public:
         return *this;
     }
 
-    template <typename Seq, typename = enable_if_sequence_t<Seq>>
-    posix_path& assign(const Seq& seq) {
-        util::assign(p_, seq);
-        return *this;
-    }
-
     template <typename Iter>
     posix_path& assign(Iter b, Iter e) {
         p_.assign(b, e);
@@ -349,9 +340,6 @@ private:
 namespace posix {
     file_status status(const path&, std::error_code&);
     file_status symlink_status(const path&, std::error_code&);
-
-    // POSIX glob (3) wrapper (not part of std::filesystem!).
-    std::vector<path> glob(const std::string& pattern);
 }
 
 inline file_status status(const path& p, std::error_code& ec) {
@@ -362,15 +350,11 @@ inline file_status symlink_status(const path& p, std::error_code& ec) {
     return posix::symlink_status(p, ec);
 }
 
-inline std::vector<path> glob(const std::string& pattern) {
-    return posix::glob(pattern);
-}
-
 // Wrappers for `status()`, again following std::filesystem.
 
 inline file_status status(const path& p) {
     std::error_code ec;
-    auto r = ::arb::util::posix::status(p, ec);
+    auto r = ::aux::posix::status(p, ec);
     if (ec) {
         throw filesystem_error("status()", p, ec);
     }
@@ -425,6 +409,5 @@ inline bool exists(const path& p, std::error_code& ec) {
     return exists(status(p, ec));
 }
 
-} // namespace util
-} // namespace arb
+} // namespace aux
 
diff --git a/arbor/util/scope_exit.hpp b/aux/include/aux/scope_exit.hpp
similarity index 93%
rename from arbor/util/scope_exit.hpp
rename to aux/include/aux/scope_exit.hpp
index 5f83678a..4db228d8 100644
--- a/arbor/util/scope_exit.hpp
+++ b/aux/include/aux/scope_exit.hpp
@@ -5,8 +5,7 @@
 
 // Convenience class for RAII control of resources.
 
-namespace arb {
-namespace util {
+namespace aux {
 
 // `scope_exit` guard object will call provided functional object
 // on destruction. The provided functional object must be nothrow
@@ -48,5 +47,4 @@ scope_exit<std::decay_t<F>> on_scope_exit(F&& f) {
     return scope_exit<std::decay_t<F>>(std::forward<F>(f));
 }
 
-} // namespace util
-} // namespace arb
+} // namespace aux
diff --git a/aux/include/aux/spike_emitter.hpp b/aux/include/aux/spike_emitter.hpp
new file mode 100644
index 00000000..fdbfa16f
--- /dev/null
+++ b/aux/include/aux/spike_emitter.hpp
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <functional>
+#include <iosfwd>
+#include <vector>
+
+#include <arbor/spike.hpp>
+
+namespace aux {
+
+// Function object taking a vector of spikes, bound to an output stream.
+// NOTE(review): presumably writes the spikes to `out`; the definitions are
+// in spike_emitter.cpp — confirm the output format there.
+struct spike_emitter {
+    // Stored by reference: the stream must outlive the emitter.
+    std::reference_wrapper<std::ostream> out;
+
+    spike_emitter(std::ostream& out);
+    void operator()(const std::vector<arb::spike>&);
+};
+
+} // namespace aux
diff --git a/aux/include/aux/strsub.hpp b/aux/include/aux/strsub.hpp
new file mode 100644
index 00000000..b2dca849
--- /dev/null
+++ b/aux/include/aux/strsub.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+// Substitute instances of a given character (defaults to '%') in a template C
+// string with the remaining arguments, and write the result to an ostream or
+// return the result as a string.
+//
+// The special character itself can be escaped by duplicating it, e.g.
+//
+//     strsub("%%%-%%%", 30, 70)
+//
+// returns the string
+//
+//     "%30-%70"
+//
+// Escapes are recognised throughout the template, including the text after
+// the last substitution. Values left over once the template is exhausted are
+// ignored, and a lone special character with no value left to substitute is
+// written out unchanged.
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <utility>
+
+namespace aux {
+
+// Stream-writing strsub(...):
+
+// No values left: write the rest of the template, collapsing each doubled
+// special character into a single one.
+inline std::ostream& strsub(std::ostream& o, char c, const char* templ) {
+    const char* t = templ;
+    for (;;) {
+        while (*t && !(*t==c && t[1]==c)) ++t;
+
+        if (t>templ) o.write(templ, t-templ);
+
+        if (!*t) return o;
+
+        o.put(c);
+        templ = t += 2;
+    }
+}
+
+// Write the template up to its first unescaped special character, write
+// `value` in its place, then handle the remaining template and values.
+template <typename T, typename... Tail>
+std::ostream& strsub(std::ostream& o, char c, const char* templ, T value, Tail&&... tail) {
+    const char* t = templ;
+    for (;;) {
+        while (*t && !(*t==c)) ++t;
+
+        if (t>templ) o.write(templ, t-templ);
+
+        if (!*t) return o; // template exhausted: remaining values are dropped
+
+        if (t[1]!=c) break; // unescaped special character: substitute here
+
+        o.put(c); // escaped pair: emit one special character and keep scanning
+        templ = t += 2;
+    }
+
+    o << std::forward<T>(value);
+    return strsub(o, c, t+1, std::forward<Tail>(tail)...);
+}
+
+// As above, with '%' as the special character.
+template <typename... Args>
+std::ostream& strsub(std::ostream& o, const char* templ, Args&&... args) {
+    return strsub(o, '%', templ, std::forward<Args>(args)...);
+}
+
+// String-returning strsub(...) wrappers:
+
+template <typename... Args>
+std::string strsub(char c, const char* templ, Args&&... args) {
+    std::ostringstream o;
+    return strsub(o, c, templ, std::forward<Args>(args)...), o.str();
+}
+
+template <typename... Args>
+std::string strsub(const char* templ, Args&&... args) {
+    return strsub('%', templ, std::forward<Args>(args)...);
+}
+
+} // namespace aux
diff --git a/aux/tinyopt.hpp b/aux/include/aux/tinyopt.hpp
similarity index 100%
rename from aux/tinyopt.hpp
rename to aux/include/aux/tinyopt.hpp
diff --git a/aux/with_mpi.hpp b/aux/include/aux/with_mpi.hpp
similarity index 100%
rename from aux/with_mpi.hpp
rename to aux/include/aux/with_mpi.hpp
diff --git a/aux/ioutil.cpp b/aux/ioutil.cpp
new file mode 100644
index 00000000..7572208b
--- /dev/null
+++ b/aux/ioutil.cpp
@@ -0,0 +1,36 @@
+#include <fstream>
+#include <stdexcept>
+#include <utility>
+
+#include <aux/ioutil.hpp>
+#include <aux/path.hpp>
+#include <aux/strsub.hpp>
+
+namespace aux {
+
+// Open the file at path p with the given mode and return the stream.
+//
+// If exclusive is true and exists(p) holds, std::runtime_error is thrown
+// without touching the file; std::runtime_error is also thrown if the
+// stream is in a failed state after open().
+//
+// Note: the existence check and the open are two separate steps, so
+// `exclusive` cannot rule out a file created in between.
+std::fstream open_or_throw(const path& p, std::ios_base::openmode mode, bool exclusive) {
+    if (exclusive && exists(p)) {
+        throw std::runtime_error(strsub("file % already exists", p));
+    }
+
+    std::fstream file;
+    file.open(p, mode);
+    if (!file) {
+        throw std::runtime_error(strsub("unable to open file %", p));
+    }
+
+    // Return the local by name: it is moved from implicitly, and wrapping
+    // it in std::move() would only inhibit copy elision.
+    return file;
+}
+
+} // namespace aux
+
diff --git a/arbor/util/path.cpp b/aux/path.cpp
similarity index 66%
rename from arbor/util/path.cpp
rename to aux/path.cpp
index df571b2f..64321b23 100644
--- a/arbor/util/path.cpp
+++ b/aux/path.cpp
@@ -1,51 +1,15 @@
 // POSIX headers
 extern "C" {
-#define _POSIX_C_SOURCE 2
-#include <glob.h>
 #include <sys/stat.h>
 }
 
-// GLOB_TILDE and GLOB_BRACE are non-standard but convenient and common
-// flags for glob().
-
-#ifndef GLOB_TILDE
-#define GLOB_TILDE 0
-#endif
-#ifndef GLOB_BRACE
-#define GLOB_BRACE 0
-#endif
-
 #include <cerrno>
 
-#include <util/scope_exit.hpp>
-#include <util/path.hpp>
+#include <aux/path.hpp>
 
-namespace arb {
-namespace util {
+namespace aux {
 namespace posix {
 
-std::vector<path> glob(const std::string& pattern) {
-    std::vector<path> paths;
-    glob_t matches;
-
-    int flags = GLOB_MARK | GLOB_NOCHECK | GLOB_TILDE | GLOB_BRACE;
-    auto r = ::glob(pattern.c_str(), flags, nullptr, &matches);
-    auto glob_guard = on_scope_exit([&]() { ::globfree(&matches); });
-
-    if (r==GLOB_NOSPACE) {
-        throw std::bad_alloc{};
-    }
-    else if (r==0) {
-        // success
-        paths.reserve(matches.gl_pathc);
-        for (auto pathp = matches.gl_pathv; *pathp; ++pathp) {
-            paths.push_back(path{*pathp});
-        }
-    }
-
-    return paths;
-}
-
 namespace impl {
     file_status status(const char* p, int r, struct stat& st, std::error_code& ec) {
         if (!r) {
@@ -100,6 +64,5 @@ file_status symlink_status(const path& p, std::error_code& ec) {
 }
 
 } // namespace posix
-} // namespace util
-} // namespace arb
+} // namespace aux
 
diff --git a/aux/spike_emitter.cpp b/aux/spike_emitter.cpp
new file mode 100644
index 00000000..db33121d
--- /dev/null
+++ b/aux/spike_emitter.cpp
@@ -0,0 +1,50 @@
+#include <cerrno>
+#include <cstddef>
+#include <cstdio>
+#include <functional>
+#include <iostream>
+#include <system_error>
+#include <vector>
+
+#include <arbor/spike.hpp>
+#include <aux/spike_emitter.hpp>
+
+namespace aux {
+
+// Remember the stream `out`; spike records are written to it on each call.
+spike_emitter::spike_emitter(std::ostream& out): out(out) {}
+
+// Write one line per spike: the source gid, a space, and the spike time
+// with four decimal places.
+//
+// Throws std::system_error if snprintf reports a formatting error.
+void spike_emitter::operator()(const std::vector<arb::spike>& spikes) {
+    char line[45];
+    for (auto& s: spikes) {
+        auto format = [&s](char* buf, std::size_t size) {
+            return std::snprintf(buf, size, "%u %.4f",  s.source.gid, s.time);
+        };
+
+        int n = format(line, sizeof(line));
+        if (n<0) {
+            throw std::system_error(errno, std::generic_category());
+        }
+
+        if (static_cast<std::size_t>(n)<sizeof(line)) {
+            out.get().write(line, n).put('\n');
+        }
+        else {
+            // snprintf returns the length the full text needs, which can
+            // exceed the fixed buffer (e.g. a very large time value); writing
+            // n bytes from `line` would then read past its end, so format
+            // again into a buffer of the required size.
+            std::vector<char> big(static_cast<std::size_t>(n)+1);
+            if (format(big.data(), big.size())<0) {
+                throw std::system_error(errno, std::generic_category());
+            }
+            out.get().write(big.data(), n).put('\n');
+        }
+    }
+}
+
+} // namespace aux
diff --git a/example/bench/CMakeLists.txt b/example/bench/CMakeLists.txt
index 1423c792..8814dd31 100644
--- a/example/bench/CMakeLists.txt
+++ b/example/bench/CMakeLists.txt
@@ -1,6 +1,3 @@
 add_executable(bench bench.cpp recipe.cpp parameters.cpp)
 
 target_link_libraries(bench PRIVATE arbor arbor-aux ext-tclap ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(bench PRIVATE arbor-private-headers)
diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp
index 1ac4dcf1..b1ada307 100644
--- a/example/bench/bench.cpp
+++ b/example/bench/bench.cpp
@@ -11,31 +11,31 @@
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/profile/profiler.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-#include "util/ioutil.hpp"
-
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/with_mpi.hpp>
 
 #include "parameters.hpp"
 #include "recipe.hpp"
 
-using namespace arb;
+namespace profile = arb::profile;
 
 int main(int argc, char** argv) {
     try {
-        distributed_context context;
-        #ifdef ARB_HAVE_MPI
-        mpi::scoped_guard guard(&argc, &argv);
+        arb::distributed_context context;
+#ifdef ARB_HAVE_MPI
+        aux::with_mpi guard(&argc, &argv);
         context = mpi_context(MPI_COMM_WORLD);
-        #endif
+#endif
         const bool is_root =  context.id()==0;
 
-        std::cout << util::mask_stream(is_root);
+        std::cout << aux::mask_stream(is_root);
 
         bench_params params = read_options(argc, argv);
 
@@ -49,8 +49,8 @@ int main(int argc, char** argv) {
         meters.checkpoint("recipe-build");
 
         // Make the domain decomposition for the model
-        auto node = arb::hw::get_node_info();
-        auto decomp = arb::partition_load_balance(recipe, node, &context);
+        auto local = arb::local_allocation();
+        auto decomp = arb::partition_load_balance(recipe, local, &context);
         meters.checkpoint("domain-decomp");
 
         // Construct the model.
@@ -73,8 +73,8 @@ int main(int argc, char** argv) {
         }
 
         // output profile and diagnostic feedback
-        auto profile = profile::profiler_summary();
-        std::cout << profile << "\n";
+        auto summary = profile::profiler_summary();
+        std::cout << summary << "\n";
 
         std::cout << "there were " << sim.num_spikes() << " spikes\n";
     }
diff --git a/example/bench/recipe.cpp b/example/bench/recipe.cpp
index e70e8060..b8dd5f3f 100644
--- a/example/bench/recipe.cpp
+++ b/example/bench/recipe.cpp
@@ -6,6 +6,10 @@
 
 #include "recipe.hpp"
 
+using arb::cell_gid_type;
+using arb::cell_size_type;
+using arb::cell_kind;
+
 cell_size_type bench_recipe::num_cells() const {
     return params_.num_cells;
 }
@@ -27,8 +31,8 @@ arb::util::unique_any bench_recipe::get_cell_description(cell_gid_type gid) cons
     return std::move(cell);
 }
 
-arb::cell_kind bench_recipe::get_cell_kind(arb::cell_gid_type gid) const {
-    return arb::cell_kind::benchmark;
+cell_kind bench_recipe::get_cell_kind(cell_gid_type gid) const {
+    return cell_kind::benchmark;
 }
 
 std::vector<arb::cell_connection> bench_recipe::connections_on(cell_gid_type gid) const {
diff --git a/example/bench/recipe.hpp b/example/bench/recipe.hpp
index 4c8c8102..e95f6077 100644
--- a/example/bench/recipe.hpp
+++ b/example/bench/recipe.hpp
@@ -2,22 +2,21 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/recipe.hpp>
+#include <arbor/util/unique_any.hpp>
 
 #include "parameters.hpp"
 
-using arb::cell_kind;
-using arb::cell_gid_type;
-using arb::cell_size_type;
-
 class bench_recipe: public arb::recipe {
+private:
     bench_params params_;
+
 public:
     bench_recipe(bench_params p): params_(std::move(p)) {}
-    cell_size_type num_cells() const override;
-    arb::util::unique_any get_cell_description(cell_gid_type gid) const override;
+    arb::cell_size_type num_cells() const override;
+    arb::util::unique_any get_cell_description(arb::cell_gid_type gid) const override;
     arb::cell_kind get_cell_kind(arb::cell_gid_type gid) const override;
-    cell_size_type num_targets(cell_gid_type gid) const override;
-    cell_size_type num_sources(cell_gid_type gid) const override;
-    std::vector<arb::cell_connection> connections_on(cell_gid_type) const override;
+    arb::cell_size_type num_targets(arb::cell_gid_type gid) const override;
+    arb::cell_size_type num_sources(arb::cell_gid_type gid) const override;
+    std::vector<arb::cell_connection> connections_on(arb::cell_gid_type) const override;
 };
 
diff --git a/example/brunel/CMakeLists.txt b/example/brunel/CMakeLists.txt
index 15ed1ee7..657ec23f 100644
--- a/example/brunel/CMakeLists.txt
+++ b/example/brunel/CMakeLists.txt
@@ -3,6 +3,3 @@ add_executable(brunel-miniapp
     io.cpp)
 
 target_link_libraries(brunel-miniapp PRIVATE arbor arbor-aux ext-tclap)
-
-# TODO: resolve public headers
-target_link_libraries(brunel-miniapp PRIVATE arbor-private-headers)
diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp
index ae716caf..fb1195f1 100644
--- a/example/brunel/brunel_miniapp.cpp
+++ b/example/brunel/brunel_miniapp.cpp
@@ -8,8 +8,10 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/event_generator.hpp>
 #include <arbor/lif_cell.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/profile/profiler.hpp>
 #include <arbor/recipe.hpp>
@@ -17,22 +19,20 @@
 #include <arbor/threadinfo.hpp>
 #include <arbor/version.hpp>
 
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/path.hpp>
+#include <aux/spike_emitter.hpp>
+#include <aux/strsub.hpp>
 #ifdef ARB_MPI_ENABLED
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "io/exporter_spike_file.hpp"
-#include "util/ioutil.hpp"
-
-#include "partitioner.hpp"
 #include "io.hpp"
 
 using namespace arb;
 
-void banner(hw::node_info, const distributed_context*);
+void banner(proc_allocation, const distributed_context*);
 
 // Samples m unique values in interval [start, end) - gid.
 // We exclude gid because we don't want self-loops.
@@ -186,9 +186,6 @@ private:
     int seed_;
 };
 
-using util::any_cast;
-using util::make_span;
-
 int main(int argc, char** argv) {
     distributed_context context;
 
@@ -199,12 +196,10 @@ int main(int argc, char** argv) {
 #endif
         arb::profile::meter_manager meters(&context);
         meters.start();
-        std::cout << util::mask_stream(context.id()==0);
+        std::cout << aux::mask_stream(context.id()==0);
         // read parameters
         io::cl_options options = io::read_options(argc, argv, context.id()==0);
-        hw::node_info nd;
-        nd.num_cpu_cores = arb::num_threads();
-        nd.num_gpus = hw::num_gpus()>0? 1: 0;
+        proc_allocation nd = local_allocation();
         banner(nd, &context);
 
         meters.checkpoint("setup");
@@ -240,37 +235,31 @@ int main(int argc, char** argv) {
 
         brunel_recipe recipe(nexc, ninh, next, in_degree_prop, w, d, rel_inh_strength, poiss_lambda, seed);
 
-        auto register_exporter = [] (const io::cl_options& options) {
-            return std::make_unique<io::exporter_spike_file>
-                       (options.file_name, options.output_path,
-                        options.file_extension, options.over_write);
-        };
+        partition_hint_map hints;
+        hints[cell_kind::lif_neuron].cpu_group_size = group_size;
+        auto decomp = partition_load_balance(recipe, nd, &context, hints);
 
-        auto decomp = decompose(recipe, group_size, &context);
         simulation sim(recipe, decomp, &context);
 
         // Initialize the spike exporting interface
-        std::unique_ptr<io::exporter_spike_file> file_exporter;
+        std::fstream spike_out;
         if (options.spike_file_output) {
-            if (options.single_file_per_rank) {
-                file_exporter = register_exporter(options);
+            using std::ios_base;
 
-                sim.set_local_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    }
-                );
+            auto rank = context.id();
+            aux::path p = options.output_path;
+            p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension);
+
+            if (options.single_file_per_rank) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_local_spike_callback(aux::spike_emitter(spike_out));
             }
-            else if(context.id()==0) {
-                file_exporter = register_exporter(options);
-
-                sim.set_global_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    }
-                );
+            else if (rank==0) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_global_spike_callback(aux::spike_emitter(spike_out));
             }
         }
+
         meters.checkpoint("model-init");
 
         // run simulation
@@ -293,7 +282,7 @@ int main(int argc, char** argv) {
     }
     catch (io::usage_error& e) {
         // only print usage/startup errors on master
-        std::cerr << util::mask_stream(context.id()==0);
+        std::cerr << aux::mask_stream(context.id()==0);
         std::cerr << e.what() << "\n";
         return 1;
     }
@@ -304,12 +293,12 @@ int main(int argc, char** argv) {
     return 0;
 }
 
-void banner(hw::node_info nd, const distributed_context* ctx) {
+void banner(proc_allocation nd, const distributed_context* ctx) {
     std::cout << "==========================================\n";
     std::cout << "  Arbor miniapp\n";
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
-    std::cout << "  - threads     : " << nd.num_cpu_cores
+    std::cout << "  - threads     : " << nd.num_threads
               << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
diff --git a/example/brunel/io.cpp b/example/brunel/io.cpp
index 82624cd7..540cc559 100644
--- a/example/brunel/io.cpp
+++ b/example/brunel/io.cpp
@@ -8,6 +8,7 @@
 #include <type_traits>
 
 #include <tclap/CmdLine.h>
+
 #include <arbor/util/optional.hpp>
 
 #include "io.hpp"
@@ -32,176 +33,176 @@ namespace arb {
             return I;
         }
     }
-
-    namespace io {
-        // Override annoying parameters listed back-to-front behaviour.
-        //
-        // TCLAP argument creation _prepends_ its arguments to the internal
-        // list (_argList), where standard options --help etc. are already
-        // pre-inserted.
-        //
-        // reorder_arguments() reverses the arguments to restore ordering,
-        // and moves the standard options to the end.
-        class CustomCmdLine: public TCLAP::CmdLine {
-        public:
-            CustomCmdLine(const std::string &message, const std::string &version = "none"):
-            TCLAP::CmdLine(message, ' ', version, true)
-            {}
-
-            void reorder_arguments() {
-                _argList.reverse();
-                for (auto opt: {"help", "version", "ignore_rest"}) {
-                    auto i = std::find_if(
-                                          _argList.begin(), _argList.end(),
-                                          [&opt](TCLAP::Arg* a) { return a->getName()==opt; });
-
-                    if (i!=_argList.end()) {
-                        auto a = *i;
-                        _argList.erase(i);
-                        _argList.push_back(a);
-                    }
+}
+
+namespace io {
+    // Override annoying parameters listed back-to-front behaviour.
+    //
+    // TCLAP argument creation _prepends_ its arguments to the internal
+    // list (_argList), where standard options --help etc. are already
+    // pre-inserted.
+    //
+    // reorder_arguments() reverses the arguments to restore ordering,
+    // and moves the standard options to the end.
+    class CustomCmdLine: public TCLAP::CmdLine {
+    public:
+        CustomCmdLine(const std::string &message, const std::string &version = "none"):
+        TCLAP::CmdLine(message, ' ', version, true)
+        {}
+
+        void reorder_arguments() {
+            _argList.reverse();
+            for (auto opt: {"help", "version", "ignore_rest"}) {
+                auto i = std::find_if(
+                                      _argList.begin(), _argList.end(),
+                                      [&opt](TCLAP::Arg* a) { return a->getName()==opt; });
+
+                if (i!=_argList.end()) {
+                    auto a = *i;
+                    _argList.erase(i);
+                    _argList.push_back(a);
                 }
             }
-        };
+        }
+    };
 
-        // Update an option value from command line argument if set.
-        template <
+    // Update an option value from command line argument if set.
+    template <
         typename T,
         typename Arg,
         typename = std::enable_if_t<std::is_base_of<TCLAP::Arg, Arg>::value>
-        >
-        static void update_option(T& opt, Arg& arg) {
-            if (arg.isSet()) {
-                opt = arg.getValue();
-            }
+    >
+    static void update_option(T& opt, Arg& arg) {
+        if (arg.isSet()) {
+            opt = arg.getValue();
         }
+    }
 
-        // Read options from (optional) json file and command line arguments.
-        cl_options read_options(int argc, char** argv, bool allow_write) {
-            cl_options options;
-            std::string save_file = "";
-
-            // Parse command line arguments.
-            try {
-                cl_options defopts;
-
-                CustomCmdLine cmd("nest brunel miniapp harness", "0.1");
-
-                TCLAP::ValueArg<uint32_t> nexc_arg
-                    ("n", "n-excitatory", "total number of cells in the excitatory population",
-                     false, defopts.nexc, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> ninh_arg
-                    ("m", "n-inhibitory", "total number of cells in the inhibitory population",
-                     false, defopts.ninh, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> next_arg
-                    ("e", "n-external", "total number of incoming Poisson (external) connections per cell.",
-                     false, defopts.ninh, "integer", cmd);
-
-                TCLAP::ValueArg<double> syn_prop_arg
-                    ("p", "in-degree-prop", "the proportion of connections both the excitatory and inhibitory populations that each neuron receives",
-                     false, defopts.syn_per_cell_prop, "double", cmd);
-
-                TCLAP::ValueArg<float> weight_arg
-                    ("w", "weight", "the weight of all excitatory connections",
-                     false, defopts.weight, "float", cmd);
-
-                TCLAP::ValueArg<float> delay_arg
-                    ("d", "delay", "the delay of all connections",
-                     false, defopts.delay, "float", cmd);
-
-                TCLAP::ValueArg<float> rel_inh_strength_arg
-                    ("g", "rel-inh-w", "relative strength of inhibitory synapses with respect to the excitatory ones",
-                     false, defopts.rel_inh_strength, "float", cmd);
-
-                TCLAP::ValueArg<double> poiss_lambda_arg
-                    ("l", "lambda", "Expected number of spikes from a single poisson cell per ms",
-                     false, defopts.poiss_lambda, "double", cmd);
-
-                TCLAP::ValueArg<double> tfinal_arg
-                    ("t", "tfinal", "length of the simulation period [ms]",
-                     false, defopts.tfinal, "time", cmd);
-
-                TCLAP::ValueArg<double> dt_arg
-                    ("s", "delta-t", "simulation time step [ms] (this parameter is ignored)",
-                     false, defopts.dt, "time", cmd);
-
-                TCLAP::ValueArg<uint32_t> group_size_arg
-                    ("G", "group-size", "number of cells per cell group",
-                     false, defopts.group_size, "integer", cmd);
-
-                TCLAP::ValueArg<uint32_t> seed_arg
-                    ("S", "seed", "seed for poisson spike generators",
-                     false, defopts.seed, "integer", cmd);
-
-                TCLAP::SwitchArg spike_output_arg
-                    ("f","spike-file-output","save spikes to file", cmd, false);
-
-                TCLAP::SwitchArg profile_only_zero_arg
-                    ("z", "profile-only-zero", "Only output profile information for rank 0",
-                     cmd, false);
-
-                TCLAP::SwitchArg verbose_arg
-                    ("v", "verbose", "Present more verbose information to stdout", cmd, false);
-
-                cmd.reorder_arguments();
-                cmd.parse(argc, argv);
-
-                // Handle verbosity separately from other options: it is not considered part
-                // of the saved option state.
-                options.verbose = verbose_arg.getValue();
-                update_option(options.nexc, nexc_arg);
-                update_option(options.ninh, ninh_arg);
-                update_option(options.next, next_arg);
-                update_option(options.syn_per_cell_prop, syn_prop_arg);
-                update_option(options.weight, weight_arg);
-                update_option(options.delay, delay_arg);
-                update_option(options.rel_inh_strength, rel_inh_strength_arg);
-                update_option(options.poiss_lambda, poiss_lambda_arg);
-                update_option(options.tfinal, tfinal_arg);
-                update_option(options.dt, dt_arg);
-                update_option(options.group_size, group_size_arg);
-                update_option(options.seed, seed_arg);
-                update_option(options.spike_file_output, spike_output_arg);
-                update_option(options.profile_only_zero, profile_only_zero_arg);
-
-                if (options.group_size < 1) {
-                    throw usage_error("minimum of one cell per group");
-                }
-
-                if (options.rel_inh_strength <= 0 || options.rel_inh_strength > 1) {
-                    throw usage_error("relative strength of inhibitory connections must be in the interval (0, 1].");
-                }
-            }
-            catch (TCLAP::ArgException& e) {
-                throw usage_error("error parsing command line argument "+e.argId()+": "+e.error());
+    // Read options from (optional) json file and command line arguments.
+    cl_options read_options(int argc, char** argv, bool allow_write) {
+        cl_options options;
+        std::string save_file = "";
+
+        // Parse command line arguments.
+        try {
+            cl_options defopts;
+
+            CustomCmdLine cmd("nest brunel miniapp harness", "0.1");
+
+            TCLAP::ValueArg<uint32_t> nexc_arg
+                ("n", "n-excitatory", "total number of cells in the excitatory population",
+                 false, defopts.nexc, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> ninh_arg
+                ("m", "n-inhibitory", "total number of cells in the inhibitory population",
+                 false, defopts.ninh, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> next_arg
+                ("e", "n-external", "total number of incoming Poisson (external) connections per cell.",
+                 false, defopts.next, "integer", cmd); // default comes from the matching cl_options field
+
+            TCLAP::ValueArg<double> syn_prop_arg
+                ("p", "in-degree-prop", "the proportion of connections both the excitatory and inhibitory populations that each neuron receives",
+                 false, defopts.syn_per_cell_prop, "double", cmd);
+
+            TCLAP::ValueArg<float> weight_arg
+                ("w", "weight", "the weight of all excitatory connections",
+                 false, defopts.weight, "float", cmd);
+
+            TCLAP::ValueArg<float> delay_arg
+                ("d", "delay", "the delay of all connections",
+                 false, defopts.delay, "float", cmd);
+
+            TCLAP::ValueArg<float> rel_inh_strength_arg
+                ("g", "rel-inh-w", "relative strength of inhibitory synapses with respect to the excitatory ones",
+                 false, defopts.rel_inh_strength, "float", cmd);
+
+            TCLAP::ValueArg<double> poiss_lambda_arg
+                ("l", "lambda", "Expected number of spikes from a single poisson cell per ms",
+                 false, defopts.poiss_lambda, "double", cmd);
+
+            TCLAP::ValueArg<double> tfinal_arg
+                ("t", "tfinal", "length of the simulation period [ms]",
+                 false, defopts.tfinal, "time", cmd);
+
+            TCLAP::ValueArg<double> dt_arg
+                ("s", "delta-t", "simulation time step [ms] (this parameter is ignored)",
+                 false, defopts.dt, "time", cmd);
+
+            TCLAP::ValueArg<uint32_t> group_size_arg
+                ("G", "group-size", "number of cells per cell group",
+                 false, defopts.group_size, "integer", cmd);
+
+            TCLAP::ValueArg<uint32_t> seed_arg
+                ("S", "seed", "seed for poisson spike generators",
+                 false, defopts.seed, "integer", cmd);
+
+            TCLAP::SwitchArg spike_output_arg
+                ("f","spike-file-output","save spikes to file", cmd, false);
+
+            TCLAP::SwitchArg profile_only_zero_arg
+                ("z", "profile-only-zero", "Only output profile information for rank 0",
+                 cmd, false);
+
+            TCLAP::SwitchArg verbose_arg
+                ("v", "verbose", "Present more verbose information to stdout", cmd, false);
+
+            cmd.reorder_arguments();
+            cmd.parse(argc, argv);
+
+            // Handle verbosity separately from other options: it is not considered part
+            // of the saved option state.
+            options.verbose = verbose_arg.getValue();
+            update_option(options.nexc, nexc_arg);
+            update_option(options.ninh, ninh_arg);
+            update_option(options.next, next_arg);
+            update_option(options.syn_per_cell_prop, syn_prop_arg);
+            update_option(options.weight, weight_arg);
+            update_option(options.delay, delay_arg);
+            update_option(options.rel_inh_strength, rel_inh_strength_arg);
+            update_option(options.poiss_lambda, poiss_lambda_arg);
+            update_option(options.tfinal, tfinal_arg);
+            update_option(options.dt, dt_arg);
+            update_option(options.group_size, group_size_arg);
+            update_option(options.seed, seed_arg);
+            update_option(options.spike_file_output, spike_output_arg);
+            update_option(options.profile_only_zero, profile_only_zero_arg);
+
+            if (options.group_size < 1) {
+                throw usage_error("minimum of one cell per group");
             }
 
-            // If verbose output requested, emit option summary.
-            if (options.verbose) {
-                std::cout << options << "\n";
+            if (options.rel_inh_strength <= 0 || options.rel_inh_strength > 1) {
+                throw usage_error("relative strength of inhibitory connections must be in the interval (0, 1].");
             }
-
-            return options;
+        }
+        catch (TCLAP::ArgException& e) {
+            throw usage_error("error parsing command line argument "+e.argId()+": "+e.error());
         }
 
-        std::ostream& operator<<(std::ostream& o, const cl_options& options) {
-            o << "simulation options:\n";
-            o << "  excitatory cells                                           : " << options.nexc << "\n";
-            o << "  inhibitory cells                                           : " << options.ninh << "\n";
-            o << "  Poisson connections per cell                               : " << options.next << "\n";
-            o << "  proportion of synapses/cell from each population           : " << options.syn_per_cell_prop << "\n";
-            o << "  weight of excitatory synapses                              : " << options.weight << "\n";
-            o << "  relative strength of inhibitory synapses                   : " << options.rel_inh_strength << "\n";
-            o << "  delay of all synapses                                      : " << options.delay << "\n";
-            o << "  expected number of spikes from a single poisson cell per ms: " << options.poiss_lambda << "\n";
-            o << "\n";
-            o << "  simulation time                                            : " << options.tfinal << "\n";
-            o << "  dt                                                         : " << options.dt << "\n";
-            o << "  group size                                                 : " << options.group_size << "\n";
-            o << "  seed                                                       : " << options.seed << "\n";
-            return o;
+        // If verbose output requested, emit option summary.
+        if (options.verbose) {
+            std::cout << options << "\n";
         }
-    } // namespace io
-} // namespace arbor
+
+        return options;
+    }
+
+    std::ostream& operator<<(std::ostream& o, const cl_options& options) {
+        o << "simulation options:\n";
+        o << "  excitatory cells                                           : " << options.nexc << "\n";
+        o << "  inhibitory cells                                           : " << options.ninh << "\n";
+        o << "  Poisson connections per cell                               : " << options.next << "\n";
+        o << "  proportion of synapses/cell from each population           : " << options.syn_per_cell_prop << "\n";
+        o << "  weight of excitatory synapses                              : " << options.weight << "\n";
+        o << "  relative strength of inhibitory synapses                   : " << options.rel_inh_strength << "\n";
+        o << "  delay of all synapses                                      : " << options.delay << "\n";
+        o << "  expected number of spikes from a single poisson cell per ms: " << options.poiss_lambda << "\n";
+        o << "\n";
+        o << "  simulation time                                            : " << options.tfinal << "\n";
+        o << "  dt                                                         : " << options.dt << "\n";
+        o << "  group size                                                 : " << options.group_size << "\n";
+        o << "  seed                                                       : " << options.seed << "\n";
+        return o;
+    }
+} // namespace io
diff --git a/example/brunel/io.hpp b/example/brunel/io.hpp
index deeedb5f..370e02ac 100644
--- a/example/brunel/io.hpp
+++ b/example/brunel/io.hpp
@@ -8,7 +8,6 @@
 #include <arbor/common_types.hpp>
 #include <arbor/util/optional.hpp>
 
-namespace arb {
 namespace io {
     // Holds the options for a simulation run.
     // Default constructor gives default options.
@@ -60,4 +59,3 @@ namespace io {
 
     cl_options read_options(int argc, char** argv, bool allow_write = true);
 } // namespace io
-} // namespace arbor
diff --git a/example/brunel/partitioner.hpp b/example/brunel/partitioner.hpp
deleted file mode 100644
index cd4d383d..00000000
--- a/example/brunel/partitioner.hpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include <arbor/distributed_context.hpp>
-#include <arbor/domain_decomposition.hpp>
-#include <arbor/recipe.hpp>
-
-#include "hardware/node_info.hpp"
-#include "util/partition.hpp"
-#include "util/span.hpp"
-#include "util/transform.hpp"
-
-namespace arb {
-    static
-    domain_decomposition decompose(const recipe& rec, const unsigned group_size, const distributed_context* ctx) {
-        struct partition_gid_domain {
-            partition_gid_domain(std::vector<cell_gid_type> divs):
-                gid_divisions(std::move(divs))
-            {}
-
-            int operator()(cell_gid_type gid) const {
-                auto gid_part = util::partition_view(gid_divisions);
-                return gid_part.index(gid);
-            }
-
-            const std::vector<cell_gid_type> gid_divisions;
-        };
-
-        cell_size_type num_global_cells = rec.num_cells();
-        unsigned num_domains = ctx->size();
-        int domain_id = ctx->id();
-
-        auto dom_size = [&](unsigned dom) -> cell_gid_type {
-            const cell_gid_type B = num_global_cells/num_domains;
-            const cell_gid_type R = num_global_cells - num_domains*B;
-            return B + (dom<R);
-        };
-
-        // Global load balance
-        std::vector<cell_gid_type> gid_divisions;
-        auto gid_part = make_partition(
-            gid_divisions, util::transform_view(util::make_span(num_domains), dom_size));
-
-        auto range = gid_part[domain_id];
-        cell_size_type num_local_cells = range.second - range.first;
-
-        unsigned num_groups = num_local_cells / group_size + (num_local_cells%group_size== 0 ? 0 : 1);
-        std::vector<group_description> groups;
-
-        // Local load balance
-        // i.e. all the groups that the current rank (domain) owns
-        for (unsigned i = 0; i < num_groups; ++i) {
-            unsigned start = i * group_size;
-            unsigned end = std::min(start + group_size, num_local_cells);
-            std::vector<cell_gid_type> group_elements;
-
-            for (unsigned j = start; j < end; ++j) {
-                group_elements.push_back(j);
-            }
-
-            groups.push_back({cell_kind::lif_neuron, std::move(group_elements), backend_kind::multicore});
-        }
-
-        domain_decomposition d;
-        d.num_domains = num_domains;
-        d.domain_id = domain_id;
-        d.num_local_cells = num_local_cells;
-        d.num_global_cells = num_global_cells;
-        d.groups = std::move(groups);
-        d.gid_domain = partition_gid_domain(std::move(gid_divisions));
-
-        return d;
-    }
-}
diff --git a/example/generators/CMakeLists.txt b/example/generators/CMakeLists.txt
index 7235b104..bc2a70b5 100644
--- a/example/generators/CMakeLists.txt
+++ b/example/generators/CMakeLists.txt
@@ -1,6 +1,3 @@
 add_executable(event-gen event_gen.cpp)
 
 target_link_libraries(event-gen PRIVATE arbor arbor-aux ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(event-gen PRIVATE arbor-private-headers)
diff --git a/example/generators/event_gen.cpp b/example/generators/event_gen.cpp
index 22607137..f3c577c3 100644
--- a/example/generators/event_gen.cpp
+++ b/example/generators/event_gen.cpp
@@ -14,15 +14,14 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/event_generator.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-
 using arb::cell_gid_type;
 using arb::cell_lid_type;
 using arb::cell_size_type;
@@ -135,7 +134,7 @@ int main() {
     generator_recipe recipe;
 
     // Make the domain decomposition for the model
-    auto node = arb::hw::get_node_info();
+    auto node = arb::local_allocation();
     auto decomp = arb::partition_load_balance(recipe, node, &context);
 
     // Construct the model.
diff --git a/example/miniapp/CMakeLists.txt b/example/miniapp/CMakeLists.txt
index f2d8e9bd..84480623 100644
--- a/example/miniapp/CMakeLists.txt
+++ b/example/miniapp/CMakeLists.txt
@@ -7,6 +7,3 @@ add_executable(miniapp
 )
 
 target_link_libraries(miniapp PRIVATE arbor arbor-aux ext-tclap ext-json)
-
-# TODO: resolve public headers
-target_link_libraries(miniapp PRIVATE arbor-private-headers)
diff --git a/example/miniapp/io.hpp b/example/miniapp/io.hpp
index f9e89f22..8d300931 100644
--- a/example/miniapp/io.hpp
+++ b/example/miniapp/io.hpp
@@ -9,8 +9,7 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/util/optional.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 // TODO: this shouldn't be in arb namespace
 namespace arb {
diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp
index c1095b08..e384a015 100644
--- a/example/miniapp/miniapp.cpp
+++ b/example/miniapp/miniapp.cpp
@@ -7,6 +7,7 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/profile/meter_manager.hpp>
 #include <arbor/profile/profiler.hpp>
@@ -17,15 +18,14 @@
 #include <arbor/util/any.hpp>
 #include <arbor/version.hpp>
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "io/exporter_spike_file.hpp"
-#include "load_balance.hpp"
-#include "util/ioutil.hpp"
 
-#include "json_meter.hpp"
+#include <aux/ioutil.hpp>
+#include <aux/json_meter.hpp>
+#include <aux/path.hpp>
+#include <aux/spike_emitter.hpp>
+#include <aux/strsub.hpp>
 #ifdef ARB_MPI_ENABLED
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
 #include "io.hpp"
@@ -36,10 +36,10 @@ using namespace arb;
 
 using util::any_cast;
 
-void banner(hw::node_info, const distributed_context*);
+void banner(proc_allocation, const distributed_context*);
 std::unique_ptr<recipe> make_recipe(const io::cl_options&, const probe_distribution&);
 sample_trace make_trace(const probe_info& probe);
-
+std::fstream& open_or_throw(std::fstream& file, const aux::path& p, bool exclusive = false);
 void report_compartment_stats(const recipe&);
 
 int main(int argc, char** argv) {
@@ -55,7 +55,7 @@ int main(int argc, char** argv) {
         profile::meter_manager meters(&context);
         meters.start();
 
-        std::cout << util::mask_stream(context.id()==0);
+        std::cout << aux::mask_stream(context.id()==0);
         // read parameters
         io::cl_options options = io::read_options(argc, argv, context.id()==0);
 
@@ -63,9 +63,8 @@ int main(int argc, char** argv) {
 
         // Use a node description that uses the number of threads used by the
         // threading back end, and 1 gpu if available.
-        hw::node_info nd;
-        nd.num_cpu_cores = arb::num_threads();
-        nd.num_gpus = hw::num_gpus()>0? 1: 0;
+        proc_allocation nd = local_allocation();
+        nd.num_gpus = nd.num_gpus>=1? 1: 0;
         banner(nd, &context);
 
         meters.checkpoint("setup");
@@ -80,13 +79,6 @@ int main(int argc, char** argv) {
             report_compartment_stats(*recipe);
         }
 
-        auto register_exporter = [] (const io::cl_options& options) {
-            return
-                std::make_unique<io::exporter_spike_file>(
-                    options.file_name, options.output_path,
-                    options.file_extension, options.over_write);
-        };
-
         auto decomp = partition_load_balance(*recipe, nd, &context);
         simulation sim(*recipe, decomp, &context);
 
@@ -121,21 +113,21 @@ int main(int argc, char** argv) {
         sim.set_binning_policy(binning_policy, options.bin_dt);
 
         // Initialize the spike exporting interface
-        std::unique_ptr<io::exporter_spike_file> file_exporter;
+        std::fstream spike_out;
         if (options.spike_file_output) {
+            using std::ios_base;
+
+            auto rank = context.id();
+            aux::path p = options.output_path;
+            p /= aux::strsub("%_%.%", options.file_name, rank, options.file_extension);
+
             if (options.single_file_per_rank) {
-                file_exporter = register_exporter(options);
-                sim.set_local_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                        file_exporter->output(spikes);
-                    });
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_local_spike_callback(aux::spike_emitter(spike_out));
             }
-            else if(context.id()==0) {
-                file_exporter = register_exporter(options);
-                sim.set_global_spike_callback(
-                    [&](const std::vector<spike>& spikes) {
-                       file_exporter->output(spikes);
-                    });
+            else if (rank==0) {
+                spike_out = aux::open_or_throw(p, ios_base::out, !options.over_write);
+                sim.set_global_spike_callback(aux::spike_emitter(spike_out));
             }
         }
 
@@ -168,7 +160,7 @@ int main(int argc, char** argv) {
     }
     catch (io::usage_error& e) {
         // only print usage/startup errors on master
-        std::cerr << util::mask_stream(context.id()==0);
+        std::cerr << aux::mask_stream(context.id()==0);
         std::cerr << e.what() << "\n";
         return 1;
     }
@@ -179,12 +171,12 @@ int main(int argc, char** argv) {
     return 0;
 }
 
-void banner(hw::node_info nd, const distributed_context* ctx) {
+void banner(proc_allocation nd, const distributed_context* ctx) {
     std::cout << "==========================================\n";
     std::cout << "  Arbor miniapp\n";
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
-    std::cout << "  - threads     : " << nd.num_cpu_cores
+    std::cout << "  - threads     : " << nd.num_threads
               << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
@@ -258,3 +250,4 @@ void report_compartment_stats(const recipe& rec) {
 
     std::cout << "compartments/cell: min=" << ncomp_min <<"; max=" << ncomp_max << "; mean=" << (double)ncomp_total/ncell << "\n";
 }
+
diff --git a/example/miniapp/morphology_pool.cpp b/example/miniapp/morphology_pool.cpp
index 4d3937e4..8515f20e 100644
--- a/example/miniapp/morphology_pool.cpp
+++ b/example/miniapp/morphology_pool.cpp
@@ -3,9 +3,10 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
+#include <arbor/swcio.hpp>
 
-#include "swcio.hpp"
-#include "util/path.hpp"
+#include <aux/glob.hpp>
+#include <aux/path.hpp>
 
 #include "morphology_pool.hpp"
 
@@ -30,22 +31,22 @@ static morphology make_basic_y_morphology() {
 
 morphology_pool default_morphology_pool(make_basic_y_morphology());
 
-void load_swc_morphology(morphology_pool& pool, const util::path& swc_path) {
+void load_swc_morphology(morphology_pool& pool, const aux::path& swc_path) {
     std::ifstream fi;
     fi.exceptions(std::ifstream::failbit);
 
     fi.open(swc_path.c_str());
-    pool.insert(io::swc_as_morphology(io::parse_swc_file(fi)));
+    pool.insert(swc_as_morphology(parse_swc_file(fi)));
 }
 
 void load_swc_morphology_glob(morphology_pool& pool, const std::string& swc_pattern) {
     std::ifstream fi;
     fi.exceptions(std::ifstream::failbit);
 
-    auto swc_paths = util::glob(swc_pattern);
+    auto swc_paths = aux::glob(swc_pattern);
     for (const auto& p: swc_paths) {
         fi.open(p.c_str());
-        pool.insert(io::swc_as_morphology(io::parse_swc_file(fi)));
+        pool.insert(swc_as_morphology(parse_swc_file(fi)));
         pool[pool.size()-1].assert_valid();
         fi.close();
     }
diff --git a/example/miniapp/morphology_pool.hpp b/example/miniapp/morphology_pool.hpp
index a7b46230..31567c0b 100644
--- a/example/miniapp/morphology_pool.hpp
+++ b/example/miniapp/morphology_pool.hpp
@@ -9,8 +9,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 namespace arb {
 
@@ -35,7 +34,7 @@ public:
 
 extern morphology_pool default_morphology_pool;
 
-void load_swc_morphology(morphology_pool& pool, const util::path& swc_path);
+void load_swc_morphology(morphology_pool& pool, const aux::path& swc_path);
 void load_swc_morphology_glob(morphology_pool& pool, const std::string& pattern);
 
 } // namespace arb
diff --git a/example/miniapp/trace.cpp b/example/miniapp/trace.cpp
index 5fc87898..39db7138 100644
--- a/example/miniapp/trace.cpp
+++ b/example/miniapp/trace.cpp
@@ -1,12 +1,11 @@
 #include <fstream>
+#include <iomanip>
 #include <string>
 
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
 
-#include "util/strprintf.hpp"
-
 #include "trace.hpp"
 
 using namespace arb;
@@ -23,8 +22,10 @@ void write_trace_csv(const sample_trace& trace, const std::string& prefix) {
     file << "# probe: " << trace.probe_id.index << "\n";
     file << "time_ms, " << trace.name << "_" << trace.units << "\n";
 
+    file.precision(15);
+    file << std::fixed;
     for (const auto& sample: trace.samples) {
-        file << util::strprintf("% 20.15f, % 20.15f\n", sample.t, sample.v);
+        file << std::setw(20) << sample.t << ',' << std::setw(20) << sample.v << '\n';
     }
 }
 
diff --git a/include/arbor/domain_decomposition.hpp b/include/arbor/domain_decomposition.hpp
index aecf5a83..61f1710b 100644
--- a/include/arbor/domain_decomposition.hpp
+++ b/include/arbor/domain_decomposition.hpp
@@ -9,14 +9,16 @@
 
 namespace arb {
 
-inline bool has_gpu_backend(cell_kind k) {
-    if (k==cell_kind::cable1d_neuron) {
-        return true;
-    }
-    return false;
-}
+/// Local resource info for domain partitioning.
+struct proc_allocation {
+    unsigned num_threads = 1;  ///< thread count; defaults to 1
+    unsigned num_gpus = 0;     ///< GPU count; defaults to 0
+};
+
+/// Determine available local domain resources.
+proc_allocation local_allocation();
 
-/// Meta data for a local cell group.
+/// Metadata for a local cell group.
 struct group_description {
     /// The kind of cell in the group. All cells in a cell_group have the same type.
     const cell_kind kind;
diff --git a/include/arbor/event_generator.hpp b/include/arbor/event_generator.hpp
index e18290d7..02ad77b8 100644
--- a/include/arbor/event_generator.hpp
+++ b/include/arbor/event_generator.hpp
@@ -1,9 +1,11 @@
 #pragma once
 
+#include <algorithm>
 #include <cstdint>
 #include <memory>
 #include <random>
 
+#include <arbor/assert.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/generic_event.hpp>
 #include <arbor/spike_event.hpp>
diff --git a/include/arbor/load_balance.hpp b/include/arbor/load_balance.hpp
new file mode 100644
index 00000000..8235da03
--- /dev/null
+++ b/include/arbor/load_balance.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/recipe.hpp>
+
+namespace arb {
+
+/// Partitioning hints for one cell kind, supplied to partition_load_balance() via partition_hint_map.
+struct partition_hint {
+    constexpr static std::size_t max_size = static_cast<std::size_t>(-1);  ///< largest std::size_t value
+    std::size_t cpu_group_size = 1;         ///< group size hint for CPU cell groups (default 1)
+    std::size_t gpu_group_size = max_size;  ///< group size hint for GPU cell groups (default max_size)
+    bool prefer_gpu = true;                 ///< presumably: favour a GPU back end when available -- confirm
+};
+
+using partition_hint_map = std::unordered_map<cell_kind, partition_hint>;
+
+domain_decomposition partition_load_balance(
+    const recipe& rec,
+    proc_allocation nd,
+    const distributed_context* ctx,
+    partition_hint_map hint_map = {});
+
+} // namespace arb
diff --git a/arbor/math.hpp b/include/arbor/math.hpp
similarity index 95%
rename from arbor/math.hpp
rename to include/arbor/math.hpp
index 6f538cf0..d0b24600 100644
--- a/arbor/math.hpp
+++ b/include/arbor/math.hpp
@@ -114,18 +114,6 @@ C round_up(T v, U b) {
     return v-m+signum(m)*impl::abs_if_signed(b, Signed{});
 }
 
-// Return minimum of the two values
-template <typename T>
-T min(const T& lhs, const T& rhs) {
-    return lhs<rhs? lhs: rhs;
-}
-
-// Return maximum of the two values
-template <typename T>
-T max(const T& lhs, const T& rhs) {
-    return lhs<rhs? rhs: lhs;
-}
-
 // Value of x/(exp(x)-1) with care taken to handle x=0 case
 template <typename T>
 inline
diff --git a/include/arbor/swcio.hpp b/include/arbor/swcio.hpp
new file mode 100644
index 00000000..50003bd1
--- /dev/null
+++ b/include/arbor/swcio.hpp
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <exception>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include <arbor/assert.hpp>
+#include <arbor/arbexcept.hpp>
+#include <arbor/morphology.hpp>
+#include <arbor/point.hpp>
+
+namespace arb {
+
+/// Exception for SWC errors (e.g. inconsistent records); line_number defaults to 0.
+struct swc_error: public arbor_exception {
+    unsigned line_number;
+    explicit swc_error(const std::string& msg, unsigned line = 0):
+        arbor_exception(msg), line_number(line) {}
+};
+
+class swc_record {  // one SWC record: kind, id, position, radius and parent id
+public:
+    using id_type = int;        // type of record and parent ids
+    using coord_type = double;  // type of coordinates and radius
+
+    // Record kinds. More on SWC files: http://research.mssm.edu/cnic/swc.html
+    enum class kind {
+        undefined = 0,
+        soma,
+        axon,
+        dendrite,
+        apical_dendrite,
+        fork_point,
+        end_point,
+        custom
+    };
+
+    kind type = kind::undefined; // record type
+    id_type id = 0;              // record id
+    coord_type x = 0;            // record coordinates
+    coord_type y = 0;
+    coord_type z = 0;
+    coord_type r = 0;            // record radius
+    id_type parent_id= -1;      // record parent's id
+
+    // swc records assume zero-based indexing; root's parent remains -1
+    swc_record(swc_record::kind type, int id,
+               coord_type x, coord_type y, coord_type z, coord_type r,
+               int parent_id):
+        type(type), id(id), x(x), y(y), z(z), r(r), parent_id(parent_id)
+    {}
+
+    swc_record() = default;  // undefined kind, id 0, zero coordinates and radius, parent_id -1
+    swc_record(const swc_record& other) = default;
+    swc_record& operator=(const swc_record& other) = default;
+
+    bool operator==(const swc_record& other) const {
+        return id == other.id &&
+            x == other.x &&
+            y == other.y &&
+            z == other.z &&
+            r == other.r &&
+            parent_id == other.parent_id;  // NOTE(review): `type` is not compared -- confirm intended
+    }
+
+    friend bool operator!=(const swc_record& lhs, const swc_record& rhs) {
+        return !(lhs == rhs);
+    }
+
+    friend std::ostream& operator<<(std::ostream& os, const swc_record& record);  // declared only; defined elsewhere
+
+    coord_type diameter() const {  // twice the radius
+        return 2*r;
+    }
+
+    arb::point<coord_type> coord() const {  // position (x, y, z) as a point
+        return arb::point<coord_type>(x, y, z);
+    }
+
+    arb::section_point as_section_point() const {  // position and radius as {x, y, z, r}
+        return arb::section_point{x, y, z, r};
+    }
+
+    // validity checks
+    bool is_consistent() const;
+    void assert_consistent() const; // throw swc_error if inconsistent.
+};
+
+
+// Parse one record, skipping comments and blank lines.
+std::istream& operator>>(std::istream& is, swc_record& record);
+
+// Parse and canonicalize an EOF-terminated sequence of records.
+// Throw on parsing failure.
+std::vector<swc_record> parse_swc_file(std::istream& is);
+
+// Convert a canonical (see below) vector of SWC records to a morphology object.
+morphology swc_as_morphology(const std::vector<swc_record>& swc_records);
+
+// Given a vector of `swc_record` describing a single morphology,
+// check for consistency and renumber records
+// so that ids are contiguous within branches, have no gaps, and
+// are ordered with respect to parent indices.
+void swc_canonicalize(std::vector<swc_record>& swc_records);
+
+} // namespace arb
diff --git a/include/arbor/time_sequence.hpp b/include/arbor/time_sequence.hpp
index dff057c5..9d2d0029 100644
--- a/include/arbor/time_sequence.hpp
+++ b/include/arbor/time_sequence.hpp
@@ -7,11 +7,8 @@
 
 #include <arbor/common_types.hpp>
 
-#include "util/rangeutil.hpp"
-
 namespace arb {
 
-
 struct empty_time_seq {
     time_type front() { return terminal_time; }
     void pop() {}
diff --git a/lmorpho/lmorpho.cpp b/lmorpho/lmorpho.cpp
index b4f5492b..892a732c 100644
--- a/lmorpho/lmorpho.cpp
+++ b/lmorpho/lmorpho.cpp
@@ -5,9 +5,9 @@
 #include <sstream>
 #include <vector>
 
-#include <tinyopt.hpp>
 #include <arbor/morphology.hpp>
 #include <arbor/util/optional.hpp>
+#include <aux/tinyopt.hpp>
 
 #include "morphio.hpp"
 #include "lsystem.hpp"
diff --git a/lmorpho/lsystem.cpp b/lmorpho/lsystem.cpp
index e2ceee43..ad3cf88b 100644
--- a/lmorpho/lsystem.cpp
+++ b/lmorpho/lsystem.cpp
@@ -5,7 +5,7 @@
 #include <vector>
 
 #include <arbor/morphology.hpp>
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 #include "lsystem.hpp"
 
diff --git a/lmorpho/morphio.cpp b/lmorpho/morphio.cpp
index ee319dda..e556f172 100644
--- a/lmorpho/morphio.cpp
+++ b/lmorpho/morphio.cpp
@@ -1,18 +1,18 @@
 #include <fstream>
+#include <iomanip>
 #include <iterator>
 #include <map>
+#include <ostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 #include <arbor/morphology.hpp>
-
-#include "swcio.hpp"
-#include "util/strprintf.hpp"
+#include <arbor/swcio.hpp>
 
 #include "morphio.hpp"
 
-using arb::io::swc_record;
-using arb::util::strprintf;
+using arb::swc_record;
 
 std::vector<swc_record> as_swc(const arb::morphology& morph);
 
@@ -25,21 +25,10 @@ multi_file::multi_file(const std::string& pattern, int digits) {
     use_stdout_ = pattern.empty() || pattern=="-";
 
     if (!concat_) {
-        std::string nfmt = digits? "%0"+std::to_string(digits)+"d": "%d";
-        std::string::size_type i = 0;
-        for (;;) {
-            auto p = pattern.find("%", i);
-
-            if (p==npos) {
-                fmt_ += pattern.substr(i);
-                break;
-            }
-            else {
-                fmt_ += pattern.substr(i, p-i);
-                fmt_ += i==0? nfmt: "%%";
-                i = p+1;
-            }
-        }
+        auto p = pattern.find("%");
+        fmt_prefix_ = pattern.substr(0, p);
+        fmt_suffix_ = pattern.substr(p+1);
+        fmt_digits_ = digits;
     }
     else {
         filename_ = pattern;
@@ -53,15 +42,28 @@ void multi_file::open(unsigned n) {
 
     if (file_.is_open()) file_.close();
 
-    std::string fname = concat_? filename_: strprintf(fmt_, n);
+    std::string fname;
+    if (concat_) {
+        fname = filename_;
+    }
+    else {
+        std::stringstream ss;
+        ss << fmt_prefix_ << std::setfill('0') << std::setw(fmt_digits_) << n << fmt_suffix_;
+        fname = ss.str();
+    }
+
     file_.open(fname);
 
     current_n_ = n;
 }
 
-// SWC transform
+// Build the error text for a cable section that has too few points.
+static std::string short_cable_message(int id, unsigned sz) {
+    return "surprisingly short cable: id=" + std::to_string(id) +
+           ", size=" + std::to_string(sz);
+}
 
-using arb::io::swc_record;
+// SWC transform
 
 // TODO: Move this functionality to arbor library.
 std::vector<swc_record> as_swc(const arb::morphology& morph) {
@@ -82,7 +84,7 @@ std::vector<swc_record> as_swc(const arb::morphology& morph) {
         const auto& points = sec.points;
         auto n = points.size();
         if (n<2) {
-            throw std::runtime_error(strprintf("surprisingly short cable: id=%d, size=%ul", sec.id, n));
+            throw std::runtime_error(short_cable_message(sec.id, n));
         }
 
         // Include first point only for dendrites segments attached to soma.
@@ -140,7 +142,7 @@ std::vector<int> as_pvector(const arb::morphology& morph, unsigned offset) {
 
         auto n = sec.points.size();
         if (n<2) {
-            throw std::runtime_error(strprintf("surprisingly short cable: id=%d, size=%ul", sec.id, n));
+            throw std::runtime_error(short_cable_message(sec.id, n));
         }
 
         for (unsigned i = 1; i<n; ++i) {
diff --git a/lmorpho/morphio.hpp b/lmorpho/morphio.hpp
index 3d31dab7..23a2854b 100644
--- a/lmorpho/morphio.hpp
+++ b/lmorpho/morphio.hpp
@@ -14,8 +14,12 @@ private:
     std::ofstream file_;
     bool concat_ = false;
     bool use_stdout_ = false;
-    std::string fmt_;       // use if not concat_
-    std::string filename_;  // use if concat_
+    // Used if not concat_: file name is fmt_prefix_, the index zero-padded to fmt_digits_, then fmt_suffix_.
+    std::string fmt_prefix_;
+    std::string fmt_suffix_;
+    int fmt_digits_ = 0;
+    // Used if concat_: the output file name, taken verbatim from the pattern.
+    std::string filename_;
     unsigned current_n_ = 0;
 
 public:
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index 6a0cc330..0c7a6376 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -131,11 +131,12 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     io::pfxstringstream out;
 
     out <<
+        "#include <algorithm>\n"
         "#include <cmath>\n"
         "#include <cstddef>\n"
         "#include <memory>\n"
         "#include <" << arb_private_header_prefix() << "backends/multicore/mechanism.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "math.hpp>\n";
+        "#include <" << arb_header_prefix() << "math.hpp>\n";
 
     opt.profile &&
         out << "#include <" << arb_header_prefix() << "profile/profiler.hpp>\n";
@@ -152,13 +153,13 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         "using value_type = base::value_type;\n"
         "using size_type = base::size_type;\n"
         "using index_type = base::index_type;\n"
+        "using ::arb::math::exprelr;\n"
         "using ::std::abs;\n"
         "using ::std::cos;\n"
         "using ::std::exp;\n"
-        "using ::arb::math::exprelr;\n"
         "using ::std::log;\n"
-        "using ::arb::math::max;\n"
-        "using ::arb::math::min;\n"
+        "using ::std::max;\n"
+        "using ::std::min;\n"
         "using ::std::pow;\n"
         "using ::std::sin;\n"
         "\n";
diff --git a/test/ubench/accumulate_functor_values.cpp b/test/ubench/accumulate_functor_values.cpp
index fdf6653b..37586820 100644
--- a/test/ubench/accumulate_functor_values.cpp
+++ b/test/ubench/accumulate_functor_values.cpp
@@ -10,8 +10,8 @@
 
 #include <benchmark/benchmark.h>
 
-#include <util/span.hpp>
-#include <util/transform.hpp>
+#include "util/span.hpp"
+#include "util/transform.hpp"
 
 #define NOINLINE __attribute__((noinline))
 
diff --git a/test/ubench/default_construct.cpp b/test/ubench/default_construct.cpp
index dc0614e2..8dbb770a 100644
--- a/test/ubench/default_construct.cpp
+++ b/test/ubench/default_construct.cpp
@@ -8,7 +8,7 @@
 
 #include <benchmark/benchmark.h>
 
-#include <util/span.hpp>
+#include "util/span.hpp"
 
 using arb::util::make_span;
 
diff --git a/test/ubench/event_binning.cpp b/test/ubench/event_binning.cpp
index 1c33cfa7..85e78db5 100644
--- a/test/ubench/event_binning.cpp
+++ b/test/ubench/event_binning.cpp
@@ -7,12 +7,13 @@
 #include <unordered_map>
 #include <vector>
 
+#include <benchmark/benchmark.h>
+
 #include <arbor/spike_event.hpp>
 
 #include "event_queue.hpp"
 #include "backends/event.hpp"
 
-#include <benchmark/benchmark.h>
 
 using namespace arb;
 
diff --git a/test/ubench/event_setup.cpp b/test/ubench/event_setup.cpp
index 9931e8ba..f9677b4d 100644
--- a/test/ubench/event_setup.cpp
+++ b/test/ubench/event_setup.cpp
@@ -10,14 +10,15 @@
 // TODO: The staged_events output is a vector of spike_event, not
 // a deliverable event.
 
+#include <algorithm>
 #include <random>
 #include <vector>
 
-#include <event_queue.hpp>
-#include <backends/event.hpp>
-
 #include <benchmark/benchmark.h>
 
+#include "event_queue.hpp"
+#include "backends/event.hpp"
+
 using namespace arb;
 
 std::vector<spike_event> generate_inputs(size_t ncells, size_t ev_per_cell) {
diff --git a/test/ubench/mech_vec.cpp b/test/ubench/mech_vec.cpp
index 6f61d726..32e59697 100644
--- a/test/ubench/mech_vec.cpp
+++ b/test/ubench/mech_vec.cpp
@@ -6,9 +6,9 @@
 
 #include <arbor/mc_cell.hpp>
 
-#include <backends/multicore/fvm.hpp>
-#include <benchmark/benchmark.h>
-#include <fvm_lowered_cell_impl.hpp>
+#include "backends/multicore/fvm.hpp"
+#include "benchmark/benchmark.h"
+#include "fvm_lowered_cell_impl.hpp"
 
 using namespace arb;
 
diff --git a/test/unit-distributed/CMakeLists.txt b/test/unit-distributed/CMakeLists.txt
index fdd9b77f..6e5030f0 100644
--- a/test/unit-distributed/CMakeLists.txt
+++ b/test/unit-distributed/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(unit-distributed_sources
     distributed_listener.cpp
     test_domain_decomposition.cpp
-    test_exporter_spike_file.cpp
     test_communicator.cpp
     test_mpi.cpp
 
diff --git a/test/unit-distributed/test.cpp b/test/unit-distributed/test.cpp
index 152afa34..f5be0bdd 100644
--- a/test/unit-distributed/test.cpp
+++ b/test/unit-distributed/test.cpp
@@ -7,16 +7,14 @@
 
 #include <arbor/distributed_context.hpp>
 
-#include <tinyopt.hpp>
-#include <communication/communicator.hpp>
-#include <util/ioutil.hpp>
-
-#include "distributed_listener.hpp"
-
+#include <aux/ioutil.hpp>
+#include <aux/tinyopt.hpp>
 #ifdef TEST_MPI
-#include "with_mpi.hpp"
+#include <aux/with_mpi.hpp>
 #endif
 
+#include "distributed_listener.hpp"
+
 using namespace arb;
 
 distributed_context g_context;
@@ -28,9 +26,6 @@ const char* usage_str =
 "  -h, --help          Display usage information and exit\n";
 
 int main(int argc, char **argv) {
-    // We need to set the communicator policy at the top level
-    // this allows us to build multiple communicators in the tests
-
 #ifdef TEST_MPI
     with_mpi guard(argc, argv, false);
     g_context = mpi_context(MPI_COMM_WORLD);
diff --git a/test/unit-distributed/test_communicator.cpp b/test/unit-distributed/test_communicator.cpp
index 3c3daf17..6e2907d2 100644
--- a/test/unit-distributed/test_communicator.cpp
+++ b/test/unit-distributed/test_communicator.cpp
@@ -5,11 +5,11 @@
 #include <vector>
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/spike_event.hpp>
 
 #include "communication/communicator.hpp"
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
 #include "util/filter.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
@@ -371,7 +371,7 @@ TEST(communicator, ring)
     auto R = ring_recipe(n_global);
     // use a node decomposition that reflects the resources available
     // on the node that the test is running on, including gpus.
-    const auto D = partition_load_balance(R, hw::node_info(), &g_context);
+    const auto D = partition_load_balance(R, local_allocation(), &g_context);
     auto C = communicator(R, D, &g_context);
 
     // every cell fires
@@ -466,7 +466,7 @@ TEST(communicator, all2all)
     auto R = all2all_recipe(n_global);
     // use a node decomposition that reflects the resources available
     // on the node that the test is running on, including gpus.
-    const auto D = partition_load_balance(R, hw::node_info(), &g_context);
+    const auto D = partition_load_balance(R, local_allocation(), &g_context);
     auto C = communicator(R, D, &g_context);
 
     // every cell fires
diff --git a/test/unit-distributed/test_domain_decomposition.cpp b/test/unit-distributed/test_domain_decomposition.cpp
index 685cb75e..bade370f 100644
--- a/test/unit-distributed/test_domain_decomposition.cpp
+++ b/test/unit-distributed/test_domain_decomposition.cpp
@@ -8,10 +8,10 @@
 #include <vector>
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 
-#include <communication/communicator.hpp>
-#include <hardware/node_info.hpp>
-#include <load_balance.hpp>
+#include "util/span.hpp"
 
 #include "../simple_recipes.hpp"
 #include "test.hpp"
@@ -72,7 +72,7 @@ TEST(domain_decomposition, homogeneous_population) {
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         // 10 cells per domain
         unsigned n_local = 10;
@@ -103,7 +103,7 @@ TEST(domain_decomposition, homogeneous_population) {
     }
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that all cells will be placed on gpu in a single group.
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         // 10 cells per domain
         unsigned n_local = 10;
@@ -141,7 +141,7 @@ TEST(domain_decomposition, heterogeneous_population) {
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         // 10 cells per domain
         const unsigned n_local = 10;
diff --git a/test/unit-distributed/test_exporter_spike_file.cpp b/test/unit-distributed/test_exporter_spike_file.cpp
deleted file mode 100644
index 00d26e57..00000000
--- a/test/unit-distributed/test_exporter_spike_file.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
-#include "../gtest.h"
-#include "test.hpp"
-
-#include <cstdio>
-#include <fstream>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include <arbor/distributed_context.hpp>
-#include <arbor/spike.hpp>
-
-#include <communication/communicator.hpp>
-#include <io/exporter_spike_file.hpp>
-
-class exporter_spike_file_fixture : public ::testing::Test {
-protected:
-    using exporter_type = arb::io::exporter_spike_file;
-
-    std::string file_name_;
-    std::string path_;
-    std::string extension_;
-    unsigned index_;
-
-    exporter_spike_file_fixture() :
-        file_name_("spikes_exporter_spike_file_fixture"),
-        path_("./"),
-        extension_("gdf"),
-        index_(g_context.id())
-    {}
-
-    std::string get_standard_file_name() {
-        return exporter_type::create_output_file_path(file_name_, path_, extension_, index_);
-    }
-
-    void SetUp() {
-        // code here will execute just before the test ensues 
-    }
-
-    void TearDown() {
-        // delete the start create file
-        std::remove(get_standard_file_name().c_str());
-    }
-
-    ~exporter_spike_file_fixture()
-    {}
-};
-
-TEST_F(exporter_spike_file_fixture, constructor) {
-    // Create an exporter, and overwrite if neccesary.
-    exporter_type exporter(file_name_, path_, extension_, index_, true);
-
-    // Assert that the output file exists
-    {
-        std::ifstream f(get_standard_file_name());
-        ASSERT_TRUE(f.good());
-    }
-
-    // Create a new exporter with overwrite false. This should throw, because an
-    // outut file with the same name is in use by exporter.
-    try {
-        exporter_type exporter1(file_name_, path_, extension_, index_, false);
-        FAIL() << "expected a file already exists error";
-    }
-    catch (const std::runtime_error& err) {
-        EXPECT_EQ(
-            err.what(),
-            "Tried opening file for writing but it exists and over_write is false: " +
-            get_standard_file_name()
-        );
-    }
-    catch (...) {
-        FAIL() << "expected a file already exists error";
-    }
-}
-
-TEST_F(exporter_spike_file_fixture, create_output_file_path) {
-    // Create some random paths, no need for fancy tests here
-    std::string produced_filename =
-        exporter_type::create_output_file_path("spikes", "./", "gdf", 0);
-    EXPECT_STREQ(produced_filename.c_str(), "./spikes_0.gdf");
-
-    produced_filename =
-        exporter_type::create_output_file_path("a_name", "../../", "txt", 5);
-    EXPECT_STREQ(produced_filename.c_str(), "../../a_name_5.txt");
-}
-
-TEST_F(exporter_spike_file_fixture, do_export) {
-    {
-        exporter_type exporter(file_name_, path_, extension_, g_context.id());
-
-        // Create some spikes
-        std::vector<arb::spike> spikes;
-        spikes.push_back({ { 0, 0 }, 0.0 });
-        spikes.push_back({ { 0, 0 }, 0.1 });
-        spikes.push_back({ { 1, 0 }, 1.0 });
-        spikes.push_back({ { 1, 0 }, 1.1 });
-
-        // now do the export
-        exporter.output(spikes);
-    }
-
-    // Test if we have spikes in the file?
-    std::ifstream f(get_standard_file_name());
-    EXPECT_TRUE(f.good());
-
-    std::string line;
-
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "0 0.0000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "0 0.1000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "1 1.0000");
-    EXPECT_TRUE(std::getline(f, line));
-    EXPECT_STREQ(line.c_str(), "1 1.1000");
-}
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 887ffe53..5c2d5e18 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -49,7 +49,6 @@ set(unit_sources
     test_mechcat.cpp
     test_merge_events.cpp
     test_multi_event_stream.cpp
-    test_nop.cpp
     test_optional.cpp
     test_mechinfo.cpp
     test_padded.cpp
@@ -67,6 +66,7 @@ set(unit_sources
     test_span.cpp
     test_spikes.cpp
     test_spike_store.cpp
+    test_spike_emitter.cpp
     test_stats.cpp
     test_strprintf.cpp
     test_swcio.cpp
@@ -106,5 +106,5 @@ endif()
 add_executable(unit ${unit_sources})
 target_compile_options(unit PRIVATE ${CXXOPT_ARCH})
 target_compile_definitions(unit PRIVATE "-DDATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"")
-target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers)
+target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers arbor-aux)
 
diff --git a/test/unit/test_compartments.cpp b/test/unit/test_compartments.cpp
index 13196d73..a86ef3e6 100644
--- a/test/unit/test_compartments.cpp
+++ b/test/unit/test_compartments.cpp
@@ -3,9 +3,10 @@
 
 #include "../gtest.h"
 
+#include <arbor/math.hpp>
+
 #include "algorithms.hpp"
 #include "fvm_compartment.hpp"
-#include "math.hpp"
 #include "util/span.hpp"
 #include "util/transform.hpp"
 
diff --git a/test/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp
index dc27a433..effb760b 100644
--- a/test/unit/test_domain_decomposition.cpp
+++ b/test/unit/test_domain_decomposition.cpp
@@ -4,9 +4,8 @@
 
 #include <arbor/distributed_context.hpp>
 #include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
 #include "util/span.hpp"
 
 #include "../simple_recipes.hpp"
@@ -55,7 +54,7 @@ TEST(domain_decomposition, homogenous_population)
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         unsigned num_cells = 10;
         const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, &context);
@@ -81,7 +80,7 @@ TEST(domain_decomposition, homogenous_population)
     }
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that all cells will be placed on gpu in a single group.
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         unsigned num_cells = 10;
         const auto D = partition_load_balance(homo_recipe(num_cells, dummy_cell{}), nd, &context);
@@ -115,7 +114,7 @@ TEST(domain_decomposition, heterogenous_population)
         // We assume that all cells will be put into cell groups of size 1.
         // This assumption will not hold in the future, requiring and update to
         // the test.
-        hw::node_info nd(1, 0);
+        proc_allocation nd{1, 0};
 
         unsigned num_cells = 10;
         auto R = hetero_recipe(num_cells);
@@ -153,7 +152,7 @@ TEST(domain_decomposition, heterogenous_population)
     {   // Test on a node with 1 gpu and 1 cpu core.
         // Assumes that calble cells are on gpu in a single group, and
         // rff cells are on cpu in cell groups of size 1
-        hw::node_info nd(1, 1);
+        proc_allocation nd{1, 1};
 
         unsigned num_cells = 10;
         auto R = hetero_recipe(num_cells);
@@ -189,3 +188,43 @@ TEST(domain_decomposition, heterogenous_population)
         EXPECT_EQ(num_cells, ncells);
     }
 }
+
+TEST(domain_decomposition, hints) {
+    // Check that we can provide group size hint and gpu/cpu preference
+    // by cell kind.
+
+    distributed_context context;
+
+    partition_hint_map hints;
+    hints[cell_kind::cable1d_neuron].cpu_group_size = 3;
+    hints[cell_kind::cable1d_neuron].prefer_gpu = false;
+    hints[cell_kind::spike_source].cpu_group_size = 4;
+
+    domain_decomposition D = partition_load_balance(
+        hetero_recipe(20),
+        proc_allocation{16, 1}, // 16 threads, 1 gpu.
+        &context,
+        hints);
+
+    std::vector<std::vector<cell_gid_type>> expected_c1d_groups =
+        {{0, 2, 4}, {6, 8, 10}, {12, 14, 16}, {18}};
+
+    std::vector<std::vector<cell_gid_type>> expected_ss_groups =
+        {{1, 3, 5, 7}, {9, 11, 13, 15}, {17, 19}};
+
+    std::vector<std::vector<cell_gid_type>> c1d_groups, ss_groups;
+
+    for (auto& g: D.groups) {
+        EXPECT_TRUE(g.kind==cell_kind::cable1d_neuron || g.kind==cell_kind::spike_source);
+
+        if (g.kind==cell_kind::cable1d_neuron) {
+            c1d_groups.push_back(g.gids);
+        }
+        else if (g.kind==cell_kind::spike_source) {
+            ss_groups.push_back(g.gids);
+        }
+    }
+
+    EXPECT_EQ(expected_c1d_groups, c1d_groups);
+    EXPECT_EQ(expected_ss_groups, ss_groups);
+}
diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
index 8e3e311d..d81e3745 100644
--- a/test/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -3,10 +3,10 @@
 
 #include <arbor/util/optional.hpp>
 #include <arbor/mechcat.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mc_cell.hpp>
 
 #include "fvm_layout.hpp"
-#include "math.hpp"
 #include "util/maputil.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index 0bb0461a..f81069f6 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -5,7 +5,10 @@
 
 #include <arbor/common_types.hpp>
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/load_balance.hpp>
+#include <arbor/math.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/mc_segment.hpp>
 #include <arbor/recipe.hpp>
@@ -18,8 +21,6 @@
 #include "backends/multicore/mechanism.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "fvm_lowered_cell_impl.hpp"
-#include "load_balance.hpp"
-#include "math.hpp"
 #include "sampler_map.hpp"
 #include "util/meta.hpp"
 #include "util/maputil.hpp"
@@ -328,7 +329,7 @@ TEST(fvm_lowered, derived_mechs) {
         float times[] = {10.f, 20.f};
 
         distributed_context context;
-        auto decomp = partition_load_balance(rec, hw::node_info{1u, 0u}, &context);
+        auto decomp = partition_load_balance(rec, proc_allocation{1, 0}, &context);
         simulation sim(rec, decomp, &context);
         sim.add_sampler(all_probes, explicit_schedule(times), sampler);
         sim.run(30.0, 1.f/1024);
diff --git a/test/unit/test_lif_cell_group.cpp b/test/unit/test_lif_cell_group.cpp
index c2753e52..450226b0 100644
--- a/test/unit/test_lif_cell_group.cpp
+++ b/test/unit/test_lif_cell_group.cpp
@@ -1,17 +1,15 @@
 #include "../gtest.h"
 
 #include <arbor/distributed_context.hpp>
+#include <arbor/domain_decomposition.hpp>
 #include <arbor/lif_cell.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/threadinfo.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/spike_source_cell.hpp>
 
-#include "cell_group_factory.hpp"
-#include "hardware/node_info.hpp"
 #include "lif_cell_group.hpp"
-#include "load_balance.hpp"
-#include "threading/threading.hpp"
 
 using namespace arb;
 // Simple ring network of LIF neurons.
@@ -154,13 +152,11 @@ TEST(lif_cell_group, recipe)
 }
 
 TEST(lif_cell_group, spikes) {
-    distributed_context context;
-
     // make two lif cells
     path_recipe recipe(2, 1000, 0.1);
 
-    hw::node_info nd;
-    nd.num_cpu_cores = arb::num_threads();
+    distributed_context context;
+    proc_allocation nd = local_allocation();
 
     auto decomp = partition_load_balance(recipe, nd, &context);
     simulation sim(recipe, decomp, &context);
@@ -195,13 +191,12 @@ TEST(lif_cell_group, ring)
     double weight = 1000;
     double delay = 1;
 
-    hw::node_info nd;
-    nd.num_cpu_cores = threading::num_threads();
-
     // Total simulation time.
     time_type simulation_time = 100;
 
     distributed_context context;
+    proc_allocation nd = local_allocation();
+
     auto recipe = ring_recipe(num_lif_cells, weight, delay);
     auto decomp = partition_load_balance(recipe, nd, &context);
 
diff --git a/test/unit/test_mask_stream.cpp b/test/unit/test_mask_stream.cpp
index a14de4d3..bdc7cd8d 100644
--- a/test/unit/test_mask_stream.cpp
+++ b/test/unit/test_mask_stream.cpp
@@ -1,10 +1,10 @@
-#include "../gtest.h"
-
 #include <sstream>
 
-#include <util/ioutil.hpp>
+#include "../gtest.h"
+
+#include <aux/ioutil.hpp>
 
-using namespace arb::util;
+using aux::mask_stream;
 
 TEST(mask_stream,nomask) {
     // expect mask_stream(true) on a new stream not to change rdbuf.
diff --git a/test/unit/test_math.cpp b/test/unit/test_math.cpp
index 7d0d777a..5c6c3695 100644
--- a/test/unit/test_math.cpp
+++ b/test/unit/test_math.cpp
@@ -3,7 +3,7 @@
 
 #include "../gtest.h"
 
-#include "math.hpp"
+#include <arbor/math.hpp>
 
 using namespace arb::math;
 
@@ -377,31 +377,3 @@ TEST(math, exprelr) {
     }
 }
 
-TEST(math, minmax) {
-    constexpr double inf = std::numeric_limits<double>::infinity();
-
-    struct X {
-        double lhs;
-        double rhs;
-        double expected_min;
-        double expected_max;
-    };
-
-    std::vector<X> inputs = {
-        {  0,    1,    0,   1},
-        { -1,    1,   -1,   1},
-        { 42,   42,   42,  42},
-        {inf, -inf, -inf, inf},
-        {  0,  inf,    0, inf},
-        {  0, -inf, -inf,   0},
-    };
-
-    for (auto x: inputs) {
-        // Call min and max with arguments in both possible orders.
-        EXPECT_EQ(min(x.lhs, x.rhs), x.expected_min);
-        EXPECT_EQ(min(x.rhs, x.lhs), x.expected_min);
-        EXPECT_EQ(max(x.lhs, x.rhs), x.expected_max);
-        EXPECT_EQ(max(x.rhs, x.lhs), x.expected_max);
-    }
-}
-
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index aa5d17e7..c977f88b 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -3,11 +3,12 @@
 
 #include "../gtest.h"
 
-#include <math.hpp>
-#include <matrix.hpp>
-#include <backends/multicore/fvm.hpp>
-#include <util/rangeutil.hpp>
-#include <util/span.hpp>
+#include <arbor/math.hpp>
+
+#include "matrix.hpp"
+#include "backends/multicore/fvm.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
 
 #include "common.hpp"
 
diff --git a/test/unit/test_matrix.cu b/test/unit/test_matrix.cu
index a24863ac..b01fff92 100644
--- a/test/unit/test_matrix.cu
+++ b/test/unit/test_matrix.cu
@@ -2,21 +2,23 @@
 #include <random>
 #include <vector>
 
-#include "../gtest.h"
-#include "common.hpp"
+#include <cuda.h>
 
-#include <algorithms.hpp>
-#include <math.hpp>
-#include <matrix.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
+#include <arbor/math.hpp>
 
-#include <backends/gpu/cuda_common.hpp>
-#include <backends/gpu/matrix_state_flat.hpp>
-#include <backends/gpu/matrix_state_interleaved.hpp>
-#include <backends/gpu/matrix_interleave.hpp>
+#include "algorithms.hpp"
+#include "matrix.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
+
+#include "backends/gpu/cuda_common.hpp"
+#include "backends/gpu/matrix_state_flat.hpp"
+#include "backends/gpu/matrix_state_interleaved.hpp"
+#include "backends/gpu/matrix_interleave.hpp"
+
+#include "../gtest.h"
+#include "common.hpp"
 
-#include <cuda.h>
 
 using namespace arb;
 
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index 4b4959be..27390686 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -2,17 +2,19 @@
 #include <random>
 #include <vector>
 
+#include <arbor/math.hpp>
+
+#include "algorithms.hpp"
+#include "matrix.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
+
+#include "backends/gpu/fvm.hpp"
+#include "backends/multicore/fvm.hpp"
+
 #include "../gtest.h"
 #include "common.hpp"
 
-#include <algorithms.hpp>
-#include <math.hpp>
-#include <matrix.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
-
-#include <backends/gpu/fvm.hpp>
-#include <backends/multicore/fvm.hpp>
 
 using namespace arb;
 
diff --git a/test/unit/test_mc_cell.cpp b/test/unit/test_mc_cell.cpp
index 67858182..683f133e 100644
--- a/test/unit/test_mc_cell.cpp
+++ b/test/unit/test_mc_cell.cpp
@@ -1,8 +1,8 @@
 #include "../gtest.h"
 
 #include <arbor/mc_cell.hpp>
+#include <arbor/math.hpp>
 
-#include "math.hpp"
 #include "tree.hpp"
 
 using namespace arb;
diff --git a/test/unit/test_merge_events.cpp b/test/unit/test_merge_events.cpp
index 03c58a6a..b906c02e 100644
--- a/test/unit/test_merge_events.cpp
+++ b/test/unit/test_merge_events.cpp
@@ -3,6 +3,8 @@
 #include <event_queue.hpp>
 #include <merge_events.hpp>
 
+#include "util/rangeutil.hpp"
+
 using namespace arb;
 
 std::vector<event_generator> empty_gens;
diff --git a/test/unit/test_nop.cpp b/test/unit/test_nop.cpp
deleted file mode 100644
index 63a8857f..00000000
--- a/test/unit/test_nop.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#include "../gtest.h"
-#include "util/nop.hpp"
-
-using namespace arb::util;
-
-TEST(nop, void_fn) {
-    std::function<void ()> f(nop_function);
-
-    EXPECT_TRUE(f);
-    f(); // should do nothing
-
-    bool flag = false;
-    f = [&]() { flag = true; };
-    f();
-    EXPECT_TRUE(flag);
-
-    flag = false;
-    f = nop_function;
-    f();
-    EXPECT_FALSE(flag);
-
-    // with some arguments
-    std::function<void (int, int)> g(nop_function);
-    EXPECT_TRUE(g);
-    g(2, 3); // should do nothing
-
-    int sum = 0;
-    g = [&](int a, int b) { sum = a+b; };
-    g(2, 3);
-    EXPECT_EQ(5, sum);
-
-    sum = 0;
-    g = nop_function;
-    g(2, 3);
-    EXPECT_EQ(0, sum);
-}
-
-struct check_default {
-    int value = 100;
-
-    check_default() = default;
-    explicit check_default(int n): value(n) {}
-};
-
-TEST(nop, default_return_fn) {
-    std::function<check_default ()> f(nop_function);
-
-    EXPECT_TRUE(f);
-    auto result = f();
-    EXPECT_EQ(result.value, 100);
-
-    f = []() { return check_default(17); };
-    result = f();
-    EXPECT_EQ(result.value, 17);
-
-    f = nop_function;
-    result = f();
-    EXPECT_EQ(result.value, 100);
-
-    std::function<check_default (double, double)> g(nop_function);
-
-    EXPECT_TRUE(g);
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 100);
-
-    g = [](double x, double y) { return check_default{(int)(x*y)}; };
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 2);
-
-    g = nop_function;
-    result = g(1.4, 1.5);
-    EXPECT_EQ(result.value, 100);
-
-}
-
diff --git a/test/unit/test_partition.cpp b/test/unit/test_partition.cpp
index e2f5e5d9..d8c160c4 100644
--- a/test/unit/test_partition.cpp
+++ b/test/unit/test_partition.cpp
@@ -7,8 +7,7 @@
 
 #include <arbor/assert.hpp>
 
-#include <util/nop.hpp>
-#include <util/partition.hpp>
+#include "util/partition.hpp"
 
 using namespace arb;
 
diff --git a/test/unit/test_path.cpp b/test/unit/test_path.cpp
index 4fc98e58..d359dc2e 100644
--- a/test/unit/test_path.cpp
+++ b/test/unit/test_path.cpp
@@ -5,9 +5,9 @@
 #include <string>
 #include <vector>
 
-#include <util/path.hpp>
+#include <aux/path.hpp>
 
-using namespace arb::util;
+using namespace aux;
 
 TEST(path, posix_ctor) {
     // test constructor ans assignment overloads with sample character sequences.
@@ -41,14 +41,12 @@ TEST(path, posix_ctor) {
     EXPECT_EQ(str_cs, (p=p2).native());
     EXPECT_EQ(str_cs, (p=cs).native());
     EXPECT_EQ(str_cs, (p=str_cs).native());
-    EXPECT_EQ(str_cs, (p=vec_cs).native());
     EXPECT_EQ(str_cs, (p=std::move(p7)).native());
 
     // test assign overloads (and ref return values)
     EXPECT_EQ(str_cs, p.assign(p2).native());
     EXPECT_EQ(str_cs, p.assign(cs).native());
     EXPECT_EQ(str_cs, p.assign(str_cs).native());
-    EXPECT_EQ(str_cs, p.assign(vec_cs).native());
     EXPECT_EQ(str_cs, p.assign(vec_cs.begin(), vec_cs.end()).native());
 }
 
@@ -318,7 +316,7 @@ TEST(path, posix_status_perms) {
     perms expected = perms::owner_read|perms::owner_write|perms::group_read|perms::group_write|perms::others_read|perms::others_write;
     EXPECT_EQ(expected, null_perm);
 
-    // Expect / to be have exec flag set for everyonr
+    // Expect / to have the exec flag set for everyone
     perms root_perm = status("/").permissions();
     EXPECT_NE(perms::none, root_perm&perms::owner_exec);
     EXPECT_NE(perms::none, root_perm&perms::group_exec);
diff --git a/test/unit/test_segment.cpp b/test/unit/test_segment.cpp
index de33809d..128f629b 100644
--- a/test/unit/test_segment.cpp
+++ b/test/unit/test_segment.cpp
@@ -2,10 +2,9 @@
 
 #include "../gtest.h"
 
+#include <arbor/math.hpp>
 #include <arbor/mc_segment.hpp>
 
-#include "math.hpp"
-
 using namespace arb;
 
 TEST(mc_segment, kinfs) {
diff --git a/test/unit/test_spike_emitter.cpp b/test/unit/test_spike_emitter.cpp
new file mode 100644
index 00000000..60af2a92
--- /dev/null
+++ b/test/unit/test_spike_emitter.cpp
@@ -0,0 +1,30 @@
+#include "../gtest.h"
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <arbor/spike.hpp>
+#include <aux/spike_emitter.hpp>
+
+TEST(spike_emitter, formatting) {
+    std::stringstream out;
+    auto callback = aux::spike_emitter(out);
+
+    std::vector<arb::spike> spikes = {
+        { { 0, 0 }, 0.0 },
+        { { 0, 0 }, 0.1 },
+        { { 1, 0 }, 1.0 },
+        { { 1, 0 }, 1.1 }
+    };
+
+    callback(spikes);
+
+    std::string expected =
+        "0 0.0000\n"
+        "0 0.1000\n"
+        "1 1.0000\n"
+        "1 1.1000\n";
+
+    EXPECT_EQ(expected, out.str());
+}
diff --git a/test/unit/test_swcio.cpp b/test/unit/test_swcio.cpp
index 85ad98d6..172439e6 100644
--- a/test/unit/test_swcio.cpp
+++ b/test/unit/test_swcio.cpp
@@ -9,10 +9,10 @@
 
 #include <arbor/mc_cell.hpp>
 #include <arbor/morphology.hpp>
+#include <arbor/swcio.hpp>
 
 #include "../gtest.h"
 
-#include "swcio.hpp"
 
 // Path to data directory can be overriden at compile time.
 #if !defined(DATADIR)
@@ -22,8 +22,8 @@
 using namespace arb;
 
 // SWC tests
-void expect_record_equals(const io::swc_record& expected,
-                          const io::swc_record& actual)
+void expect_record_equals(const swc_record& expected,
+                          const swc_record& actual)
 {
     EXPECT_EQ(expected.id, actual.id);
     EXPECT_EQ(expected.type, actual.type);
@@ -36,8 +36,6 @@ void expect_record_equals(const io::swc_record& expected,
 
 TEST(swc_record, construction)
 {
-    using namespace arb::io;
-
     {
         // force an invalid type
         swc_record::kind invalid_type = static_cast<swc_record::kind>(100);
@@ -105,8 +103,6 @@ TEST(swc_record, construction)
 
 TEST(swc_parser, invalid_input_istream)
 {
-    using namespace arb::io;
-
     {
         // check incomplete lines; missing parent
         std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n");
@@ -127,8 +123,6 @@ TEST(swc_parser, invalid_input_istream)
 
 TEST(swc_parser, invalid_input_parse)
 {
-    using namespace arb::io;
-
     {
         // check incomplete lines; missing parent
         std::istringstream is("1 1 14.566132 34.873772 7.857000 0.717830\n");
@@ -168,8 +162,6 @@ TEST(swc_parser, invalid_input_parse)
 
 TEST(swc_parser, valid_input)
 {
-    using namespace arb::io;
-
     {
         // check empty file; no record may be parsed
         swc_record record, record_orig;
@@ -304,8 +296,6 @@ TEST(swc_parser, valid_input)
 
 TEST(swc_parser, from_allen_db)
 {
-    using namespace arb::io;
-
     std::string datadir{DATADIR};
     auto fname = datadir + "/example.swc";
     std::ifstream fid(fname);
@@ -323,8 +313,6 @@ TEST(swc_parser, from_allen_db)
 
 TEST(swc_parser, input_cleaning)
 {
-    using namespace arb::io;
-
     {
         // Check duplicates
         std::stringstream is;
@@ -391,8 +379,6 @@ TEST(swc_parser, input_cleaning)
 
 TEST(swc_parser, raw)
 {
-    using namespace arb::io;
-
     {
         // Check valid usage
         std::stringstream is;
@@ -469,7 +455,7 @@ TEST(swc_io, cell_construction) {
     };
 
     // swc -> morphology
-    auto morph = io::swc_as_morphology(io::parse_swc_file(is));
+    auto morph = swc_as_morphology(parse_swc_file(is));
 
     mc_cell cell = make_mc_cell(morph, true);
     EXPECT_TRUE(cell.has_soma());
@@ -561,7 +547,7 @@ TEST(swc_parser, from_file_ball_and_stick) {
     }
 
     // read the file as morhpology
-    auto bas_morph = io::swc_as_morphology(io::parse_swc_file(fid));
+    auto bas_morph = swc_as_morphology(parse_swc_file(fid));
 
     // compare with expected morphology
     morphology expected;
diff --git a/test/validation/CMakeLists.txt b/test/validation/CMakeLists.txt
index 740ce9ad..8826d8e6 100644
--- a/test/validation/CMakeLists.txt
+++ b/test/validation/CMakeLists.txt
@@ -1,7 +1,6 @@
 set(validation_sources
     # unit tests
     validate_ball_and_stick.cpp
-    validate_compartment_policy.cpp
     validate_soma.cpp
     validate_kinetic.cpp
     validate_synapses.cpp
@@ -17,7 +16,6 @@ set(validation_sources
 add_executable(validate ${validation_sources})
 target_compile_definitions(validate PRIVATE "ARB_DATADIR=\"${ARB_VALIDATION_DATA_DIR}\"")
 target_link_libraries(validate PRIVATE gtest arbor arbor-aux ext-json)
-target_link_libraries(validate PRIVATE arbor-private-headers) # temporary
 
 if(ARB_BUILD_VALIDATION_DATA)
     add_dependencies(validate validation_data)
diff --git a/test/validation/convergence_test.hpp b/test/validation/convergence_test.hpp
index 459e788f..25742cdb 100644
--- a/test/validation/convergence_test.hpp
+++ b/test/validation/convergence_test.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <iterator>
 #include <vector>
 
 #include <nlohmann/json.hpp>
@@ -8,9 +9,7 @@
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/schedule.hpp>
-
-#include "util/filter.hpp"
-#include "util/rangeutil.hpp"
+#include <aux/path.hpp>
 
 #include "../gtest.h"
 
@@ -53,19 +52,27 @@ public:
         run_validation_(false),
         meta_(meta)
     {
-        util::assign(probe_labels_, probe_labels);
+        using std::begin;
+        using std::end;
+
+        probe_labels_.assign(begin(probe_labels), end(probe_labels));
     }
 
     // Allow free access to JSON meta data attached to saved traces.
     nlohmann::json& metadata() { return meta_; }
 
-    void load_reference_data(const util::path& ref_path) {
+    void load_reference_data(const aux::path& ref_path) {
         run_validation_ = false;
         try {
             ref_data_ = g_trace_io.load_traces(ref_path);
 
-            run_validation_ = util::all_of(probe_labels_,
-                [&](const probe_label& pl) { return ref_data_.count(pl.label)>0; });
+            run_validation_ = true;
+            for (const auto& pl: probe_labels_) {
+                if (!(ref_data_.count(pl.label)>0)) {
+                    run_validation_ = false;
+                    break;
+                }
+            }
 
             EXPECT_TRUE(run_validation_);
         }
@@ -123,16 +130,27 @@ public:
     void report() {
         if (run_validation_ && g_trace_io.verbose()) {
             // reorder to group by id
-            util::stable_sort_by(conv_tbl_, [](const conv_entry<Param>& e) { return e.id; });
+            std::stable_sort(conv_tbl_.begin(), conv_tbl_.end(),
+                [](const auto& a, const auto& b) { return a.id<b.id; });
+
             report_conv_table(std::cout, conv_tbl_, param_name_);
         }
     }
 
     void assert_all_convergence() const {
+        std::vector<conv_entry<Param>> with_label;
+
         for (const auto& pl: probe_labels_) {
             SCOPED_TRACE(pl.label);
-            assert_convergence(util::filter(conv_tbl_,
-                        [&](const conv_entry<Param>& e) { return e.id==pl.label; }));
+
+            with_label.clear();
+            for (const auto& e: conv_tbl_) {
+                if (e.id==pl.label) {
+                    with_label.push_back(e);
+                }
+            }
+
+            assert_convergence(with_label);
         }
     }
 };
@@ -151,7 +169,7 @@ inline std::vector<float> stimulus_ends(const mc_cell& c) {
         ts.push_back(t1);
     }
 
-    util::sort(ts);
+    std::sort(ts.begin(), ts.end());
     return ts;
 }
 
diff --git a/test/validation/interpolate.hpp b/test/validation/interpolate.hpp
new file mode 100644
index 00000000..54d5da0a
--- /dev/null
+++ b/test/validation/interpolate.hpp
@@ -0,0 +1,57 @@
+#pragma once
+
+#include <cmath>
+#include <iterator>
+
+// Linear interpolation between a and b: gives a when u is 0 and b when u is 1.
+// Evaluated as fma(u, b, fma(-u, a, a)), i.e. u*b + (a - u*a).
+template <typename T, typename U>
+inline T lerp(T a, T b, U u) {
+    return std::fma(u, b, std::fma(-u, a, a));
+}
+
+// Piece-wise linear interpolation across a sequence of points (u_i, x_i),
+// monotonically increasing in u.
+//
+// Parameters get_u and get_x provide the accessors for the point sequence;
+// consider moving to structured bindings in C++17 instead.
+//
+// Returns NAN converted to the x value type if the sequence is empty.
+// If u lies before the first point, the first x is returned; if u is not
+// less than the last u_i, the last x is returned.
+
+template <typename U, typename Seq, typename GetU, typename GetX>
+auto pw_linear_interpolate(U u, const Seq& seq, GetU get_u, GetX get_x) {
+    using std::begin;
+    using std::end;
+    using value_type = decltype(get_x(*begin(seq)));
+
+    auto i = begin(seq);
+    auto e = end(seq);
+
+    if (i==e) {
+        return value_type(NAN);
+    }
+
+    auto u0 = get_u(*i);
+    auto x0 = get_x(*i);
+
+    if (u<u0) {
+        return x0;
+    }
+
+    // On each iteration (u0, x0) is the point preceding *i, and u is not less than u0.
+    while (++i!=e) {
+        auto u1 = get_u(*i);
+        auto x1 = get_x(*i);
+
+        if (u<u1) {
+            return lerp(x0, x1, (u-u0)/(u1-u0));
+        }
+
+        u0 = u1;
+        x0 = x1;
+    }
+
+    return x0;
+}
diff --git a/test/validation/trace_analysis.cpp b/test/validation/trace_analysis.cpp
index 6a9cd813..ccf71a42 100644
--- a/test/validation/trace_analysis.cpp
+++ b/test/validation/trace_analysis.cpp
@@ -6,13 +6,11 @@
 
 #include "../gtest.h"
 
-#include <arbor/util/optional.hpp>
+#include <arbor/math.hpp>
 #include <arbor/simple_sampler.hpp>
+#include <arbor/util/optional.hpp>
 
-#include "math.hpp"
-#include "util/partition.hpp"
-#include "util/rangeutil.hpp"
-
+#include "interpolate.hpp"
 #include "trace_analysis.hpp"
 
 namespace arb {
@@ -21,20 +19,9 @@ struct trace_interpolant {
     trace_interpolant(const trace_data<double>& trace): trace_(trace) {}
 
     double operator()(float t) const {
-        if (trace_.empty()) return std::nan("");
-
-        auto tx = times(trace_);
-        auto vx = values(trace_);
-
-        // special case for end points
-        if (t<tx.front()) return vx.front();
-        if (t>=tx.back()) return vx.back();
-
-        auto part = util::partition_view(tx);
-        auto i = part.index(t);
-        arb_assert(i != part.npos);
-        auto p = part[i];
-        return math::lerp(vx[i], vx[i+1], (t-p.first)/(p.second-p.first));
+        return pw_linear_interpolate(t, trace_,
+            [](auto& entry) { return entry.t; },
+            [](auto& entry) { return entry.v; });
     }
 
     const trace_data<double>& trace_;
@@ -43,9 +30,12 @@ struct trace_interpolant {
 double linf_distance(const trace_data<double>& u, const trace_data<double>& r) {
     trace_interpolant f{r};
 
-    return util::max_value(
-            util::transform_view(u,
-                [&](trace_entry<double> x) { return std::abs(x.v-f(x.t)); }));
+    double linf = 0;
+    for (auto entry: u) {
+        linf = std::max(linf, std::abs(entry.v-f(entry.t)));
+    }
+
+    return linf;
 }
 
 // Compute linf distance as above, but excluding sample points that lie
@@ -72,14 +62,14 @@ double linf_distance(const trace_data<double>& u, const trace_data<double>& ref,
         // include points up to and including uj-2, and then proceed from point uj+1,
         // excluding the two points closest to the discontinuity.
 
-        if (uj>1+ui) {
-            util::append(reduced, util::subrange_view(u, ui, uj-1));
+        for (unsigned k = ui; k+1<uj; ++k) {
+            reduced.push_back(u[k]);
         }
         ui = uj+1;
     }
 
-    if (ui<nu) {
-        util::append(reduced, util::subrange_view(u, ui, nu));
+    for (auto k = ui; k<nu; ++k) {
+        reduced.push_back(u[k]);
     }
 
     return linf_distance(reduced, ref);
@@ -89,21 +79,18 @@ std::vector<trace_peak> local_maxima(const trace_data<double>& u) {
     std::vector<trace_peak> peaks;
     if (u.size()<2) return peaks;
 
-    auto tx = times(u);
-    auto vx = values(u);
-
-    int s_prev = math::signum(vx[1]-vx[0]);
+    int s_prev = math::signum(u[1].v-u[0].v);
     std::size_t i_start = 0;
 
     for (std::size_t i = 2; i<u.size()-1; ++i) {
-        int s = math::signum(vx[i]-vx[i-1]);
+        int s = math::signum(u[i].v-u[i-1].v);
         if (s_prev==1 && s==-1) {
             // found peak between i_start and i,
             // observerd peak value at i-1.
-            float t0 = tx[i_start];
-            float t1 = tx[i];
+            float t0 = u[i_start].t;
+            float t1 = u[i].t;
 
-            peaks.push_back({(t0+t1)/2, vx[i-1], (t1-t0)/2});
+            peaks.push_back({(t0+t1)/2, u[i-1].v, (t1-t0)/2});
         }
 
         if (s!=0) {
diff --git a/test/validation/trace_analysis.hpp b/test/validation/trace_analysis.hpp
index e6ff032d..92b8d048 100644
--- a/test/validation/trace_analysis.hpp
+++ b/test/validation/trace_analysis.hpp
@@ -8,24 +8,12 @@
 #include <arbor/simple_sampler.hpp>
 #include <arbor/util/optional.hpp>
 
-#include "util/rangeutil.hpp"
+#include "util.hpp"
 
 namespace arb {
 
 /* Trace data comparison */
 
-// Extract time or value data from trace.
-
-template <typename V>
-inline auto times(const trace_data<V>& trace) {
-   return util::transform_view(trace, [](auto& x) { return x.t; });
-}
-
-template <typename V>
-inline auto values(const trace_data<V>& trace) {
-   return util::transform_view(trace, [](auto& x) { return x.v; });
-}
-
 // Compute max |v_i - f(t_i)| where (t, v) is the 
 // first trace `u` and f is the piece-wise linear interpolant
 // of the second trace `r`.
@@ -77,7 +65,7 @@ using conv_data = std::vector<conv_entry<Param>>;
 
 template <typename ConvEntrySeq>
 void assert_convergence(const ConvEntrySeq& cs) {
-    if (util::empty(cs)) return;
+    if (size(cs)==0) return;
 
     auto tbound = [](trace_peak p) { return std::abs(p.t)+p.t_err; };
     float peak_dt_bound = INFINITY;
diff --git a/test/validation/util.hpp b/test/validation/util.hpp
new file mode 100644
index 00000000..af026392
--- /dev/null
+++ b/test/validation/util.hpp
@@ -0,0 +1,31 @@
+#pragma once
+
+// Simple helper utilities for validation tests.
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+
+#include <arbor/common_types.hpp>
+
+// Number of elements in a built-in array, taken from its type.
+//
+// NOTE(review): these overloads live in the global namespace and are called
+// unqualified; when built as C++17 or later, ADL on standard containers also
+// finds std::size, which may make such calls ambiguous -- confirm before
+// raising the language standard.
+template <typename T, std::size_t N>
+constexpr std::size_t size(T (&)[N]) noexcept {
+    return N;
+}
+
+// Number of elements in any object that provides a size() member.
+template <typename X>
+constexpr std::size_t size(const X& x) { return x.size(); }
+
+// Render a backend kind as text via its stream insertion operator.
+inline std::string to_string(arb::backend_kind kind) {
+    std::ostringstream out;
+    out << kind;
+    return out.str();
+}
diff --git a/test/validation/validate.cpp b/test/validation/validate.cpp
index dec1edaf..cac0ac3d 100644
--- a/test/validation/validate.cpp
+++ b/test/validation/validate.cpp
@@ -4,7 +4,7 @@
 #include <string>
 #include <exception>
 
-#include <tinyopt.hpp>
+#include <aux/tinyopt.hpp>
 
 #include "../gtest.h"
 
diff --git a/test/validation/validate_ball_and_stick.cpp b/test/validation/validate_ball_and_stick.cpp
index 6f736ff8..afc769fe 100644
--- a/test/validation/validate_ball_and_stick.cpp
+++ b/test/validation/validate_ball_and_stick.cpp
@@ -3,22 +3,20 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
-
-#include "load_balance.hpp"
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "util/meta.hpp"
-#include "util/path.hpp"
-#include "util/strprintf.hpp"
+#include <aux/path.hpp>
 
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
+
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 #include "../gtest.h"
@@ -33,7 +31,7 @@ struct probe_point {
 template <typename ProbePointSeq>
 void run_ncomp_convergence_test(
     const char* model_name,
-    const util::path& ref_data_path,
+    const aux::path& ref_data_path,
     backend_kind backend,
     const mc_cell& c,
     ProbePointSeq& probe_points,
@@ -51,12 +49,12 @@ void run_ncomp_convergence_test(
         {"dt", dt},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     auto exclude = stimulus_ends(c);
 
-    auto n_probe = util::size(probe_points);
+    auto n_probe = size(probe_points);
     std::vector<probe_label> plabels;
     plabels.reserve(n_probe);
     for (unsigned i = 0; i<n_probe; ++i) {
@@ -66,6 +64,10 @@ void run_ncomp_convergence_test(
     convergence_test_runner<int> runner("ncomp", plabels, meta);
     runner.load_reference_data(ref_data_path);
 
+    distributed_context context;
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         for (auto& seg: c.segments()) {
             if (!seg->is_soma()) {
@@ -77,8 +79,6 @@ void run_ncomp_convergence_test(
             rec.add_probe(0, 0, cell_probe_address{p.where, cell_probe_address::membrane_voltage});
         }
 
-        distributed_context context;
-        hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
         auto decomp = partition_load_balance(rec, nd, &context);
         simulation sim(rec, decomp, &context);
 
@@ -196,35 +196,35 @@ void validate_ball_and_squiggle(arb::backend_kind backend) {
 
 TEST(ball_and_stick, neuron_ref) {
     validate_ball_and_stick(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_ball_and_stick(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_taper, neuron_ref) {
     validate_ball_and_taper(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_ball_and_taper(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_3stick, neuron_ref) {
     validate_ball_and_3stick(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_ball_and_3stick(backend_kind::gpu);
     }
 }
 
 TEST(rallpack1, numeric_ref) {
     validate_rallpack1(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_rallpack1(backend_kind::gpu);
     }
 }
 
 TEST(ball_and_squiggle, neuron_ref) {
     validate_ball_and_squiggle(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_ball_and_squiggle(backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_compartment_policy.cpp b/test/validation/validate_compartment_policy.cpp
deleted file mode 100644
index 7307c459..00000000
--- a/test/validation/validate_compartment_policy.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-#include <fstream>
-#include <utility>
-
-#include <nlohmann/json.hpp>
-
-#include <arbor/common_types.hpp>
-#include <arbor/mc_cell.hpp>
-#include <arbor/recipe.hpp>
-#include <arbor/simple_sampler.hpp>
-#include <arbor/simulation.hpp>
-
-#include "util/rangeutil.hpp"
-
-#include "../gtest.h"
-
-#include "../common_cells.hpp"
-#include "../simple_recipes.hpp"
-
-#include "trace_analysis.hpp"
-#include "validation_data.hpp"
-
-using namespace arb;
-
-#if 0
-// *Temporarily* disabled: compartment division policy
-// will be moved to backend policy class.
-
-/*
- * Expect dendtrites composed of a simple frustrum to give
- * essentially identical results no matter the compartment
- * division policy.
- */
-
-template <typename CompPolicy>
-std::vector<trace_data> run_simulation(const cell& c, float sample_dt, float t_end, float dt) {
-    simulation<fvm::fvm_multicell<double, cell_local_size_type, div_compartment_by_ends>> m{singleton_recipe(c)};
-
-    const auto& probes = m.probes();
-    std::size_t n_probes = probes.size();
-    std::vector<simple_sampler> samplers(n_probes, sample_dt);
-
-    for (unsigned i = 0; i<n_probes; ++i) {
-        m.attach_sampler(probes[i].id, samplers[i].sampler<>());
-    }
-
-    m.run(t_end, dt);
-    std::vector<trace_data> traces;
-    for (auto& s: samplers) {
-        traces.push_back(std::move(s.trace));
-    }
-    return traces;
-}
-
-
-void run_test(cell&& c) {
-    add_common_voltage_probes(c);
-
-    float sample_dt = .025;
-    float t_end = 100;
-    float dt = 0.001;
-
-    auto traces_by_ends = run_simulation<div_compartment_by_ends>(c, sample_dt, t_end, dt);
-    auto traces_sampler = run_simulation<div_compartment_sampler>(c, sample_dt, t_end, dt);
-    auto traces_integrator = run_simulation<div_compartment_integrator>(c, sample_dt, t_end, dt);
-
-    auto n_trace = traces_by_ends.size();
-    ASSERT_GT(n_trace, 0);
-    ASSERT_EQ(n_trace, traces_sampler.size());
-    ASSERT_EQ(n_trace, traces_integrator.size());
-
-    for (unsigned i = 0; i<n_trace; ++i) {
-        auto& t1 = traces_by_ends[i];
-        auto& t2 = traces_sampler[i];
-        auto& t3 = traces_integrator[i];
-
-        // expect all traces to be (close to) the same
-        double epsilon = 1e-6;
-        double tol = epsilon*util::max_value(
-            util::transform_view(values(t1), [](double x) { return std::abs(x); }));
-        EXPECT_GE(tol, linf_distance(t1, t2));
-        EXPECT_GE(tol, linf_distance(t2, t3));
-        EXPECT_GE(tol, linf_distance(t3, t1));
-    }
-}
-
-TEST(compartment_policy, validate_ball_and_stick) {
-    SCOPED_TRACE("ball_and_stick");
-    run_test(make_cell_ball_and_stick());
-}
-
-TEST(compartment_policy, validate_ball_and_3stick) {
-    SCOPED_TRACE("ball_and_3stick");
-    run_test(make_cell_ball_and_3stick());
-}
-
-TEST(compartment_policy, validate_ball_and_taper) {
-    SCOPED_TRACE("ball_and_taper");
-    run_test(make_cell_ball_and_taper());
-}
-
-#endif
diff --git a/test/validation/validate_kinetic.cpp b/test/validation/validate_kinetic.cpp
index dd335976..3bd9f796 100644
--- a/test/validation/validate_kinetic.cpp
+++ b/test/validation/validate_kinetic.cpp
@@ -5,22 +5,19 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "load_balance.hpp"
-#include "util/rangeutil.hpp"
-#include "util/strprintf.hpp"
-
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 void run_kinetic_dt(
@@ -41,13 +38,15 @@ void run_kinetic_dt(
     probe_label plabels[1] = {{"soma.mid", {0u, 0u}}};
 
     meta["sim"] = "arbor";
-    meta["backend_kind"] = util::to_string(backend);
+    meta["backend_kind"] = to_string(backend);
 
     convergence_test_runner<float> runner("dt", plabels, meta);
     runner.load_reference_data(ref_file);
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     auto decomp = partition_load_balance(rec, nd, &context);
     simulation sim(rec, decomp, &context);
 
@@ -113,14 +112,14 @@ using namespace arb;
 
 TEST(kinetic, kin1_numeric_ref) {
     validate_kinetic_kin1(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_kinetic_kin1(arb::backend_kind::gpu);
     }
 }
 
 TEST(kinetic, kinlva_numeric_ref) {
     validate_kinetic_kinlva(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_kinetic_kinlva(arb::backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_soma.cpp b/test/validation/validate_soma.cpp
index 90534c4c..6c1b17bd 100644
--- a/test/validation/validate_soma.cpp
+++ b/test/validation/validate_soma.cpp
@@ -1,22 +1,19 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
 
-#include "hardware/gpu.hpp"
-#include "hardware/node_info.hpp"
-#include "load_balance.hpp"
-#include "util/rangeutil.hpp"
-#include "util/strprintf.hpp"
-
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 #include "../gtest.h"
@@ -33,7 +30,9 @@ void validate_soma(backend_kind backend) {
     probe_label plabels[1] = {{"soma.mid", {0u, 0u}}};
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     auto decomp = partition_load_balance(rec, nd, &context);
     simulation sim(rec, decomp, &context);
 
@@ -42,7 +41,7 @@ void validate_soma(backend_kind backend) {
         {"model", "soma"},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     convergence_test_runner<float> runner("dt", plabels, meta);
@@ -70,7 +69,7 @@ end:
 
 TEST(soma, numeric_ref) {
     validate_soma(backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         validate_soma(backend_kind::gpu);
     }
 }
diff --git a/test/validation/validate_synapses.cpp b/test/validation/validate_synapses.cpp
index b987b718..354dab94 100644
--- a/test/validation/validate_synapses.cpp
+++ b/test/validation/validate_synapses.cpp
@@ -1,15 +1,13 @@
 #include <nlohmann/json.hpp>
 
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/load_balance.hpp>
 #include <arbor/mc_cell.hpp>
 #include <arbor/recipe.hpp>
 #include <arbor/simple_sampler.hpp>
 #include <arbor/simulation.hpp>
+#include <aux/path.hpp>
 
-#include "hardware/node_info.hpp"
-#include "hardware/gpu.hpp"
-#include "load_balance.hpp"
-#include "util/path.hpp"
-#include "util/strprintf.hpp"
 
 #include "../gtest.h"
 
@@ -18,13 +16,14 @@
 
 #include "convergence_test.hpp"
 #include "trace_analysis.hpp"
+#include "util.hpp"
 #include "validation_data.hpp"
 
 using namespace arb;
 
 void run_synapse_test(
     const char* syn_type,
-    const util::path& ref_data_path,
+    const aux::path& ref_data_path,
     backend_kind backend,
     float t_end=70.f,
     float dt=0.001)
@@ -35,7 +34,7 @@ void run_synapse_test(
         {"model", syn_type},
         {"sim", "arbor"},
         {"units", "mV"},
-        {"backend_kind", util::to_string(backend)}
+        {"backend_kind", to_string(backend)}
     };
 
     mc_cell c = make_cell_ball_and_stick(false); // no stimuli
@@ -63,7 +62,9 @@ void run_synapse_test(
     runner.load_reference_data(ref_data_path);
 
     distributed_context context;
-    hw::node_info nd(1, backend==backend_kind::gpu? 1: 0);
+    proc_allocation nd;
+    nd.num_gpus = (backend==backend_kind::gpu);
+
     for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) {
         c.cable(1)->set_compartments(ncomp);
 
@@ -89,7 +90,7 @@ void run_synapse_test(
 TEST(simple_synapse, expsyn_neuron_ref) {
     SCOPED_TRACE("expsyn-multicore");
     run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         SCOPED_TRACE("expsyn-gpu");
         run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::gpu);
     }
@@ -98,7 +99,7 @@ TEST(simple_synapse, expsyn_neuron_ref) {
 TEST(simple_synapse, exp2syn_neuron_ref) {
     SCOPED_TRACE("exp2syn-multicore");
     run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::multicore);
-    if (hw::num_gpus()) {
+    if (local_allocation().num_gpus) {
         SCOPED_TRACE("exp2syn-gpu");
         run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::gpu);
     }
diff --git a/test/validation/validation_data.cpp b/test/validation/validation_data.cpp
index 2722c909..cf5963c4 100644
--- a/test/validation/validation_data.cpp
+++ b/test/validation/validation_data.cpp
@@ -7,8 +7,7 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/simple_sampler.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 #include "trace_analysis.hpp"
 #include "validation_data.hpp"
@@ -21,11 +20,11 @@ trace_io g_trace_io;
 #define ARB_DATADIR ""
 #endif
 
-util::path trace_io::find_datadir() {
+aux::path trace_io::find_datadir() {
     // If environment variable is set, use that in preference.
 
     if (const char* env_path = std::getenv("ARB_DATADIR")) {
-        return util::path(env_path);
+        return env_path;
     }
 
     // Otherwise try compile-time path ARB_DATADIR and hard-coded
@@ -40,13 +39,13 @@ util::path trace_io::find_datadir() {
 
     std::error_code ec;
     for (auto p: paths) {
-        if (util::is_directory(p, ec)) {
-            return util::path(p);
+        if (aux::is_directory(p, ec)) {
+            return p;
         }
     }
 
     // Otherwise set to empty path, and rely on command-line option.
-    return util::path();
+    return "";
 }
 
 void trace_io::save_trace(const std::string& label, const trace_data<double>& data, const nlohmann::json& meta) {
@@ -54,13 +53,16 @@ void trace_io::save_trace(const std::string& label, const trace_data<double>& da
 }
 
 void trace_io::save_trace(const std::string& abscissa, const std::string& label, const trace_data<double>& data, const nlohmann::json& meta) {
-    using namespace arb;
+    using nlohmann::json;
 
-    nlohmann::json j = meta;
-    j["data"] = {
-        {abscissa, times(data)},
-        {label, values(data)}
-    };
+    json j = meta;
+    // Seed empty arrays so that an empty trace is written as [] and not null.
+    json& times = (j["data"][abscissa] = json::array());
+    json& values = (j["data"][label] = json::array());
+    for (const auto& e: data) {
+        times.push_back(e.t);
+        values.push_back(e.v);
+    }
 
     jtraces_ += std::move(j);
 }
@@ -95,8 +97,8 @@ static void parse_trace_json(const nlohmann::json& j, std::map<std::string, trac
     }
 }
 
-std::map<std::string, trace_data<double>> trace_io::load_traces(const util::path& name) {
-    util::path file  = datadir_/name;
+std::map<std::string, trace_data<double>> trace_io::load_traces(const aux::path& name) {
+    aux::path file  = datadir_/name;
     std::ifstream fid(file);
     if (!fid) {
         throw std::runtime_error("unable to load validation data: "+file.native());
diff --git a/test/validation/validation_data.hpp b/test/validation/validation_data.hpp
index 1317738f..620f79eb 100644
--- a/test/validation/validation_data.hpp
+++ b/test/validation/validation_data.hpp
@@ -8,8 +8,7 @@
 #include <nlohmann/json.hpp>
 
 #include <arbor/simple_sampler.hpp>
-
-#include "util/path.hpp"
+#include <aux/path.hpp>
 
 namespace arb {
 
@@ -47,7 +46,7 @@ public:
 
     void save_trace(const std::string& label, const trace_data<double>& data, const nlohmann::json& meta);
     void save_trace(const std::string& abscissa, const std::string& label, const trace_data<double>& data, const nlohmann::json& meta);
-    std::map<std::string, trace_data<double>> load_traces(const util::path& name);
+    std::map<std::string, trace_data<double>> load_traces(const aux::path& name);
 
     // common flags, options set by driver
 
@@ -63,9 +62,9 @@ public:
     void set_sample_dt(float dt) { sample_dt_ = dt; }
     float sample_dt() const { return sample_dt_; }
 
-    void set_datadir(const util::path& dir) { datadir_ = dir; }
+    void set_datadir(const aux::path& dir) { datadir_ = dir; }
 
-    void set_output(const util::path& file) {
+    void set_output(const aux::path& file) {
         out_.open(file);
         if (!out_) {
             throw std::runtime_error("unable to open file for writing");
@@ -81,7 +80,7 @@ public:
     }
 
 private:
-    util::path datadir_;
+    aux::path datadir_;
     std::ofstream out_;
     nlohmann::json jtraces_ = nlohmann::json::array();
     bool verbose_flag_ = false;
@@ -94,7 +93,7 @@ private:
     // starting with ARB_DATADIR preprocessor define if defined and
     // if the directory exists, or else try './validation/data'
     // and '../validation/data'.
-    static util::path find_datadir();
+    static aux::path find_datadir();
 };
 
 extern trace_io g_trace_io;
-- 
GitLab