diff --git a/.gitmodules b/.gitmodules
index 5749e2a293886fb7409b37f01b3fb1457d81a8a8..3809b6b00184ce2fb4b50a50ff56176661ff24d9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,9 @@
-[submodule "tests/ubench/google-benchmark"]
-	path = tests/ubench/google-benchmark
+[submodule "google-benchmark"]
+	path = ext/google-benchmark
 	url = https://github.com/google/benchmark
-[submodule "doc/rtd_theme"]
-	path = doc/rtd_theme
+[submodule "sphinx_rtd_theme"]
+	path = ext/sphinx_rtd_theme
 	url = https://github.com/rtfd/sphinx_rtd_theme.git
 [submodule "tbb"]
-	path = tbb
+	path = ext/tbb
 	url = https://github.com/wjakob/tbb.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ba3f0cc3366589d54b048085aa0221fb0a040a57..54e0e212f9ae0dfec96ece4b22eaf596e0905ba7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,313 +1,263 @@
-cmake_minimum_required(VERSION 3.0)
+# 3.9 requirement for CUDA language support.
+cmake_minimum_required(VERSION 3.9)
 
-# project info
-project(arbor)
+project(arbor VERSION 0.1)
 enable_language(CXX)
 
-# Hide warnings about mixing old and new signatures for target_link_libraries.
-# These can't be avoided, because the FindCUDA packed provided by CMake before
-# version 3.9 uses the old signature, while other packages use the new signature.
-if ("${CMAKE_VERSION}" MATCHES "^3.[0-9].")
-    cmake_policy(SET CMP0023 OLD)
-endif()
+#----------------------------------------------------------
+# Configure-time build options for Arbor:
+#----------------------------------------------------------
 
-# Set release as the default build type.
-# Otherwise, CMake will default to debug.
-if (NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE release CACHE STRING "Choose the type of build." FORCE)
-  # Set the possible values of build type for cmake-gui
-  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "debug" "release")
-endif()
+# Enable CUDA support with ARB_GPU_MODEL.
 
-# compilation flags
-set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
-include("CompilerOptions")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_DEBUG} ${CXXOPT_CXX11} ${CXXOPT_PTHREAD} ${CXXOPT_WALL}")
+set(ARB_GPU_MODEL "none" CACHE STRING "The target GPU architecture: one of {none,K20,K80,P100}")
+set_property(CACHE ARB_GPU_MODEL PROPERTY STRINGS none K20 K80 P100 )
 
-# this generates a .json file with full compilation command for each file
-set(CMAKE_EXPORT_COMPILE_COMMANDS "YES")
+# Specify target architecture.
 
-# generated .a and .so go into /lib
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(ARB_ARCH "" CACHE STRING "Target architecture for arbor libraries")
 
-#----------------------------------------------------------
-# Option to enable assertions
-#----------------------------------------------------------
-option(ARB_WITH_ASSERTIONS "enable EXPECTS() assertions in code" OFF)
-if(ARB_WITH_ASSERTIONS)
-    add_definitions("-DARB_HAVE_ASSERTIONS")
-endif()
+# Perform explicit vectorization?
 
-#----------------------------------------------------------
-# Option to enable traces
-#----------------------------------------------------------
-option(ARB_WITH_TRACE "enable TRACE() macros in code" OFF)
-if(ARB_WITH_TRACE)
-    add_definitions("-DARB_HAVE_TRACE")
-endif()
+option(ARB_VECTORIZE "use explicit SIMD code in generated mechanisms" OFF)
 
-#----------------------------------------------------------
-# Option to disable auto running of modcc compiler
-#----------------------------------------------------------
-option(ARB_AUTO_RUN_MODCC_ON_CHANGES
-  "Rerun modcc compiler whenever *.mod file or modcc compiler change" ON)
+# Use in-tree TBB?
 
-#----------------------------------------------------------
-# prepare list of libraries/includes needed by external libs
-#----------------------------------------------------------
-set(EXTERNAL_LIBRARIES "")
-set(EXTERNAL_INCLUDES "")
+option(ARB_PRIVATE_TBBLIB "build and link against in-tree TBB build" OFF)
 
-#----------------------------------------------------------
-# list of libraries built inside Arbor
-#----------------------------------------------------------
-set(ARB_LIBRARIES "")
+# Use externally built modcc?
+
+set(ARB_MODCC "" CACHE STRING "path to external modcc NMODL compiler")
+
+# Generate validation data for validation tests?
+
+option(ARB_BUILD_VALIDATION_DATA "generate validation data" OFF)
+
+# Where to generate and find validation data?
+
+set(ARB_VALIDATION_DATA_DIR "${PROJECT_SOURCE_DIR}/validation/data" CACHE PATH
+  "location of generated validation data")
 
 #----------------------------------------------------------
-# Threading model selection
+# Configure-time features for Arbor:
 #----------------------------------------------------------
+
 set(ARB_THREADING_MODEL "cthread" CACHE STRING "set the threading model, one of cthread/tbb/serial")
 set_property(CACHE ARB_THREADING_MODEL PROPERTY STRINGS cthread tbb serial )
 
-if(ARB_THREADING_MODEL MATCHES "tbb")
-    include(GitSubmodule) # required for check_git_submodule
-    check_git_submodule(tbb "${CMAKE_SOURCE_DIR}/tbb")
-    if(NOT tbb_avail)
-        message(FATAL_ERROR "TBB git submodule required.")
-    endif()
+option(ARB_WITH_MPI "build with MPI support" OFF)
 
-    # turn off proxy malloc library and test compilation
-    option(TBB_BUILD_TBBMALLOC_PROXY OFF)
-    option(TBB_BUILD_TESTS           OFF)
+option(ARB_WITH_PROFILING "use built-in profiling" OFF)
 
-    # Use the CMake configuration in the TBB submodule
-    # Reset the CXX flags, to avoid the litany of warnings that -Wall
-    # gives when compiling TBB.
-    set(CMAKE_CXX_FLAGS_TBB_BACK "${CMAKE_CXX_FLAGS}")
-    unset(CMAKE_CXX_FLAGS)
-    add_subdirectory(tbb)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_TBB_BACK}")
+option(ARB_WITH_ASSERTIONS "enable arb_assert() assertions in code" OFF)
 
-    # set flags for TBB
-    add_definitions(-DARB_HAVE_TBB)
-    set(ARB_WITH_TBB TRUE)
+#----------------------------------------------------------
+# Global CMake configuration
+#----------------------------------------------------------
 
-    include_directories(${CMAKE_SOURCE_DIR}/tbb/include)
+# Use GNU standard installation path conventions.
+include(GNUInstallDirs)
 
-    list(APPEND EXTERNAL_LIBRARIES tbb_static tbbmalloc_static)
-    list(APPEND EXTERNAL_LIBRARIES pthread dl)
+# Include own CMake modules in search path, load common modules.
 
-    # TODO add support for user-supplied tbb implementation,
-    # e.g. using an TBB_ROOT variable.
+set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
+include(GitSubmodule) # required for check_git_submodule
+include(ErrorTarget)  # required for add_error_target
+include(FindThreadsCudaFix) # bug work around
 
-elseif(ARB_THREADING_MODEL MATCHES "cthread")
-    find_package(Threads REQUIRED)
-    add_definitions(-DARB_HAVE_CTHREAD)
-    set(ARB_WITH_CTHREAD TRUE)
-    list(APPEND EXTERNAL_LIBRARIES ${CMAKE_THREAD_LIBS_INIT})
+# Set release as the default build type (CMake default is debug.)
 
-    if(CMAKE_USE_PTHREADS_INIT)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
-    endif()
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE release CACHE STRING "Choose the type of build." FORCE)
+    # Set the possible values of build type for cmake-gui
+    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "debug" "release")
 endif()
 
-#----------------------------------------------------------
-# libunwind for pretty printing stack traces
-#----------------------------------------------------------
-find_package(Unwind)
-if(UNWIND_FOUND)
-    add_definitions(-DWITH_UNWIND)
-    include_directories(${UNWIND_INCLUDE_DIR})
-    list(APPEND EXTERNAL_LIBRARIES ${UNWIND_LIBRARIES})
+# When we find threads, prefer -pthread option.
+set(THREADS_PREFER_PTHREAD_FLAG ON)
+
+# Add CUDA as a language if GPU support requested.
+# (This has to be set early so as to enable CUDA tests in generator
+# expressions.)
+if(NOT ARB_GPU_MODEL MATCHES "none")
+    enable_language(CUDA)
 endif()
 
+# Build paths.
+
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+# Generate a .json file with full compilation command for each file.
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS "YES")
+
+# Compiler options common to library, examples, tests, etc.
+
+include("CompilerOptions")
+add_compile_options(
+    "$<$<COMPILE_LANGUAGE:CXX>:${CXXOPT_DEBUG}>"
+    "$<$<COMPILE_LANGUAGE:CXX>:${CXXOPT_WALL}>")
+set(CMAKE_CXX_STANDARD 11)
+
 #----------------------------------------------------------
-# CUDA support
+# Set up flags and dependencies:
 #----------------------------------------------------------
-set(ARB_GPU_MODEL "none" CACHE STRING "The target GPU architecture: one of {none,K20,K80,P100}")
-set_property(CACHE ARB_GPU_MODEL PROPERTY STRINGS none K20 K80 P100 )
 
-set(ARB_WITH_CUDA FALSE)
+# Interface library `arbor-private-deps` collects dependencies, options etc.
+# for the arbor library.
 
-if(NOT ARB_GPU_MODEL MATCHES "none")
-    find_package(CUDA REQUIRED)
+add_library(arbor-private-deps INTERFACE)
 
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}
-        -Xcudafe --diag_suppress=integer_sign_change
-        -Xcudafe --diag_suppress=unsigned_compare_with_zero)
+# Interface library `arbor-public-deps` collects requirements for the
+# users of the arbor library (e.g. tbb, mpi) that will become part
+# of arbor's PUBLIC interface.
 
-    set(ARB_WITH_CUDA TRUE)
-    add_definitions(-DARB_HAVE_GPU)
-    include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
-    list(APPEND EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
+add_library(arbor-public-deps INTERFACE)
+
+# External libraries in `ext` sub-directory: json, tclap and tbb.
+# Creates interface libraries `ext-json`, `ext-tclap` and `ext-tbb`.
+
+add_subdirectory(ext)
+
+# Auxiliary/helper utilities in `aux` are common across test executables
+# and examples. Creates interface library `arbor-aux`.
+
+add_subdirectory(aux)
+
+# Target microarchitecture for building arbor libraries, tests and examples
+#---------------------------------------------------------------------------
+if(ARB_ARCH)
+    set_arch_target(ARB_CXXOPT_ARCH "${ARB_ARCH}")
+    target_compile_options(arbor-private-deps INTERFACE ${ARB_CXXOPT_ARCH})
 endif()
 
-if(ARB_GPU_MODEL MATCHES "K20")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_35)
-    add_definitions(-DARB_CUDA_ARCH=350)
-elseif(ARB_GPU_MODEL MATCHES "K80")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_37)
-    add_definitions(-DARB_CUDA_ARCH=370)
-elseif(ARB_GPU_MODEL MATCHES "P100")
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_60)
-    add_definitions(-DARB_CUDA_ARCH=600)
-elseif(NOT ARB_GPU_MODEL MATCHES "none")
-    message( FATAL_ERROR "-- GPU architecture '${ARB_GPU_MODEL}' not supported. Use one of {none, K20, K80, P100}")
+# Profiling and test features
+#-----------------------------
+
+if(ARB_WITH_PROFILING)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_PROFILING)
+endif()
+if(ARB_WITH_ASSERTIONS)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_ASSERTIONS)
 endif()
 
-#----------------------------------------------------------
-# Cray/BGQ/Generic Linux/other flag?
-#----------------------------------------------------------
-set(ARB_SYSTEM_TYPE "Generic" CACHE STRING
-    "Choose a system type to customize flags")
-set_property(CACHE ARB_SYSTEM_TYPE PROPERTY STRINGS Generic Cray BGQ )
-
-# Cray specific flags
-if(${ARB_SYSTEM_TYPE} MATCHES "Cray")
-    # we no longer set the -dynamic flag for the compilere here, instead dynamic
-    # linking should be enabled by the user by setting the environment variable:
-    #   CRAYPE_LINK_TYPE=dynamic
-    #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -dynamic")
-    add_definitions(-DARB_HAVE_CRAY)
+# Threading model
+#-----------------
+
+if(ARB_THREADING_MODEL MATCHES "tbb")
+    set(ARB_WITH_TBB TRUE)
+    target_link_libraries(arbor-public-deps INTERFACE ext-tbb)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_TBB)
+elseif(ARB_THREADING_MODEL MATCHES "cthread")
+    set(ARB_WITH_CTHREAD TRUE)
+    find_package(Threads REQUIRED)
+    find_threads_cuda_fix()
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_CTHREAD)
+    target_link_libraries(arbor-private-deps INTERFACE Threads::Threads)
 endif()
 
-#----------------------------------------------------------
 # MPI support
-#----------------------------------------------------------
-option(ARB_WITH_MPI "build with support for MPI" OFF)
+#-------------------
 
 if(ARB_WITH_MPI)
-   # BGQ specific flags
-   if(${ARB_SYSTEM_TYPE} MATCHES "BGQ" )
-      # On BGQ, set CXX to the mpi wrapper, and pass it a static
-      add_definitions(-DMPICH2_CONST=const)
-      set(MPI_FOUND TRUE)
-    endif()
+    find_package(MPI REQUIRED CXX)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_MPI)
 
-    if (NOT MPI_FOUND)
-      find_package(MPI REQUIRED)
-    endif()
-    include_directories(SYSTEM ${MPI_C_INCLUDE_PATH})
-    add_definitions(-DARB_HAVE_MPI)
+    # target_compile_definitions(MPI::MPI_CXX INTERFACE MPICH_SKIP_MPICXX=1 OMPI_SKIP_MPICXX=1)
+    # target_link_libraries(arbor-public-deps INTERFACE MPI::MPI_CXX)
+
+    # CMake 3.9 does not allow us to add definitions to an import target.
+    # so wrap MPI::MPI_CXX in an interface library 'mpi-wrap' instead.
 
-    # unfortunate workaround for C++ detection in system mpi.h
-    add_definitions(-DMPICH_SKIP_MPICXX=1 -DOMPI_SKIP_MPICXX=1)
-    set_property(DIRECTORY APPEND_STRING PROPERTY COMPILE_OPTIONS "${MPI_C_COMPILE_FLAGS}")
+    add_library(mpi-wrap INTERFACE)
+    target_link_libraries(mpi-wrap INTERFACE MPI::MPI_CXX)
+    target_compile_definitions(mpi-wrap INTERFACE MPICH_SKIP_MPICXX=1 OMPI_SKIP_MPICXX=1)
+    target_link_libraries(arbor-public-deps INTERFACE mpi-wrap)
 endif()
 
-#----------------------------------------------------------
-# Built-in profiler
-#----------------------------------------------------------
-option(ARB_WITH_PROFILING "use built-in profiling" OFF)
-if(ARB_WITH_PROFILING)
-    add_definitions(-DARB_HAVE_PROFILING)
+# CUDA support
+#--------------
+
+if(NOT ARB_GPU_MODEL MATCHES "none")
+    set(ARB_WITH_CUDA TRUE)
+
+    add_compile_options(
+        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe=--diag_suppress=integer_sign_change>"
+        "$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe=--diag_suppress=unsigned_compare_with_zero>")
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_GPU)
+
+    if(ARB_GPU_MODEL MATCHES "K20")
+        set(cuda_arch sm_35)
+        set(arb_cuda_arch 350)
+    elseif(ARB_GPU_MODEL MATCHES "K80")
+        set(cuda_arch sm_37)
+        set(arb_cuda_arch 370)
+    elseif(ARB_GPU_MODEL MATCHES "P100")
+        set(cuda_arch sm_60)
+        set(arb_cuda_arch 600)
+    else()
+        message(FATAL_ERROR "-- GPU architecture '${ARB_GPU_MODEL}' not supported. Use one of {none, K20, K80, P100}")
+    endif()
+
+    target_compile_options(arbor-private-deps INTERFACE
+        $<$<COMPILE_LANGUAGE:CUDA>:-arch=${cuda_arch}>)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_CUDA_ARCH=${arb_cuda_arch})
 endif()
 
-#----------------------------------------------------------
-# Modcc vectorization target
-#----------------------------------------------------------
-option(ARB_VECTORIZE "use explicit SIMD code in generated mechanisms" OFF)
+# Use libunwind if available for pretty printing stack traces
+#-------------------------------------------------------------
 
-#----------------------------------------------------------
-# Target microarchitecture for building arbor libraries
-#----------------------------------------------------------
-set(ARB_ARCH "" CACHE STRING "Target architecture for arbor libraries")
-if(ARB_ARCH)
-    # Sets CXXOPT_ARCH variable accordingly:
-    set_arch_target("${ARB_ARCH}")
+find_package(Unwind)
+if(Unwind_FOUND)
+    target_link_libraries(arbor-private-deps INTERFACE Unwind::unwind)
+    target_compile_definitions(arbor-private-deps INTERFACE ARB_WITH_UNWIND)
 endif()
 
-#----------------------------------------------------------
-# Only build modcc if it has not already been installed.
-# This is useful if cross compiling for KNL, when it is not desirable to compile
-# modcc with the same flags that are used for the KNL target.
-#----------------------------------------------------------
-set(use_external_modcc OFF)
-find_program(MODCC_BIN modcc)
-if(MODCC_BIN)
-    set(use_external_modcc ON)
-    set(modcc "${MODCC_BIN}")
+# Build and use modcc unless explicit path given
+#------------------------------------------------
+
+if(ARB_MODCC)
+    find_program(modcc NAMES ${ARB_MODCC} NO_CMAKE_PATH NO_CMAKE_ENVIRONMENT_PATH NO_CMAKE_SYSTEM_PATH)
+    if(NOT modcc)
+        message(FATAL_ERROR "Unable to find modcc executable.")
+    endif()
+    set(ARB_WITH_EXTERNAL_MODCC TRUE)
 else()
     set(modcc $<TARGET_FILE:modcc>)
+    set(ARB_WITH_EXTERNAL_MODCC FALSE)
 endif()
 
-#----------------------------------------------------------
-# Validation data generation
-#----------------------------------------------------------
-# destination directory for generated data
-set(ARB_VALIDATION_DATA_DIR "${PROJECT_SOURCE_DIR}/validation/data" CACHE PATH
-  "location of generated validation data")
 
 #----------------------------------------------------------
-# Whether to build validation data
+# Configure targets in sub-directories.
 #----------------------------------------------------------
-# turn off by default
-option(ARB_BUILD_VALIDATION_DATA "generate validation data" OFF)
-if (ARB_BUILD_VALIDATION_DATA)
-    # Whether to attempt to use julia to build validation data
-    find_program(JULIA_BIN julia)
-    if(JULIA_BIN STREQUAL "JULIA_BIN-NOTFOUND")
-        message(STATUS "julia not found; will not automatically build validation data sets from julia scripts")
-        set(ARB_BUILD_JULIA_VALIDATION_DATA FALSE)
-    else()
-        set(ARB_BUILD_JULIA_VALIDATION_DATA TRUE)
-    endif()
 
-    # Whether to attempt to use nrniv to build validation data
-    # (if we find nrniv, do)
-    find_program(NRNIV_BIN nrniv)
-    if(NRNIV_BIN STREQUAL "NRNIV_BIN-NOTFOUND")
-        message(STATUS "nrniv not found; will not automatically build NEURON validation data sets")
-        set(ARB_BUILD_NRN_VALIDATION_DATA FALSE)
-    else()
-        set(ARB_BUILD_NRN_VALIDATION_DATA TRUE)
-    endif()
-endif()
 
-#----------------------------------------------------------
-# Setup include dirs
-#----------------------------------------------------------
-include_directories(
-    "${PROJECT_SOURCE_DIR}/tclap"
-    "${PROJECT_SOURCE_DIR}/include"
-    "${PROJECT_SOURCE_DIR}/src"
-    "${PROJECT_SOURCE_DIR}/modcc"
-    "${PROJECT_SOURCE_DIR}")
-if(EXTERNAL_INCLUDES)
-  include_directories("${EXTERNAL_INCLUDES}")
-endif()
+# arbor-public-headers:
+add_subdirectory(include)
 
-#----------------------------------------------------------
-# Setup subdirs
-#----------------------------------------------------------
-# only include validation data if flag is set
-if(ARB_BUILD_VALIDATION_DATA)
-    add_subdirectory(validation)
-endif()
+# modcc, libmodcc:
+add_subdirectory(modcc)
 
-# only compile modcc if it is not provided externally
-if(NOT use_external_modcc)
-    add_subdirectory(modcc)
-endif()
+# arbor, arbor-private-headers:
+add_subdirectory(arbor)
 
-#----------------------------------------------------------
-# set up for targets that require git submodules.
-#----------------------------------------------------------
-include(GitSubmodule) # required for check_git_submodule
-include(ErrorTarget)  # reguired for add_error_target
-check_git_submodule(rtdtheme "${PROJECT_SOURCE_DIR}/doc/rtd_theme")
+# unit, unit-mpi, unit-local, unit-modcc, validate
+add_subdirectory(test)
 
-add_subdirectory(src)
-add_subdirectory(mechanisms) # after src path so that gpu_mechanism library is last on link line
-add_subdirectory(tests)
+# miniapp, brunel-miniapp, event-gen
 add_subdirectory(example)
+
+# lmorpho:
 add_subdirectory(lmorpho)
-if (rtdtheme_avail)
-    add_subdirectory(doc)
-else()
-    add_error_target( docs
-        "Generating Sphinx documentation"
-        "The git submodule for read the docs is not available")
+
+# html:
+add_subdirectory(doc)
+
+# validation-data:
+if(ARB_BUILD_VALIDATION_DATA)
+    add_subdirectory(validation) # validation-data
 endif()
 
diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4df243146aabf72871e16aba733d2a271b76c03a
--- /dev/null
+++ b/arbor/CMakeLists.txt
@@ -0,0 +1,101 @@
+# Sources:
+
+set(arbor_sources
+    assert.cpp
+    backends/multicore/mechanism.cpp
+    backends/multicore/shared_state.cpp
+    backends/multicore/stimulus.cpp
+    benchmark_cell_group.cpp
+    builtin_mechanisms.cpp
+    cell_group_factory.cpp
+    common_types_io.cpp
+    cell.cpp
+    event_binner.cpp
+    fvm_layout.cpp
+    fvm_lowered_cell_impl.cpp
+    hardware/affinity.cpp
+    hardware/gpu.cpp
+    hardware/memory.cpp
+    hardware/node_info.cpp
+    hardware/power.cpp
+    io/locked_ostream.cpp
+    io/serialize_hex.cpp
+    lif_cell_group.cpp
+    mc_cell_group.cpp
+    mechcat.cpp
+    memory/cuda_wrappers.cpp
+    memory/util.cpp
+    merge_events.cpp
+    simulation.cpp
+    morphology.cpp
+    partition_load_balance.cpp
+    profile/clock.cpp
+    profile/memory_meter.cpp
+    profile/meter_manager.cpp
+    profile/power_meter.cpp
+    profile/profiler.cpp
+    schedule.cpp
+    spike_source_cell_group.cpp
+    swcio.cpp
+    threadinfo.cpp
+    threading/threading.cpp
+    thread_private_spike_store.cpp
+    util/hostname.cpp
+    util/path.cpp
+    util/unwind.cpp
+    version.cpp
+)
+
+# GPU back-end sources, added only when ARB_WITH_CUDA is set.
+if(ARB_WITH_CUDA)
+    list(APPEND arbor_sources
+        backends/gpu/mechanism.cpp
+        backends/gpu/shared_state.cpp
+        backends/gpu/stimulus.cpp
+        backends/gpu/managed_ptr.cpp
+        backends/gpu/matrix_assemble.cu
+        backends/gpu/matrix_interleave.cu
+        backends/gpu/matrix_solve.cu
+        backends/gpu/multi_event_stream.cu
+        backends/gpu/shared_state.cu
+        backends/gpu/stimulus.cu
+        backends/gpu/threshold_watcher.cu
+        memory/fill.cu
+    )
+endif()
+
+if(ARB_WITH_MPI)
+    list(APPEND arbor_sources
+        communication/mpi.cpp
+        communication/mpi_error.cpp
+        communication/mpi_context.cpp)
+endif()
+
+if(ARB_WITH_CTHREAD)
+    list(APPEND arbor_sources
+        threading/cthread.cpp)
+endif()
+
+# Add special target for private include directory, for use by arbor target
+# and arbor unit tests. Private headers are also used for the other binaries
+# until the process of splitting our private and public headers is complete.
+
+add_library(arbor-private-headers INTERFACE)
+target_include_directories(arbor-private-headers INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
+
+# Mechanisms, generated from .mod files; sets arbor_mechanism_sources
+# variable, build_all_mods target. Note: CMake source file properties are
+# directory-local.
+
+add_subdirectory(../mechanisms "${CMAKE_BINARY_DIR}/mechanisms")
+set_source_files_properties(${arbor_mechanism_sources} PROPERTIES GENERATED TRUE)
+
+# Library target:
+
+add_library(arbor ${arbor_sources} ${arbor_mechanism_sources})
+add_dependencies(arbor build_all_mods)
+target_link_libraries(arbor PRIVATE arbor-private-deps arbor-private-headers)
+target_link_libraries(arbor PUBLIC arbor-public-deps arbor-public-headers)
+
+install(TARGETS arbor ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
diff --git a/src/algorithms.hpp b/arbor/algorithms.hpp
similarity index 96%
rename from src/algorithms.hpp
rename to arbor/algorithms.hpp
index 43ec3dcbe47388bdcd975574ef48712756786928..b2596d704d7949ffed47d5313ac886d96ede860d 100644
--- a/src/algorithms.hpp
+++ b/arbor/algorithms.hpp
@@ -7,8 +7,9 @@
 #include <type_traits>
 #include <vector>
 
-#include <util/compat.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+
+#include <arbor/util/compat.hpp>
 #include <util/meta.hpp>
 #include <util/range.hpp>
 #include <util/rangeutil.hpp>
@@ -193,7 +194,7 @@ std::vector<typename C::value_type> branches(const C& parent_index)
         "integral type required"
     );
 
-    EXPECTS(has_contiguous_compartments(parent_index));
+    arb_assert(has_contiguous_compartments(parent_index));
 
     std::vector<typename C::value_type> branch_index;
     if (parent_index.empty()) {
@@ -292,9 +293,9 @@ std::vector<typename C::value_type> tree_reduce(
         return {};
     }
 
-    EXPECTS(parent_index.size()-branch_index.back() == 0);
-    EXPECTS(has_contiguous_compartments(parent_index));
-    EXPECTS(is_strictly_monotonic_increasing(branch_index));
+    arb_assert(parent_index.size()-branch_index.back() == 0);
+    arb_assert(has_contiguous_compartments(parent_index));
+    arb_assert(is_strictly_monotonic_increasing(branch_index));
 
     // expand the branch index
     auto expanded_branch = expand_branches(branch_index);
diff --git a/arbor/assert.cpp b/arbor/assert.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b17fa3251e1cbc423bf27aecbcd4639a184dc7c5
--- /dev/null
+++ b/arbor/assert.cpp
@@ -0,0 +1,39 @@
+#include <cstdlib>
+#include <iostream>
+
+#include <arbor/assert.hpp>
+
+#include "util/unwind.hpp"
+
+namespace arb {
+
+// Write util::backtrace() and a description of the failed assertion
+// (file, line, function, expression) to std::cerr, then abort the process.
+void abort_on_failed_assertion(
+    const char* assertion,
+    const char* file,
+    int line,
+    const char* func)
+{
+    // Emit stack trace if libunwind is being used.
+    std::cerr << util::backtrace();
+
+    // Explicit flush, as we can't assume default buffering semantics on stderr/cerr,
+    // and abort() might not flush streams.
+    std::cerr << file << ':' << line << " " << func
+              << ": Assertion `" << assertion << "' failed." << std::endl;
+    std::abort();
+}
+
+// Handler with an empty body: all arguments are ignored and it simply returns.
+void ignore_failed_assertion(
+    const char* assertion,
+    const char* file,
+    int line,
+    const char* func)
+{}
+
+// Initialized to the aborting handler defined above.
+failed_assertion_handler_t global_failed_assertion_handler = abort_on_failed_assertion;
+
+} // namespace arb
diff --git a/src/backends.hpp b/arbor/backends.hpp
similarity index 100%
rename from src/backends.hpp
rename to arbor/backends.hpp
diff --git a/src/backends/builtin_mech_proto.hpp b/arbor/backends/builtin_mech_proto.hpp
similarity index 91%
rename from src/backends/builtin_mech_proto.hpp
rename to arbor/backends/builtin_mech_proto.hpp
index c08f6cd797f33b33450f5b1cadccd20d671b7f80..1fcb292c6574685be1479184a8c93ff09372c06c 100644
--- a/src/backends/builtin_mech_proto.hpp
+++ b/arbor/backends/builtin_mech_proto.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
-#include <mechanism.hpp>
-#include <mechinfo.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/mechinfo.hpp>
 
 namespace arb {
 
diff --git a/src/backends/event.hpp b/arbor/backends/event.hpp
similarity index 97%
rename from src/backends/event.hpp
rename to arbor/backends/event.hpp
index f6abfdfaf4fcc038d4f567663bf1485545ee2801..deba95eea283d31e7f852ac30da35eceec5d0ef2 100644
--- a/src/backends/event.hpp
+++ b/arbor/backends/event.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
-#include <common_types.hpp>
-#include <backends/fvm_types.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 // Structures for the representation of event delivery targets and
 // staged events.
diff --git a/src/backends/event_delivery.md b/arbor/backends/event_delivery.md
similarity index 100%
rename from src/backends/event_delivery.md
rename to arbor/backends/event_delivery.md
diff --git a/src/backends/gpu/cuda_atomic.hpp b/arbor/backends/gpu/cuda_atomic.hpp
similarity index 100%
rename from src/backends/gpu/cuda_atomic.hpp
rename to arbor/backends/gpu/cuda_atomic.hpp
diff --git a/src/backends/gpu/cuda_common.hpp b/arbor/backends/gpu/cuda_common.hpp
similarity index 100%
rename from src/backends/gpu/cuda_common.hpp
rename to arbor/backends/gpu/cuda_common.hpp
diff --git a/src/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp
similarity index 84%
rename from src/backends/gpu/fvm.hpp
rename to arbor/backends/gpu/fvm.hpp
index a68661e86f9a9ebd9d0d2d09a0796388ab401f96..262d9e3cf7bb8aa9c792e9f2b54324d0b73ea6d7 100644
--- a/src/backends/gpu/fvm.hpp
+++ b/arbor/backends/gpu/fvm.hpp
@@ -3,16 +3,16 @@
 #include <map>
 #include <string>
 
-#include <common_types.hpp>
-#include <mechanism.hpp>
-#include <memory/memory.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
+#include "memory/memory.hpp"
+#include "util/rangeutil.hpp"
 
-#include <backends/gpu/gpu_store_types.hpp>
-#include <backends/gpu/shared_state.hpp>
+#include "backends/event.hpp"
+
+#include "backends/gpu/gpu_store_types.hpp"
+#include "backends/gpu/shared_state.hpp"
 
 #include "matrix_state_interleaved.hpp"
 #include "threshold_watcher.hpp"
diff --git a/src/backends/gpu/gpu_store_types.hpp b/arbor/backends/gpu/gpu_store_types.hpp
similarity index 73%
rename from src/backends/gpu/gpu_store_types.hpp
rename to arbor/backends/gpu/gpu_store_types.hpp
index 7eca723aec83b1e644c4acefdc4b8c150da6f16c..ac215ed4370e486cec9cca82617f483d047fd081 100644
--- a/src/backends/gpu/gpu_store_types.hpp
+++ b/arbor/backends/gpu/gpu_store_types.hpp
@@ -5,12 +5,12 @@
 //
 // Defines array, iarray, and specialized multi-event stream classes.
 
-#include <memory/memory.hpp>
+#include <arbor/fvm_types.hpp>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/gpu/multi_event_stream.hpp>
-#include <backends/gpu/multi_event_stream.hpp>
+#include "memory/memory.hpp"
+#include "backends/event.hpp"
+#include "backends/gpu/multi_event_stream.hpp"
+#include "backends/gpu/multi_event_stream.hpp"
 
 namespace arb {
 namespace gpu {
diff --git a/arbor/backends/gpu/managed_ptr.cpp b/arbor/backends/gpu/managed_ptr.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..90cd37a9a6f17181ba6957478f9a19d7635963de
--- /dev/null
+++ b/arbor/backends/gpu/managed_ptr.cpp
@@ -0,0 +1,12 @@
+namespace arb {
+namespace gpu {
+
+// TODO: make this a runtime check
+
+bool device_concurrent_managed_access() {
+    return (ARB_CUDA_ARCH >= 600); // all GPUs from P100
+}
+
+} // namespace gpu
+} // namespace arb
+
diff --git a/src/backends/gpu/managed_ptr.hpp b/arbor/backends/gpu/managed_ptr.hpp
similarity index 77%
rename from src/backends/gpu/managed_ptr.hpp
rename to arbor/backends/gpu/managed_ptr.hpp
index ba0d99deeef8b860efb6987b2222315618e7b6da..9d977662455886aef17a069caf97f9595ce6b674 100644
--- a/src/backends/gpu/managed_ptr.hpp
+++ b/arbor/backends/gpu/managed_ptr.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <cuda.h>
+#include <cuda_runtime.h>
 
 #include <memory/allocator.hpp>
 
@@ -10,13 +11,14 @@ namespace gpu {
 // Pre-pascal NVIDIA GPUs don't support page faulting for GPU reads of managed
 // memory, so when a kernel is launched, all managed memory is copied to the
 // GPU. The upshot of this is that no CPU-side reads can be made of _any_
-// managed memory can be made whe _any_ kernel is running.  The following helper
-// function can be used to determine whether synchronization is required before
-// CPU-side reads of managed memory.
-constexpr
-bool managed_synch_required() {
-    return (ARB_CUDA_ARCH < 600); // all GPUs before P100
-}
+// managed memory while _any_ kernel is running.
+//
+// The following helper function can be used to determine whether
+// synchronization is required before CPU-side reads of managed memory: if the
+// device concurrentManagedAccess property is zero, then safe host-side access
+// requires a synchronization.
+
+bool device_concurrent_managed_access();
 
 // used to indicate that the type pointed to by the managed_ptr is to be
 // constructed in the managed_ptr constructor
@@ -34,13 +36,15 @@ struct construct_in_place_tag {};
 // instead of directly constructing the managed_ptr.
 template <typename T>
 class managed_ptr {
-    public:
+public:
 
     using element_type = T;
     using pointer = element_type*;
     using reference = element_type&;
 
-    managed_ptr() = default;
+    managed_ptr():
+        concurrent_managed_access(device_concurrent_managed_access())
+    {}
 
     managed_ptr(const managed_ptr& other) = delete;
 
@@ -49,14 +53,18 @@ class managed_ptr {
     // point of the wrapper is to hide the complexity of allocating managed
     // memory and constructing a type in place.
     template <typename... Args>
-    managed_ptr(construct_in_place_tag, Args&&... args) {
+    managed_ptr(construct_in_place_tag, Args&&... args):
+        concurrent_managed_access(device_concurrent_managed_access())
+    {
         memory::managed_allocator<element_type> allocator;
         data_ = allocator.allocate(1u);
         synchronize();
         data_ = new (data_) element_type(std::forward<Args>(args)...);
     }
 
-    managed_ptr(managed_ptr&& other) {
+    managed_ptr(managed_ptr&& other):
+        concurrent_managed_access(other.concurrent_managed_access)
+    {
         std::swap(other.data_, data_);
     }
 
@@ -105,7 +113,15 @@ class managed_ptr {
         cudaDeviceSynchronize();
     }
 
-    private:
+    // Synchronize if concurrent host-side access is not supported.
+    void host_access() const {
+        if (!concurrent_managed_access) {
+            cudaDeviceSynchronize();
+        }
+    }
+
+private:
+    const bool concurrent_managed_access;
 
     __host__ __device__
     bool is_allocated() const {
diff --git a/src/backends/gpu/math.hpp b/arbor/backends/gpu/math_cu.hpp
similarity index 100%
rename from src/backends/gpu/math.hpp
rename to arbor/backends/gpu/math_cu.hpp
diff --git a/src/backends/gpu/matrix_assemble.cu b/arbor/backends/gpu/matrix_assemble.cu
similarity index 99%
rename from src/backends/gpu/matrix_assemble.cu
rename to arbor/backends/gpu/matrix_assemble.cu
index 040724a3d5e8f236e3849c47598cd0340a8a2907..fde63e1acdbc6950f75973a54ca60b129cbc4ab4 100644
--- a/src/backends/gpu/matrix_assemble.cu
+++ b/arbor/backends/gpu/matrix_assemble.cu
@@ -1,4 +1,4 @@
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 #include "cuda_common.hpp"
 #include "matrix_common.hpp"
diff --git a/src/backends/gpu/matrix_common.hpp b/arbor/backends/gpu/matrix_common.hpp
similarity index 100%
rename from src/backends/gpu/matrix_common.hpp
rename to arbor/backends/gpu/matrix_common.hpp
diff --git a/src/backends/gpu/matrix_interleave.cu b/arbor/backends/gpu/matrix_interleave.cu
similarity index 97%
rename from src/backends/gpu/matrix_interleave.cu
rename to arbor/backends/gpu/matrix_interleave.cu
index daad91f87afeb653f3526c2ab7affa37e2a9faed..ae7fbdefb04a49b6b29ea871f7631d97c2bf58e1 100644
--- a/src/backends/gpu/matrix_interleave.cu
+++ b/arbor/backends/gpu/matrix_interleave.cu
@@ -1,4 +1,4 @@
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 #include "matrix_common.hpp"
 #include "matrix_interleave.hpp"
diff --git a/src/backends/gpu/matrix_interleave.hpp b/arbor/backends/gpu/matrix_interleave.hpp
similarity index 100%
rename from src/backends/gpu/matrix_interleave.hpp
rename to arbor/backends/gpu/matrix_interleave.hpp
diff --git a/src/backends/gpu/matrix_solve.cu b/arbor/backends/gpu/matrix_solve.cu
similarity index 98%
rename from src/backends/gpu/matrix_solve.cu
rename to arbor/backends/gpu/matrix_solve.cu
index eaf5724ff9b451c0d50cdd56b885a97b7c0d9d16..1a9ab8c6396ec4a04974d0b7d06179acc2bc7d2b 100644
--- a/src/backends/gpu/matrix_solve.cu
+++ b/arbor/backends/gpu/matrix_solve.cu
@@ -1,8 +1,7 @@
-#include <cassert>
+#include <arbor/fvm_types.hpp>
 
 #include "cuda_common.hpp"
 #include "matrix_common.hpp"
-#include <backends/fvm_types.hpp>
 
 namespace arb {
 namespace gpu {
diff --git a/src/backends/gpu/matrix_state_flat.hpp b/arbor/backends/gpu/matrix_state_flat.hpp
similarity index 90%
rename from src/backends/gpu/matrix_state_flat.hpp
rename to arbor/backends/gpu/matrix_state_flat.hpp
index b65d6efe38ef0b19b42bdd056e53b0d0ccb6f0c3..e4ddc6661aa3f049fce0bdf6e81b380343787e8c 100644
--- a/src/backends/gpu/matrix_state_flat.hpp
+++ b/arbor/backends/gpu/matrix_state_flat.hpp
@@ -1,11 +1,12 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
-#include <memory/memory.hpp>
-#include <memory/wrappers.hpp>
-#include <util/span.hpp>
-#include <util/partition.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "memory/memory.hpp"
+#include "memory/wrappers.hpp"
+#include "util/span.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 namespace gpu {
@@ -75,11 +76,11 @@ struct matrix_state_flat {
         cv_capacitance(memory::make_const_view(cv_cap)),
         cv_area(memory::make_const_view(area))
     {
-        EXPECTS(cv_cap.size() == size());
-        EXPECTS(face_cond.size() == size());
-        EXPECTS(area.size() == size());
-        EXPECTS(cell_cv_divs.back() == (index_type)size());
-        EXPECTS(cell_cv_divs.size() > 1u);
+        arb_assert(cv_cap.size() == size());
+        arb_assert(face_cond.size() == size());
+        arb_assert(area.size() == size());
+        arb_assert(cell_cv_divs.back() == (index_type)size());
+        arb_assert(cell_cv_divs.size() > 1u);
 
         using memory::make_const_view;
 
diff --git a/src/backends/gpu/matrix_state_interleaved.hpp b/arbor/backends/gpu/matrix_state_interleaved.hpp
similarity index 95%
rename from src/backends/gpu/matrix_state_interleaved.hpp
rename to arbor/backends/gpu/matrix_state_interleaved.hpp
index 7463918cc63a87d80835565c9939bdc57cf591cc..0cb2aac84fde5f55f2158e9415c54f50122fa155 100644
--- a/src/backends/gpu/matrix_state_interleaved.hpp
+++ b/arbor/backends/gpu/matrix_state_interleaved.hpp
@@ -1,13 +1,14 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
-#include <math.hpp>
-#include <memory/memory.hpp>
-#include <util/debug.hpp>
-#include <util/span.hpp>
-#include <util/partition.hpp>
-#include <util/rangeutil.hpp>
-#include <util/indirect.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "math.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
+#include "util/indirect.hpp"
 
 #include "cuda_common.hpp"
 #include "matrix_common.hpp"
@@ -145,12 +146,12 @@ struct matrix_state_interleaved {
                  const std::vector<value_type>& face_cond,
                  const std::vector<value_type>& area)
     {
-        EXPECTS(cv_cap.size()    == p.size());
-        EXPECTS(face_cond.size() == p.size());
-        EXPECTS(cell_cv_divs.back()  == (index_type)p.size());
+        arb_assert(cv_cap.size()    == p.size());
+        arb_assert(face_cond.size() == p.size());
+        arb_assert(cell_cv_divs.back()  == (index_type)p.size());
 
         // Just because you never know.
-        EXPECTS(cell_cv_divs.size() <= UINT_MAX);
+        arb_assert(cell_cv_divs.size() <= UINT_MAX);
 
         using util::make_span;
         using util::indirect_view;
diff --git a/src/backends/gpu/mechanism.cpp b/arbor/backends/gpu/mechanism.cpp
similarity index 92%
rename from src/backends/gpu/mechanism.cpp
rename to arbor/backends/gpu/mechanism.cpp
index e050001f532b28c4c7abf4348d468e827dee30b6..8e6d7baeebbc75bb77247f69d1afdccb77dd071f 100644
--- a/src/backends/gpu/mechanism.cpp
+++ b/arbor/backends/gpu/mechanism.cpp
@@ -5,19 +5,20 @@
 #include <utility>
 #include <vector>
 
-#include <common_types.hpp>
-#include <math.hpp>
-#include <mechanism.hpp>
-#include <memory/memory.hpp>
-#include <util/index_into.hpp>
-#include <util/optional.hpp>
-#include <util/maputil.hpp>
-#include <util/range.hpp>
-#include <util/span.hpp>
-
-#include <backends/fvm_types.hpp>
-#include <backends/gpu/mechanism.hpp>
-#include <backends/gpu/fvm.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "math.hpp"
+#include "memory/memory.hpp"
+#include "util/index_into.hpp"
+#include "util/maputil.hpp"
+#include "util/range.hpp"
+#include "util/span.hpp"
+
+#include "backends/gpu/mechanism.hpp"
+#include "backends/gpu/fvm.hpp"
 
 namespace arb {
 namespace gpu {
@@ -50,7 +51,7 @@ memory::const_device_view<T> device_view(const T* ptr, std::size_t n) {
 // pointers. This also involves setting the pointers in the parameter pack,
 // which is used to pass pointers to CUDA kernels.
 
-void mechanism::instantiate(fvm_size_type id,
+void mechanism::instantiate(unsigned id,
                             backend::shared_state& shared,
                             const layout& pos_data)
 {
@@ -126,7 +127,7 @@ void mechanism::instantiate(fvm_size_type id,
     pp->node_index_ = indices_.data();
 
     auto ion_index_tbl = ion_index_table();
-    EXPECTS(num_ions_==ion_index_tbl.size());
+    arb_assert(num_ions_==ion_index_tbl.size());
 
     for (auto i: make_span(0, num_ions_)) {
         util::optional<ion_state&> oion = value_by_key(shared.ion_data, ion_index_tbl[i].first);
diff --git a/src/backends/gpu/mechanism.hpp b/arbor/backends/gpu/mechanism.hpp
similarity index 94%
rename from src/backends/gpu/mechanism.hpp
rename to arbor/backends/gpu/mechanism.hpp
index 3c28116cd282c557d6d887ab7a850b48baf2f0e7..d9da7aa12358fb2fd09ff71ee2c3c9407271adf7 100644
--- a/src/backends/gpu/mechanism.hpp
+++ b/arbor/backends/gpu/mechanism.hpp
@@ -7,13 +7,13 @@
 #include <utility>
 #include <vector>
 
-#include <backends/fvm_types.hpp>
-#include <common_types.hpp>
-#include <mechanism.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/mechanism.hpp>
 
-#include <backends/gpu/fvm.hpp>
-#include <backends/gpu/gpu_store_types.hpp>
-#include <backends/gpu/mechanism_ppack_base.hpp>
+#include "backends/gpu/fvm.hpp"
+#include "backends/gpu/gpu_store_types.hpp"
+#include "backends/gpu/mechanism_ppack_base.hpp"
 
 namespace arb {
 namespace gpu {
diff --git a/src/backends/gpu/mechanism_ppack_base.hpp b/arbor/backends/gpu/mechanism_ppack_base.hpp
similarity index 96%
rename from src/backends/gpu/mechanism_ppack_base.hpp
rename to arbor/backends/gpu/mechanism_ppack_base.hpp
index 5147d7efd168cce8d2141cf83065f5a6e577241f..a867f69da375e985d55eda099d8801df56cb5adc 100644
--- a/src/backends/gpu/mechanism_ppack_base.hpp
+++ b/arbor/backends/gpu/mechanism_ppack_base.hpp
@@ -3,7 +3,7 @@
 // Base class for parameter packs for GPU generated kernels:
 // will be included by .cu generated sources.
 
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 namespace arb {
 namespace gpu {
diff --git a/src/backends/gpu/multi_event_stream.cu b/arbor/backends/gpu/multi_event_stream.cu
similarity index 92%
rename from src/backends/gpu/multi_event_stream.cu
rename to arbor/backends/gpu/multi_event_stream.cu
index 349e9da89c746607a647879c899fee2bd0f5362f..5e4e5cf936a98f8b7b19edc88f747efe384183eb 100644
--- a/src/backends/gpu/multi_event_stream.cu
+++ b/arbor/backends/gpu/multi_event_stream.cu
@@ -1,9 +1,10 @@
-#include <common_types.hpp>
-#include <backends/event.hpp>
-#include <backends/gpu/multi_event_stream.hpp>
-#include <memory/array.hpp>
-#include <memory/copy.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+
+#include "backends/event.hpp"
+#include "backends/gpu/multi_event_stream.hpp"
+#include "memory/array.hpp"
+#include "memory/copy.hpp"
+#include "util/rangeutil.hpp"
 
 #include "cuda_common.hpp"
 
@@ -99,7 +100,7 @@ void multi_event_stream_base::clear() {
 // Designate for processing events `ev` at head of each event stream `i`
 // until `event_time(ev)` > `t_until[i]`.
 void multi_event_stream_base::mark_until_after(const_view t_until) {
-    EXPECTS(n_streams()==util::size(t_until));
+    arb_assert(n_streams()==util::size(t_until));
 
     constexpr int block_dim = 128;
 
@@ -112,7 +113,7 @@ void multi_event_stream_base::mark_until_after(const_view t_until) {
 // Designate for processing events `ev` at head of each event stream `i`
 // while `t_until[i]` > `event_time(ev)`.
 void multi_event_stream_base::mark_until(const_view t_until) {
-    EXPECTS(n_streams()==util::size(t_until));
+    arb_assert(n_streams()==util::size(t_until));
     constexpr int block_dim = 128;
 
     unsigned n = n_stream_;
diff --git a/src/backends/gpu/multi_event_stream.hpp b/arbor/backends/gpu/multi_event_stream.hpp
similarity index 89%
rename from src/backends/gpu/multi_event_stream.hpp
rename to arbor/backends/gpu/multi_event_stream.hpp
index 842c88cd8b501861ba1cff0b194d7bba0c9d65d6..4498b87cb9f208c76f32e5054bf886a48f09594e 100644
--- a/src/backends/gpu/multi_event_stream.hpp
+++ b/arbor/backends/gpu/multi_event_stream.hpp
@@ -2,15 +2,16 @@
 
-// Indexed collection of pop-only event queues --- multicore back-end implementation.
+// Indexed collection of pop-only event queues --- GPU back-end implementation.
 
-#include <common_types.hpp>
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/multi_event_stream_state.hpp>
-#include <generic_event.hpp>
-#include <memory/array.hpp>
-#include <memory/copy.hpp>
-#include <profiling/profiler.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "backends/event.hpp"
+#include "backends/multi_event_stream_state.hpp"
+#include "generic_event.hpp"
+#include "memory/array.hpp"
+#include "memory/copy.hpp"
+#include "profile/profiler_macro.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 namespace gpu {
@@ -70,7 +71,7 @@ protected:
             throw std::range_error("too many events");
         }
 
-        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_index(ev); }));
+        arb_assert(util::is_sorted_by(staged, [](const Event& ev) { return event_index(ev); }));
 
         std::size_t n_ev = staged.size();
         tmp_ev_time_.clear();
@@ -92,13 +93,13 @@ protected:
 
             // Within a subrange of events with the same index, events should
             // be sorted by time.
-            EXPECTS(std::is_sorted(&tmp_ev_time_[ev_begin_i], &tmp_ev_time_[ev_i]));
+            arb_assert(std::is_sorted(&tmp_ev_time_[ev_begin_i], &tmp_ev_time_[ev_i]));
             n_nonempty += (tmp_divs_.back()!=ev_i);
             tmp_divs_.push_back(ev_i);
             ev_begin_i = ev_i;
         }
 
-        EXPECTS(tmp_divs_.size()==n_stream_+1);
+        arb_assert(tmp_divs_.size()==n_stream_+1);
         memory::copy(memory::make_view(tmp_divs_)(0,n_stream_), span_begin_);
         memory::copy(memory::make_view(tmp_divs_)(1,n_stream_+1), span_end_);
         memory::copy(span_begin_, mark_);
diff --git a/src/backends/gpu/reduce_by_key.hpp b/arbor/backends/gpu/reduce_by_key.hpp
similarity index 100%
rename from src/backends/gpu/reduce_by_key.hpp
rename to arbor/backends/gpu/reduce_by_key.hpp
diff --git a/src/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
similarity index 93%
rename from src/backends/gpu/shared_state.cpp
rename to arbor/backends/gpu/shared_state.cpp
index 1f59bcccde25974dcb9a067540d74dfb909d7dcb..d49be673e0e23ce1af65c11e140554c6c6510b9f 100644
--- a/src/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -1,17 +1,16 @@
 #include <cstddef>
 #include <vector>
 
-#include <constants.hpp>
-#include <ion.hpp>
-#include <memory/wrappers.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/ion.hpp>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/multi_event_stream_state.hpp>
-
-#include <backends/gpu/gpu_store_types.hpp>
-#include <backends/gpu/shared_state.hpp>
+#include "backends/event.hpp"
+#include "backends/gpu/gpu_store_types.hpp"
+#include "backends/gpu/shared_state.hpp"
+#include "backends/multi_event_stream_state.hpp"
+#include "constants.hpp"
+#include "memory/wrappers.hpp"
+#include "util/rangeutil.hpp"
 
 using arb::memory::make_const_view;
 
@@ -70,8 +69,8 @@ ion_state::ion_state(
     default_int_concentration(info.default_int_concentration),
     default_ext_concentration(info.default_ext_concentration)
 {
-    EXPECTS(node_index_.size()==weight_Xi_.size());
-    EXPECTS(node_index_.size()==weight_Xo_.size());
+    arb_assert(node_index_.size()==weight_Xi_.size());
+    arb_assert(node_index_.size()==weight_Xo_.size());
 }
 
 void ion_state::nernst(fvm_value_type temperature_K) {
diff --git a/src/backends/gpu/shared_state.cu b/arbor/backends/gpu/shared_state.cu
similarity index 100%
rename from src/backends/gpu/shared_state.cu
rename to arbor/backends/gpu/shared_state.cu
diff --git a/src/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
similarity index 96%
rename from src/backends/gpu/shared_state.hpp
rename to arbor/backends/gpu/shared_state.hpp
index 256198f633ed92b51b63cc097ab62112e8eb513a..6193fc39b7a663e8b800bfd64ed1a066727403b4 100644
--- a/src/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -5,10 +5,11 @@
 #include <utility>
 #include <vector>
 
-#include <backends/fvm_types.hpp>
-#include <backends/gpu/gpu_store_types.hpp>
-#include <ion.hpp>
-#include <util/enumhash.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/ion.hpp>
+#include <arbor/util/enumhash.hpp>
+
+#include "backends/gpu/gpu_store_types.hpp"
 
 namespace arb {
 namespace gpu {
diff --git a/src/backends/gpu/stack.hpp b/arbor/backends/gpu/stack.hpp
similarity index 87%
rename from src/backends/gpu/stack.hpp
rename to arbor/backends/gpu/stack.hpp
index 2e19d329154dc5420240dd58f4d075d42760f842..53293463d3c6bab02e5fd6bc594247463dc26111 100644
--- a/src/backends/gpu/stack.hpp
+++ b/arbor/backends/gpu/stack.hpp
@@ -2,8 +2,10 @@
 
 #include <algorithm>
 
-#include <backends/gpu/managed_ptr.hpp>
-#include <memory/allocator.hpp>
+#include <arbor/assert.hpp>
+
+#include "backends/gpu/managed_ptr.hpp"
+#include "memory/allocator.hpp"
 #include "stack_storage.hpp"
 
 namespace arb {
@@ -57,11 +59,18 @@ public:
     explicit stack(unsigned capacity): storage_(create_storage(capacity)) {}
 
     ~stack() {
+        storage_.synchronize();
         if (storage_->data) {
             allocator<value_type>().deallocate(storage_->data, storage_->capacity);
         }
     }
 
+    // Perform any required synchronization if concurrent host-side access is not supported.
+    // (Correctness still requires that GPU operations on this stack are complete.)
+    void host_access() const {
+        storage_.host_access();
+    }
+
     void clear() {
         storage_->stores = 0u;
     }
@@ -92,12 +101,12 @@ public:
     }
 
     value_type& operator[](unsigned i) {
-        EXPECTS(i<size());
+        arb_assert(i<size());
         return storage_->data[i];
     }
 
     value_type& operator[](unsigned i) const {
-        EXPECTS(i<size());
+        arb_assert(i<size());
         return storage_->data[i];
     }
 
diff --git a/src/backends/gpu/stack_cu.hpp b/arbor/backends/gpu/stack_cu.hpp
similarity index 100%
rename from src/backends/gpu/stack_cu.hpp
rename to arbor/backends/gpu/stack_cu.hpp
diff --git a/src/backends/gpu/stack_storage.hpp b/arbor/backends/gpu/stack_storage.hpp
similarity index 95%
rename from src/backends/gpu/stack_storage.hpp
rename to arbor/backends/gpu/stack_storage.hpp
index fa586c6ce29b0f94c636072b0c1d58f97fbc361d..412baebeda0d2eb17a33f2e88f3cf42ddcc57e6a 100644
--- a/src/backends/gpu/stack_storage.hpp
+++ b/arbor/backends/gpu/stack_storage.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 namespace arb {
 namespace gpu {
diff --git a/src/backends/gpu/stimulus.cpp b/arbor/backends/gpu/stimulus.cpp
similarity index 89%
rename from src/backends/gpu/stimulus.cpp
rename to arbor/backends/gpu/stimulus.cpp
index b08561e568b1af6ff66dda93200b43c879be9c2f..960beeaf92e44f2f1fed4b8558016b29af09f1cc 100644
--- a/src/backends/gpu/stimulus.cpp
+++ b/arbor/backends/gpu/stimulus.cpp
@@ -1,9 +1,6 @@
-#include <cmath>
-
-#include <backends/builtin_mech_proto.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/gpu/mechanism.hpp>
-#include <backends/gpu/mechanism_ppack_base.hpp>
+#include "backends/builtin_mech_proto.hpp"
+#include "backends/gpu/mechanism.hpp"
+#include "backends/gpu/mechanism_ppack_base.hpp"
 
 #include "stimulus.hpp"
 
diff --git a/src/backends/gpu/stimulus.cu b/arbor/backends/gpu/stimulus.cu
similarity index 96%
rename from src/backends/gpu/stimulus.cu
rename to arbor/backends/gpu/stimulus.cu
index a07f93f2a882184eab68731052b60349e07a87e8..bc8f184684bd5856bb3942e123c424b270a259b3 100644
--- a/src/backends/gpu/stimulus.cu
+++ b/arbor/backends/gpu/stimulus.cu
@@ -1,4 +1,4 @@
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 #include "cuda_atomic.hpp"
 #include "cuda_common.hpp"
diff --git a/src/backends/gpu/stimulus.hpp b/arbor/backends/gpu/stimulus.hpp
similarity index 100%
rename from src/backends/gpu/stimulus.hpp
rename to arbor/backends/gpu/stimulus.hpp
diff --git a/src/backends/gpu/threshold_watcher.cu b/arbor/backends/gpu/threshold_watcher.cu
similarity index 97%
rename from src/backends/gpu/threshold_watcher.cu
rename to arbor/backends/gpu/threshold_watcher.cu
index 734bb192a188a0f2a83cb9b728ae79b71d726614..c18022ee0f8c494f5373cc6db6b5a4e9f1752e79 100644
--- a/src/backends/gpu/threshold_watcher.cu
+++ b/arbor/backends/gpu/threshold_watcher.cu
@@ -1,7 +1,8 @@
 #include <cmath>
 
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
+#include "backends/threshold_crossing.hpp"
 #include "cuda_common.hpp"
 #include "stack_cu.hpp"
 
diff --git a/src/backends/gpu/threshold_watcher.hpp b/arbor/backends/gpu/threshold_watcher.hpp
similarity index 90%
rename from src/backends/gpu/threshold_watcher.hpp
rename to arbor/backends/gpu/threshold_watcher.hpp
index 898bcca825fcbbe0fb90033e18fe251c6ecf62bc..09f5627561018e51a9c4c0201cc2958456842b02 100644
--- a/src/backends/gpu/threshold_watcher.hpp
+++ b/arbor/backends/gpu/threshold_watcher.hpp
@@ -1,14 +1,15 @@
 #pragma once
 
-#include <common_types.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
 
-#include <backends/fvm_types.hpp>
+#include "memory/memory.hpp"
+#include "util/span.hpp"
 
-#include <backends/gpu/gpu_store_types.hpp>
-#include <backends/gpu/managed_ptr.hpp>
-#include <backends/gpu/stack.hpp>
+#include "backends/threshold_crossing.hpp"
+#include "backends/gpu/gpu_store_types.hpp"
+#include "backends/gpu/managed_ptr.hpp"
+#include "backends/gpu/stack.hpp"
 
 #include "stack.hpp"
 
@@ -64,9 +65,7 @@ public:
 
     /// Remove all stored crossings that were detected in previous calls to test()
     void clear_crossings() {
-        if (managed_synch_required()) {
-            cudaDeviceSynchronize();
-        }
+        stack_.host_access();
         stack_.clear();
     }
 
@@ -86,6 +85,8 @@ public:
     }
 
     const std::vector<threshold_crossing>& crossings() const {
+        stack_.host_access();
+
         if (stack_.overflow()) {
             throw std::runtime_error("GPU spike buffer overflow.");
         }
@@ -111,11 +112,11 @@ public:
             // Check that the number of spikes has not exceeded capacity.
             // ATTENTION: requires cudaDeviceSynchronize to avoid simultaneous
             // host-device managed memory access.
-            EXPECTS((cudaDeviceSynchronize(), !stack_.overflow()));
+            arb_assert((cudaDeviceSynchronize(), !stack_.overflow()));
         }
     }
 
-    /// the number of threashold values that are being monitored
+    /// the number of threshold values that are being monitored
     std::size_t size() const {
         return cv_index_.size();
     }
diff --git a/src/backends/matrix_storage.md b/arbor/backends/matrix_storage.md
similarity index 100%
rename from src/backends/matrix_storage.md
rename to arbor/backends/matrix_storage.md
diff --git a/src/backends/multi_event_stream_state.hpp b/arbor/backends/multi_event_stream_state.hpp
similarity index 96%
rename from src/backends/multi_event_stream_state.hpp
rename to arbor/backends/multi_event_stream_state.hpp
index 303d4059e5079552218fa3edf2b651c364d29e6d..5331cd9e3dae13ec76ff9a9a92654f8dd6e44b0e 100644
--- a/src/backends/multi_event_stream_state.hpp
+++ b/arbor/backends/multi_event_stream_state.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
+#include <arbor/fvm_types.hpp>
 
 // Pointer representation of multi-event stream marked event state,
 // common across CPU and GPU backends.
diff --git a/src/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp
similarity index 79%
rename from src/backends/multicore/fvm.hpp
rename to arbor/backends/multicore/fvm.hpp
index 6dbb7a209c0228c4ebca707b8d82ae19d02e92bd..c431876301990afae4b1c09eb22d94d8e1f48147 100644
--- a/src/backends/multicore/fvm.hpp
+++ b/arbor/backends/multicore/fvm.hpp
@@ -3,16 +3,15 @@
 #include <string>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <util/padded_alloc.hpp>
-#include <util/range.hpp>
-#include <util/rangeutil.hpp>
-
-#include <backends/multicore/matrix_state.hpp>
-#include <backends/multicore/multi_event_stream.hpp>
-#include <backends/multicore/multicore_common.hpp>
-#include <backends/multicore/shared_state.hpp>
-#include <backends/multicore/threshold_watcher.hpp>
+#include "backends/event.hpp"
+#include "backends/multicore/matrix_state.hpp"
+#include "backends/multicore/multi_event_stream.hpp"
+#include "backends/multicore/multicore_common.hpp"
+#include "backends/multicore/shared_state.hpp"
+#include "backends/multicore/threshold_watcher.hpp"
+#include "util/padded_alloc.hpp"
+#include "util/range.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 namespace multicore {
diff --git a/src/backends/multicore/matrix_state.hpp b/arbor/backends/multicore/matrix_state.hpp
similarity index 96%
rename from src/backends/multicore/matrix_state.hpp
rename to arbor/backends/multicore/matrix_state.hpp
index 67976ee401827e15d2b4afcc79f12840453d1612..a4d318aa82f805ab9a369d99d1e55dbb15ca0211 100644
--- a/src/backends/multicore/matrix_state.hpp
+++ b/arbor/backends/multicore/matrix_state.hpp
@@ -46,9 +46,9 @@ public:
         face_conductance(cond.begin(), cond.end()),
         cv_area(area.begin(), area.end())
     {
-        EXPECTS(cap.size() == size());
-        EXPECTS(cond.size() == size());
-        EXPECTS(cell_cv_divs.back() == (index_type)size());
+        arb_assert(cap.size() == size());
+        arb_assert(cond.size() == size());
+        arb_assert(cell_cv_divs.back() == (index_type)size());
 
         auto n = size();
         invariant_d = array(n, 0);
diff --git a/src/backends/multicore/mechanism.cpp b/arbor/backends/multicore/mechanism.cpp
similarity index 90%
rename from src/backends/multicore/mechanism.cpp
rename to arbor/backends/multicore/mechanism.cpp
index b0fc076f9afba368939e2c370af4bddf71471ef1..3fbb4fb1a81238be8531061c980d962208387fb8 100644
--- a/src/backends/multicore/mechanism.cpp
+++ b/arbor/backends/multicore/mechanism.cpp
@@ -5,21 +5,21 @@
 #include <utility>
 #include <vector>
 
-#include <backends/fvm_types.hpp>
-#include <common_types.hpp>
-
-#include <math.hpp>
-#include <mechanism.hpp>
-#include <util/index_into.hpp>
-#include <util/optional.hpp>
-#include <util/maputil.hpp>
-#include <util/padded_alloc.hpp>
-#include <util/range.hpp>
-
-#include <backends/multicore/mechanism.hpp>
-#include <backends/multicore/multicore_common.hpp>
-#include <backends/multicore/fvm.hpp>
-#include <backends/multicore/partition_by_constraint.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "math.hpp"
+#include "util/index_into.hpp"
+#include "util/maputil.hpp"
+#include "util/padded_alloc.hpp"
+#include "util/range.hpp"
+
+#include "backends/multicore/mechanism.hpp"
+#include "backends/multicore/multicore_common.hpp"
+#include "backends/multicore/fvm.hpp"
+#include "backends/multicore/partition_by_constraint.hpp"
 
 namespace arb {
 namespace multicore {
@@ -61,7 +61,7 @@ void copy_extend(const Source& source, Dest&& dest, const Fill& fill) {
 // these past-the-end values are given a weight of zero, and any corresponding
 // indices into shared state point to the last valid slot.
 
-void mechanism::instantiate(fvm_size_type id, backend::shared_state& shared, const layout& pos_data) {
+void mechanism::instantiate(unsigned id, backend::shared_state& shared, const layout& pos_data) {
     using util::make_range;
 
     util::padded_allocator<> pad(shared.alignment);
@@ -146,7 +146,7 @@ void mechanism::instantiate(fvm_size_type id, backend::shared_state& shared, con
         ion_index = iarray(width_padded_, pad);
         copy_extend(indices, ion_index, util::back(indices));
 
-        EXPECTS(compatible_index_constraints(node_index_, ion_index, simd_width));
+        arb_assert(compatible_index_constraints(node_index_, ion_index, simd_width));
     }
 
 }
diff --git a/src/backends/multicore/mechanism.hpp b/arbor/backends/multicore/mechanism.hpp
similarity index 94%
rename from src/backends/multicore/mechanism.hpp
rename to arbor/backends/multicore/mechanism.hpp
index c03bbb75812cc08f4d688b07e367f9b9f31bd5f7..8724d8aa03704f1ebea7f506a8659b9eb17c86c7 100644
--- a/src/backends/multicore/mechanism.hpp
+++ b/arbor/backends/multicore/mechanism.hpp
@@ -7,13 +7,14 @@
 #include <utility>
 #include <vector>
 
-#include <backends/fvm_types.hpp>
-#include <common_types.hpp>
-#include <mechanism.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/mechanism.hpp>
+
+#include "backends/multicore/multicore_common.hpp"
+#include "backends/multicore/partition_by_constraint.hpp"
+#include "backends/multicore/fvm.hpp"
 
-#include <backends/multicore/multicore_common.hpp>
-#include <backends/multicore/partition_by_constraint.hpp>
-#include <backends/multicore/fvm.hpp>
 
 namespace arb {
 namespace multicore {
diff --git a/src/backends/multicore/multi_event_stream.hpp b/arbor/backends/multicore/multi_event_stream.hpp
similarity index 88%
rename from src/backends/multicore/multi_event_stream.hpp
rename to arbor/backends/multicore/multi_event_stream.hpp
index 0b188c63b7f82e24c5fb1374dc9dc5af0653512a..2a761b8c7cb6d218c10eee7f0b354bd1669fbade 100644
--- a/src/backends/multicore/multi_event_stream.hpp
+++ b/arbor/backends/multicore/multi_event_stream.hpp
@@ -6,15 +6,16 @@
 #include <ostream>
 #include <utility>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/multi_event_stream_state.hpp>
-#include <generic_event.hpp>
-#include <algorithms.hpp>
-#include <util/debug.hpp>
-#include <util/range.hpp>
-#include <util/rangeutil.hpp>
-#include <util/strprintf.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "backends/event.hpp"
+#include "backends/multi_event_stream_state.hpp"
+#include "generic_event.hpp"
+#include "algorithms.hpp"
+#include "util/range.hpp"
+#include "util/rangeutil.hpp"
+#include "util/strprintf.hpp"
 
 namespace arb {
 namespace multicore {
@@ -61,7 +62,7 @@ public:
         }
 
         // Sort by index (staged events should already be time-sorted).
-        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_time(ev); }));
+        arb_assert(util::is_sorted_by(staged, [](const Event& ev) { return event_time(ev); }));
         util::stable_sort_by(staged, [](const Event& ev) { return event_index(ev); });
 
         std::size_t n_ev = staged.size();
@@ -69,9 +70,9 @@ public:
         util::assign_by(ev_time_, staged, [](const Event& ev) { return event_time(ev); });
 
         // Determine divisions by `event_index` in ev list.
-        EXPECTS(n_streams() == span_begin_.size());
-        EXPECTS(n_streams() == span_end_.size());
-        EXPECTS(n_streams() == mark_.size());
+        arb_assert(n_streams() == span_begin_.size());
+        arb_assert(n_streams() == span_end_.size());
+        arb_assert(n_streams() == mark_.size());
 
         index_type ev_begin_i = 0;
         index_type ev_i = 0;
@@ -80,7 +81,7 @@ public:
 
             // Within a subrange of events with the same index, events should
             // be sorted by time.
-            EXPECTS(std::is_sorted(&ev_time_[ev_begin_i], &ev_time_[ev_i]));
+            arb_assert(std::is_sorted(&ev_time_[ev_begin_i], &ev_time_[ev_i]));
             mark_[s] = ev_begin_i;
             span_begin_[s] = ev_begin_i;
             span_end_[s] = ev_i;
@@ -96,7 +97,7 @@ public:
     void mark_until_after(const TimeSeq& t_until) {
         using ::arb::event_time;
 
-        EXPECTS(n_streams()==util::size(t_until));
+        arb_assert(n_streams()==util::size(t_until));
 
         // note: operation on each `i` is independent.
         for (size_type i = 0; i<n_streams(); ++i) {
@@ -117,7 +118,7 @@ public:
     void mark_until(const TimeSeq& t_until) {
         using ::arb::event_time;
 
-        EXPECTS(n_streams()==util::size(t_until));
+        arb_assert(n_streams()==util::size(t_until));
 
         // note: operation on each `i` is independent.
         for (size_type i = 0; i<n_streams(); ++i) {
diff --git a/src/backends/multicore/multicore_common.hpp b/arbor/backends/multicore/multicore_common.hpp
similarity index 82%
rename from src/backends/multicore/multicore_common.hpp
rename to arbor/backends/multicore/multicore_common.hpp
index dfb75c43c8d150191c0d7f8e49891868b50d8a87..f2ef70ec1fa45cac19a793b5b67b682b4a624c5e 100644
--- a/src/backends/multicore/multicore_common.hpp
+++ b/arbor/backends/multicore/multicore_common.hpp
@@ -8,11 +8,12 @@
 #include <utility>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <math.hpp>
-#include <simd/simd.hpp>
-#include <util/padded_alloc.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "backends/event.hpp"
+#include "math.hpp"
+#include "simd/simd.hpp"
+#include "util/padded_alloc.hpp"
 
 #include "multi_event_stream.hpp"
 
diff --git a/src/backends/multicore/partition_by_constraint.hpp b/arbor/backends/multicore/partition_by_constraint.hpp
similarity index 100%
rename from src/backends/multicore/partition_by_constraint.hpp
rename to arbor/backends/multicore/partition_by_constraint.hpp
diff --git a/src/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
similarity index 94%
rename from src/backends/multicore/shared_state.cpp
rename to arbor/backends/multicore/shared_state.cpp
index bc02cc9cc31bde72ee480c97c56c4e91152eab64..008481997d03157d789ef4d70afb2ec83310a2e8 100644
--- a/src/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -5,17 +5,18 @@
 #include <utility>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <common_types.hpp>
-#include <constants.hpp>
-#include <ion.hpp>
-#include <math.hpp>
-#include <simd/simd.hpp>
-#include <util/padded_alloc.hpp>
-#include <util/rangeutil.hpp>
-
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/ion.hpp>
+
+#include "backends/event.hpp"
+#include "constants.hpp"
+#include "io/sepval.hpp"
+#include "math.hpp"
+#include "simd/simd.hpp"
+#include "util/padded_alloc.hpp"
+#include "util/rangeutil.hpp"
 
 #include "multi_event_stream.hpp"
 #include "multicore_common.hpp"
@@ -63,8 +64,8 @@ ion_state::ion_state(
     default_int_concentration(info.default_int_concentration),
     default_ext_concentration(info.default_ext_concentration)
 {
-    EXPECTS(node_index_.size()==weight_Xi_.size());
-    EXPECTS(node_index_.size()==weight_Xo_.size());
+    arb_assert(node_index_.size()==weight_Xi_.size());
+    arb_assert(node_index_.size()==weight_Xo_.size());
 }
 
 void ion_state::nernst(fvm_value_type temperature_K) {
@@ -231,7 +232,7 @@ void shared_state::take_samples(
 
 // (Debug interface only.)
 std::ostream& operator<<(std::ostream& out, const shared_state& s) {
-    using util::csv;
+    using io::csv;
 
     out << "n_cell     " << s.n_cell << "\n";
     out << "n_cv       " << s.n_cv << "\n";
diff --git a/src/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
similarity index 93%
rename from src/backends/multicore/shared_state.hpp
rename to arbor/backends/multicore/shared_state.hpp
index a95605c54a8fd185b286945454e7da0532814294..3188dadee9dba85f4a5c0333bf6b5750e240b606 100644
--- a/src/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -7,19 +7,19 @@
 #include <utility>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
-#include <common_types.hpp>
-#include <constants.hpp>
-#include <event_queue.hpp>
-#include <ion.hpp>
-#include <math.hpp>
-#include <simd/simd.hpp>
-#include <util/enumhash.hpp>
-#include <util/padded_alloc.hpp>
-#include <util/rangeutil.hpp>
-
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/ion.hpp>
+#include <arbor/util/enumhash.hpp>
+
+#include "backends/event.hpp"
+#include "constants.hpp"
+#include "event_queue.hpp"
+#include "math.hpp"
+#include "simd/simd.hpp"
+#include "util/padded_alloc.hpp"
+#include "util/rangeutil.hpp"
 
 #include "matrix_state.hpp"
 #include "multi_event_stream.hpp"
diff --git a/src/backends/multicore/stimulus.cpp b/arbor/backends/multicore/stimulus.cpp
similarity index 93%
rename from src/backends/multicore/stimulus.cpp
rename to arbor/backends/multicore/stimulus.cpp
index 8f072a11df73a966c20a3cedc58d29629447fe90..11f6bdf3a792d354c347623c59b254a58b1a0510 100644
--- a/src/backends/multicore/stimulus.cpp
+++ b/arbor/backends/multicore/stimulus.cpp
@@ -1,8 +1,9 @@
 #include <cmath>
 
-#include <backends/builtin_mech_proto.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/multicore/mechanism.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "backends/builtin_mech_proto.hpp"
+#include "backends/multicore/mechanism.hpp"
 
 namespace arb {
 
diff --git a/src/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp
similarity index 95%
rename from src/backends/multicore/threshold_watcher.hpp
rename to arbor/backends/multicore/threshold_watcher.hpp
index 5a000385c72009a908581b8654a1c28bd9116b85..cc5b741691b14ea63fbf66e90f072ae816dafd7f 100644
--- a/src/backends/multicore/threshold_watcher.hpp
+++ b/arbor/backends/multicore/threshold_watcher.hpp
@@ -1,8 +1,10 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
-#include <math.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "backends/threshold_crossing.hpp"
+#include "math.hpp"
 
 #include "multicore_common.hpp"
 
@@ -31,7 +33,7 @@ public:
         thresholds_(thresholds),
         v_prev_(values_, values_+n_cv_)
     {
-        EXPECTS(n_cv_==thresholds.size());
+        arb_assert(n_cv_==thresholds.size());
         reset();
     }
 
diff --git a/src/backends/fvm_types.hpp b/arbor/backends/threshold_crossing.hpp
similarity index 57%
rename from src/backends/fvm_types.hpp
rename to arbor/backends/threshold_crossing.hpp
index fdb8fdb794bfb06d60267f2b42c699e5becbb5d1..3e5829d6552f465a439dd4be85d8095139d8b448 100644
--- a/src/backends/fvm_types.hpp
+++ b/arbor/backends/threshold_crossing.hpp
@@ -1,16 +1,10 @@
 #pragma once
 
-#include <common_types.hpp>
-
-// Basic types shared across FVM implementations/backends.
+#include <arbor/fvm_types.hpp>
 
 namespace arb {
 
-using fvm_value_type = double;
-using fvm_size_type = cell_local_size_type;
-using fvm_index_type = int;
-
-// Stores a single crossing event.
+// Representation of a single crossing event.
 
 struct threshold_crossing {
     fvm_size_type index;    // index of variable
diff --git a/src/benchmark_cell.hpp b/arbor/benchmark_cell.hpp
similarity index 100%
rename from src/benchmark_cell.hpp
rename to arbor/benchmark_cell.hpp
diff --git a/src/benchmark_cell_group.cpp b/arbor/benchmark_cell_group.cpp
similarity index 98%
rename from src/benchmark_cell_group.cpp
rename to arbor/benchmark_cell_group.cpp
index cd2237ba60f1fd0048b7e2db365b9b26c20d0986..d89dacd20315ea725f93f5a49339f535185cfd5d 100644
--- a/src/benchmark_cell_group.cpp
+++ b/arbor/benchmark_cell_group.cpp
@@ -2,7 +2,7 @@
 #include <exception>
 
 #include <cell_group.hpp>
-#include <profiling/profiler.hpp>
+#include <profile/profiler_macro.hpp>
 #include <recipe.hpp>
 #include <benchmark_cell.hpp>
 #include <benchmark_cell_group.hpp>
diff --git a/src/benchmark_cell_group.hpp b/arbor/benchmark_cell_group.hpp
similarity index 100%
rename from src/benchmark_cell_group.hpp
rename to arbor/benchmark_cell_group.hpp
diff --git a/src/builtin_mechanisms.cpp b/arbor/builtin_mechanisms.cpp
similarity index 81%
rename from src/builtin_mechanisms.cpp
rename to arbor/builtin_mechanisms.cpp
index 2187987e1fc45f98dbcdd9e1b4eb36f99a50520e..77a288593964e54f1e6403f2b959f133e0a50a4d 100644
--- a/src/builtin_mechanisms.cpp
+++ b/arbor/builtin_mechanisms.cpp
@@ -1,9 +1,10 @@
-#include <mechcat.hpp>
-#include <backends/builtin_mech_proto.hpp>
+#include <arbor/mechcat.hpp>
 
-#include <backends/multicore/fvm.hpp>
+#include "backends/builtin_mech_proto.hpp"
+
+#include "backends/multicore/fvm.hpp"
 #if ARB_HAVE_GPU
-#include <backends/gpu/fvm.hpp>
+#include "backends/gpu/fvm.hpp"
 #endif
 
 namespace arb {
diff --git a/src/builtin_mechanisms.hpp b/arbor/builtin_mechanisms.hpp
similarity index 77%
rename from src/builtin_mechanisms.hpp
rename to arbor/builtin_mechanisms.hpp
index a2bdd70074219849fb607da690cfa2edc54970b7..1a6688e3821ca39f896c29cf7ddfc48d49c3470e 100644
--- a/src/builtin_mechanisms.hpp
+++ b/arbor/builtin_mechanisms.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <mechcat.hpp>
+#include <arbor/mechcat.hpp>
 
 namespace arb {
 
diff --git a/src/cell.cpp b/arbor/cell.cpp
similarity index 99%
rename from src/cell.cpp
rename to arbor/cell.cpp
index 470af08dac0200b21b0d7b1be1ec019817f5841c..2058e78f802224f1b19d23cd80e2b655b8c7af7d 100644
--- a/src/cell.cpp
+++ b/arbor/cell.cpp
@@ -161,7 +161,7 @@ cell make_cell(const morphology& morph, bool compartments_from_discretization) {
         return newcell;
     }
 
-    EXPECTS(morph.check_valid());
+    arb_assert(morph.check_valid());
 
     // (not supporting soma-less cells yet)
     newcell.add_soma(morph.soma.r, point3d(morph.soma.x, morph.soma.y, morph.soma.z));
diff --git a/src/cell.hpp b/arbor/cell.hpp
similarity index 94%
rename from src/cell.hpp
rename to arbor/cell.hpp
index ff286e005306cf4babd7f47eadc6c2b178d8edf3..0354fd4093b8ff68e9dbafbe47cfb22354a4a61b 100644
--- a/src/cell.hpp
+++ b/arbor/cell.hpp
@@ -4,14 +4,15 @@
 #include <stdexcept>
 #include <vector>
 
-#include <common_types.hpp>
-#include <constants.hpp>
-#include <ion.hpp>
-#include <mechcat.hpp>
-#include <morphology.hpp>
-#include <segment.hpp>
-#include <tree.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/mechcat.hpp>
+#include <arbor/ion.hpp>
+
+#include "constants.hpp"
+#include "morphology.hpp"
+#include "segment.hpp"
+#include "tree.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 
@@ -21,7 +22,7 @@ struct segment_location {
     segment_location(cell_lid_type s, double l):
         segment(s), position(l)
     {
-        EXPECTS(position>=0. && position<=1.);
+        arb_assert(position>=0. && position<=1.);
     }
 
      bool operator==(segment_location other) const {
@@ -62,6 +63,10 @@ struct cell_probe_address {
 struct cell_global_properties {
     const mechanism_catalogue* catalogue = &global_default_catalogue();
 
+    // If >0, check membrane voltage magnitude is less than limit
+    // during integration.
+    double membrane_voltage_limit_mV = 0;
+
     // TODO: consider making some/all of the following parameters
     // cell or even segment-local.
     // 
diff --git a/src/cell_group.hpp b/arbor/cell_group.hpp
similarity index 84%
rename from src/cell_group.hpp
rename to arbor/cell_group.hpp
index 091fd0b1f5c89ec9c76bd2e432dacb032bdbc504..3a3d3035748ecbe77dc42cb418b6520f1bf37910 100644
--- a/src/cell_group.hpp
+++ b/arbor/cell_group.hpp
@@ -4,14 +4,15 @@
 #include <memory>
 #include <vector>
 
-#include <cell.hpp>
-#include <common_types.hpp>
-#include <epoch.hpp>
-#include <event_binner.hpp>
-#include <event_queue.hpp>
-#include <sampling.hpp>
-#include <schedule.hpp>
-#include <spike.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "cell.hpp"
+#include "epoch.hpp"
+#include "event_binner.hpp"
+#include "event_queue.hpp"
+#include "sampling.hpp"
+#include "schedule.hpp"
 
 namespace arb {
 
diff --git a/src/cell_group_factory.cpp b/arbor/cell_group_factory.cpp
similarity index 100%
rename from src/cell_group_factory.cpp
rename to arbor/cell_group_factory.cpp
diff --git a/src/cell_group_factory.cu b/arbor/cell_group_factory.cu
similarity index 100%
rename from src/cell_group_factory.cu
rename to arbor/cell_group_factory.cu
diff --git a/src/cell_group_factory.hpp b/arbor/cell_group_factory.hpp
similarity index 100%
rename from src/cell_group_factory.hpp
rename to arbor/cell_group_factory.hpp
diff --git a/src/common_types_io.cpp b/arbor/common_types_io.cpp
similarity index 94%
rename from src/common_types_io.cpp
rename to arbor/common_types_io.cpp
index ada86a91b1bf89a3e7d8459add546be98b5b4481..2e46ddd7d301474f098cd2650992011b1d79d4af 100644
--- a/src/common_types_io.cpp
+++ b/arbor/common_types_io.cpp
@@ -1,6 +1,6 @@
 #include <iostream>
 
-#include <common_types.hpp>
+#include <arbor/common_types.hpp>
 
 std::ostream& operator<<(std::ostream& O, arb::cell_member_type m) {
     return O << m.gid << ':' << m.index;
diff --git a/src/communication/communicator.hpp b/arbor/communication/communicator.hpp
similarity index 94%
rename from src/communication/communicator.hpp
rename to arbor/communication/communicator.hpp
index a372937cebc2839187b322081e61ee6c96549779..60c7d5cbf2f02dc1945f07e12199b89f67d1471b 100644
--- a/src/communication/communicator.hpp
+++ b/arbor/communication/communicator.hpp
@@ -7,20 +7,22 @@
 #include <utility>
 #include <vector>
 
-#include <algorithms.hpp>
-#include <common_types.hpp>
-#include <communication/gathered_vector.hpp>
-#include <communication/distributed_context.hpp>
-#include <connection.hpp>
-#include <domain_decomposition.hpp>
-#include <event_queue.hpp>
-#include <profiling/profiler.hpp>
-#include <recipe.hpp>
-#include <spike.hpp>
-#include <util/debug.hpp>
-#include <util/double_buffer.hpp>
-#include <util/partition.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/communication/gathered_vector.hpp>
+#include <arbor/distributed_context.hpp>
+#include <arbor/spike.hpp>
+
+#include "algorithms.hpp"
+#include "connection.hpp"
+#include "domain_decomposition.hpp"
+#include "event_queue.hpp"
+#include "profile/profiler_macro.hpp"
+#include "recipe.hpp"
+#include "threading/threading.hpp"
+#include "util/double_buffer.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 
@@ -131,7 +133,7 @@ public:
 
     /// The range of event queues that belong to cells in group i.
     std::pair<cell_size_type, cell_size_type> group_queue_range(cell_size_type i) {
-        EXPECTS(i<num_local_groups_);
+        arb_assert(i<num_local_groups_);
         return index_part_[i];
     }
 
@@ -176,7 +178,7 @@ public:
             const gathered_vector<spike>& global_spikes,
             std::vector<pse_vector>& queues)
     {
-        EXPECTS(queues.size()==num_local_cells_);
+        arb_assert(queues.size()==num_local_cells_);
 
         using util::subrange_view;
         using util::make_span;
diff --git a/arbor/communication/mpi.cpp b/arbor/communication/mpi.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92619e050479874b8ef3e85ed5fd408d009f6529
--- /dev/null
+++ b/arbor/communication/mpi.cpp
@@ -0,0 +1,25 @@
+#include <mpi.h>
+
+#include "communication/mpi.hpp"
+
+namespace arb {
+namespace mpi {
+
+int rank(MPI_Comm comm) { // Returns the rank that MPI_Comm_rank reports for comm.
+    int r; // output slot for MPI_Comm_rank; only returned when the call succeeds
+    MPI_OR_THROW(MPI_Comm_rank, comm, &r); // nonzero MPI return code -> throws mpi_error
+    return r;
+}
+
+int size(MPI_Comm comm) { // Returns the size that MPI_Comm_size reports for comm.
+    int s; // output slot for MPI_Comm_size; only returned when the call succeeds
+    MPI_OR_THROW(MPI_Comm_size, comm, &s); // nonzero MPI return code -> throws mpi_error
+    return s;
+}
+
+void barrier(MPI_Comm comm) { // Calls MPI_Barrier on comm.
+    MPI_OR_THROW(MPI_Barrier, comm); // nonzero MPI return code -> throws mpi_error
+}
+
+} // namespace mpi
+} // namespace arb
diff --git a/src/communication/mpi.hpp b/arbor/communication/mpi.hpp
similarity index 81%
rename from src/communication/mpi.hpp
rename to arbor/communication/mpi.hpp
index 5f593066217dd3420b78262643ada1c28eddee48..7b9e64311a197c81bd1966bb361ce37cfa4399ce 100644
--- a/src/communication/mpi.hpp
+++ b/arbor/communication/mpi.hpp
@@ -5,45 +5,26 @@
 #include <type_traits>
 #include <vector>
 
-#include <cassert>
-
 #include <mpi.h>
 
-#include <algorithms.hpp>
-#include <communication/gathered_vector.hpp>
-#include <util/debug.hpp>
-#include <profiling/profiler.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/communication/gathered_vector.hpp>
+#include <arbor/communication/mpi_error.hpp>
+
+#include "algorithms.hpp"
+#include "profile/profiler_macro.hpp"
 
 
 namespace arb {
 namespace mpi {
 
 // prototypes
-void init(int *argc, char ***argv);
-void finalize();
 int rank(MPI_Comm);
 int size(MPI_Comm);
 void barrier(MPI_Comm);
 
-void handle_mpi_error(const char* msg, int code);
-
-// Exception class to be thrown when MPI API calls return a error code other
-// than MPI_SUCCESS.
-class mpi_error: public std::exception {
-public:
-    mpi_error(const char* msg, int code);
-    const char* what() const throw() override;
-    int error_code() const;
-
-private:
-    std::string message_;
-    int error_code_;
-};
-
-struct scoped_guard {
-    scoped_guard(int *argc, char ***argv);
-    ~scoped_guard();
-};
+#define MPI_OR_THROW(fn, ...)\
+while (int r_ = fn(__VA_ARGS__)) throw mpi_error(r_, #fn) /* Calls fn(__VA_ARGS__); a nonzero return code r_ is thrown as mpi_error(r_, <stringized fn>). The body always throws, so the while never runs a second iteration; the caller supplies the trailing ';'. */
 
 // Type traits for automatically setting MPI_Datatype information for C++ types.
 template <typename T>
@@ -86,10 +67,10 @@ std::vector<T> gather(T value, int root, MPI_Comm comm) {
     auto buffer_size = (rank(comm)==root) ? size(comm) : 0;
     std::vector<T> buffer(buffer_size);
 
-    handle_mpi_error("MPI_Gather",
-    MPI_Gather( &value,        traits::count(), traits::mpi_type(), // send buffer
+    MPI_OR_THROW(MPI_Gather,
+                &value,        traits::count(), traits::mpi_type(), // send buffer
                 buffer.data(), traits::count(), traits::mpi_type(), // receive buffer
-                root, comm));
+                root, comm);
 
     return buffer;
 }
@@ -102,11 +83,10 @@ std::vector<T> gather_all(T value, MPI_Comm comm) {
     using traits = mpi_traits<T>;
     std::vector<T> buffer(size(comm));
 
-    handle_mpi_error("MPI_Allgather",
-        MPI_Allgather(
+    MPI_OR_THROW(MPI_Allgather,
             &value,        traits::count(), traits::mpi_type(), // send buffer
             buffer.data(), traits::count(), traits::mpi_type(), // receive buffer
-            comm));
+            comm);
 
     return buffer;
 }
@@ -123,11 +103,10 @@ inline std::vector<std::string> gather(std::string str, int root, MPI_Comm comm)
     // const_cast required for MPI implementations that don't use const* in
     // their interfaces.
     std::string::value_type* ptr = const_cast<std::string::value_type*>(str.data());
-    handle_mpi_error("MPI_Gatherv",
-        MPI_Gatherv(
+    MPI_OR_THROW(MPI_Gatherv,
             ptr, counts[rank(comm)], traits::mpi_type(),                       // send
             buffer.data(), counts.data(), displs.data(), traits::mpi_type(),   // receive
-            root, comm));
+            root, comm);
 
     // Unpack the raw string data into a vector of strings.
     std::vector<std::string> result;
@@ -150,12 +129,11 @@ std::vector<T> gather_all(const std::vector<T>& values, MPI_Comm comm) {
     auto displs = algorithms::make_index(counts);
 
     std::vector<T> buffer(displs.back()/traits::count());
-    handle_mpi_error("MPI_Allgatherv",
-        MPI_Allgatherv(
+    MPI_OR_THROW(MPI_Allgatherv,
             // const_cast required for MPI implementations that don't use const* in their interfaces
             const_cast<T*>(values.data()), counts[rank(comm)], traits::mpi_type(),  // send buffer
             buffer.data(), counts.data(), displs.data(), traits::mpi_type(), // receive buffer
-            comm));
+            comm);
 
     return buffer;
 }
@@ -179,12 +157,11 @@ gathered_vector<T> gather_all_with_partition(const std::vector<T>& values, MPI_C
 
     std::vector<T> buffer(displs.back()/traits::count());
 
-    handle_mpi_error("MPI_Allgatherv",
-        MPI_Allgatherv(
+    MPI_OR_THROW(MPI_Allgatherv,
             // const_cast required for MPI implementations that don't use const* in their interfaces
             const_cast<T*>(values.data()), counts[rank(comm)], traits::mpi_type(), // send buffer
             buffer.data(), counts.data(), displs.data(), traits::mpi_type(), // receive buffer
-            comm));
+            comm);
 
     for (auto& d : displs) {
         d /= traits::count();
@@ -204,8 +181,8 @@ T reduce(T value, MPI_Op op, int root, MPI_Comm comm) {
 
     T result;
 
-    handle_mpi_error("MPI_Reduce",
-        MPI_Reduce(&value, &result, 1, traits::mpi_type(), op, root, comm));
+    MPI_OR_THROW(MPI_Reduce,
+        &value, &result, 1, traits::mpi_type(), op, root, comm);
 
     return result;
 }
@@ -240,8 +217,8 @@ T broadcast(T value, int root, MPI_Comm comm) {
 
     using traits = mpi_traits<T>;
 
-    handle_mpi_error("MPI_Bcast",
-        MPI_Bcast(&value, traits::count(), traits::mpi_type(), root, comm));
+    MPI_OR_THROW(MPI_Bcast,
+        &value, traits::count(), traits::mpi_type(), root, comm);
 
     return value;
 }
@@ -254,8 +231,8 @@ T broadcast(int root, MPI_Comm comm) {
     using traits = mpi_traits<T>;
     T value;
 
-    handle_mpi_error("MPI_Bcast",
-        MPI_Bcast(&value, traits::count(), traits::mpi_type(), root, comm));
+    MPI_OR_THROW(MPI_Bcast,
+        &value, traits::count(), traits::mpi_type(), root, comm);
 
     return value;
 }
diff --git a/arbor/communication/mpi_context.cpp b/arbor/communication/mpi_context.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4161e3b84b26e8e804922ad99cd6b10b289511e4
--- /dev/null
+++ b/arbor/communication/mpi_context.cpp
@@ -0,0 +1,74 @@
+// Attempting to acquire an MPI context without MPI enabled will produce
+// a link error.
+
+#ifndef ARB_HAVE_MPI
+#error "build only if MPI is enabled"
+#endif
+
+#include <string>
+#include <vector>
+
+#include <mpi.h>
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/spike.hpp>
+
+#include "communication/mpi.hpp"
+
+namespace arb {
+
+// Throws mpi_error (raised by MPI_OR_THROW inside the mpi:: wrappers) if MPI calls fail.
+struct mpi_context_impl {
+    int size_; // cached mpi::size(comm_), set in the constructor
+    int rank_; // cached mpi::rank(comm_), set in the constructor
+    MPI_Comm comm_; // communicator handed to every mpi:: call below
+
+    explicit mpi_context_impl(MPI_Comm comm): comm_(comm) { // stores the handle as given; no duplication of comm is performed here
+        size_ = mpi::size(comm_);
+        rank_ = mpi::rank(comm_);
+    }
+
+    gathered_vector<arb::spike>
+    gather_spikes(const std::vector<arb::spike>& local_spikes) const {
+        return mpi::gather_all_with_partition(local_spikes, comm_); // MPI_Allgatherv-based gather of the local spike vectors over comm_
+    }
+
+    std::string name() const { return "MPI"; } // label identifying this backend
+    int id() const { return rank_; } // rank cached at construction
+    int size() const { return size_; } // communicator size cached at construction
+
+    template <typename T>
+    T min(T value) const {
+        return mpi::reduce(value, MPI_MIN, comm_); // 3-argument (rootless) mpi::reduce overload; presumably an all-reduce, confirm in communication/mpi.hpp
+    }
+
+    template <typename T>
+    T max(T value) const {
+        return mpi::reduce(value, MPI_MAX, comm_); // same rootless overload as min(), with MPI_MAX
+    }
+
+    template <typename T>
+    T sum(T value) const {
+        return mpi::reduce(value, MPI_SUM, comm_); // same rootless overload as min(), with MPI_SUM
+    }
+
+    template <typename T>
+    std::vector<T> gather(T value, int root) const {
+        return mpi::gather(value, root, comm_); // the generic mpi::gather sizes its result to size(comm) on root and to 0 on other ranks
+    }
+
+    void barrier() const {
+        mpi::barrier(comm_); // MPI_Barrier on comm_
+    }
+};
+
+distributed_context mpi_context() { // Builds a context over MPI_COMM_WORLD.
+    return mpi_context_impl(MPI_COMM_WORLD); // relies on distributed_context being constructible from mpi_context_impl (conversion defined outside this file)
+}
+
+template <> // explicit specialization; the primary template is declared elsewhere (presumably <arbor/distributed_context.hpp>, confirm)
+distributed_context mpi_context(MPI_Comm comm) { // Builds a context over the caller-supplied communicator.
+    return mpi_context_impl(comm); // relies on distributed_context being constructible from mpi_context_impl
+}
+
+} // namespace arb
diff --git a/arbor/communication/mpi_error.cpp b/arbor/communication/mpi_error.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4f0b62c54e8f00c99e8330d4ba20c625eccf73b6
--- /dev/null
+++ b/arbor/communication/mpi_error.cpp
@@ -0,0 +1,12 @@
+#include <mpi.h>
+
+#include <arbor/communication/mpi_error.hpp>
+
+namespace arb {
+
+const mpi_error_category_impl& mpi_error_category() { // Accessor for the single shared mpi_error_category_impl instance.
+    static mpi_error_category_impl the_category; // function-local static: every call returns a reference to this same object
+    return the_category;
+}
+
+} // namespace arb
diff --git a/src/compartment.hpp b/arbor/compartment.hpp
similarity index 95%
rename from src/compartment.hpp
rename to arbor/compartment.hpp
index b68118ef825c6e3cc137cbc81d4a76b05c9f520c..fa220ccf828b534d5f53a3d034270ba5514e456c 100644
--- a/src/compartment.hpp
+++ b/arbor/compartment.hpp
@@ -3,15 +3,16 @@
 #include <iterator>
 #include <utility>
 
-#include <common_types.hpp>
-#include <math.hpp>
-#include <util/compat.hpp>
-#include <util/counter.hpp>
-#include <util/iterutil.hpp>
-#include <util/partition.hpp>
-#include <util/span.hpp>
-#include <util/rangeutil.hpp>
-#include <util/transform.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "math.hpp"
+#include "util/counter.hpp"
+#include "util/iterutil.hpp"
+#include "util/partition.hpp"
+#include "util/span.hpp"
+#include "util/rangeutil.hpp"
+#include "util/transform.hpp"
 
 namespace arb {
 
@@ -189,7 +190,7 @@ public:
         nseg_ = size(segs_);
         scale_ = segs_.bounds().second/n;
         assign(radii_, radii);
-        EXPECTS(size(radii_)==size(offsets_));
+        arb_assert(size(radii_)==size(offsets_));
     }
 
     div_compartment operator()(size_type i) const {
@@ -219,7 +220,7 @@ protected:
     };
 
     sub_segment_index locate(value_type x) const {
-        EXPECTS(x>=0);
+        arb_assert(x>=0);
 
         auto i = segs_.index(x);
         if (i==segs_.npos) {
@@ -263,7 +264,7 @@ public:
 
 protected:
     semi_compartment sub_segment_frustrum(sub_segment_index a, sub_segment_index b) const {
-        EXPECTS(a.i==b.i && a.p<=b.p);
+        arb_assert(a.i==b.i && a.p<=b.p);
 
         auto seg = segs_[a.i];
         auto l = (b.p-a.p)*(seg.second-seg.first);
diff --git a/src/connection.hpp b/arbor/connection.hpp
similarity index 95%
rename from src/connection.hpp
rename to arbor/connection.hpp
index 018c69b4551dc32e95f40f4f31407037d1a8e2bc..40d19287763395d9513cd82caaaf5a5f4b080595 100644
--- a/src/connection.hpp
+++ b/arbor/connection.hpp
@@ -2,9 +2,10 @@
 
 #include <cstdint>
 
-#include <common_types.hpp>
-#include <event_queue.hpp>
-#include <spike.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "event_queue.hpp"
 
 namespace arb {
 
diff --git a/src/constants.hpp b/arbor/constants.hpp
similarity index 100%
rename from src/constants.hpp
rename to arbor/constants.hpp
diff --git a/src/domain_decomposition.hpp b/arbor/domain_decomposition.hpp
similarity index 86%
rename from src/domain_decomposition.hpp
rename to arbor/domain_decomposition.hpp
index a4f209d2d415b69b9ead9808e313513b2215d868..b225970d7e3d08204592ab9163a7b2298ebf8e25 100644
--- a/src/domain_decomposition.hpp
+++ b/arbor/domain_decomposition.hpp
@@ -5,14 +5,15 @@
 #include <unordered_map>
 #include <vector>
 
-#include <backends.hpp>
-#include <common_types.hpp>
-#include <hardware/node_info.hpp>
-#include <recipe.hpp>
-#include <util/optional.hpp>
-#include <util/partition.hpp>
-#include <util/range.hpp>
-#include <util/transform.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "backends.hpp"
+#include "hardware/node_info.hpp"
+#include "recipe.hpp"
+#include "util/partition.hpp"
+#include "util/range.hpp"
+#include "util/transform.hpp"
 
 namespace arb {
 
@@ -37,7 +38,7 @@ struct group_description {
     group_description(cell_kind k, std::vector<cell_gid_type> g, backend_kind b):
         kind(k), gids(std::move(g)), backend(b)
     {
-        EXPECTS(util::is_sorted(gids));
+        arb_assert(util::is_sorted(gids));
     }
 };
 
diff --git a/src/epoch.hpp b/arbor/epoch.hpp
similarity index 89%
rename from src/epoch.hpp
rename to arbor/epoch.hpp
index 3f9eb4f8350a60b7b76316f840bf63a1c8f8667d..85d2ebcea5f4baa89aaea150c712db48f1768cfc 100644
--- a/src/epoch.hpp
+++ b/arbor/epoch.hpp
@@ -2,8 +2,8 @@
 
 #include <cstdint>
 
-#include <common_types.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
 
 namespace arb {
 
@@ -28,7 +28,7 @@ struct epoch {
     epoch(std::size_t id, time_type tfinal): id(id), tfinal(tfinal) {}
 
     void advance(time_type t) {
-        EXPECTS(t>=tfinal);
+        arb_assert(t>=tfinal);
         tfinal = t;
         ++id;
     }
diff --git a/src/event_binner.cpp b/arbor/event_binner.cpp
similarity index 87%
rename from src/event_binner.cpp
rename to arbor/event_binner.cpp
index 1b5d8c0a1f3bb2828f874cec768c3ababbb4a9ac..7fcdbbac4f790b41ef5021d17d526755f95c0ee7 100644
--- a/src/event_binner.cpp
+++ b/arbor/event_binner.cpp
@@ -4,10 +4,11 @@
 #include <stdexcept>
 #include <unordered_map>
 
-#include <common_types.hpp>
-#include <event_binner.hpp>
-#include <spike.hpp>
-#include <util/optional.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "event_binner.hpp"
 
 namespace arb {
 
diff --git a/src/event_binner.hpp b/arbor/event_binner.hpp
similarity index 91%
rename from src/event_binner.hpp
rename to arbor/event_binner.hpp
index f465327405b427cc670980d104cc9c94bf69143f..25ff4f0bf444fe1da659a9fe8b67b63eb83186f9 100644
--- a/src/event_binner.hpp
+++ b/arbor/event_binner.hpp
@@ -3,9 +3,9 @@
 #include <limits>
 #include <unordered_map>
 
-#include <common_types.hpp>
-#include <spike.hpp>
-#include <util/optional.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+#include <arbor/util/optional.hpp>
 
 namespace arb {
 
diff --git a/src/event_generator.hpp b/arbor/event_generator.hpp
similarity index 97%
rename from src/event_generator.hpp
rename to arbor/event_generator.hpp
index d72263b68f63cff60f0e4ff9ce16c723f1742f03..6b4fc482bf848fbe84dc2971b7550e7207ddbfca 100644
--- a/src/event_generator.hpp
+++ b/arbor/event_generator.hpp
@@ -4,11 +4,12 @@
 #include <memory>
 #include <random>
 
-#include <common_types.hpp>
-#include <event_queue.hpp>
-#include <time_sequence.hpp>
-#include <util/range.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+
+#include "event_queue.hpp"
+#include "time_sequence.hpp"
+#include "util/range.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 
@@ -181,7 +182,7 @@ struct seq_generator {
         events_(events),
         it_(std::begin(events_))
     {
-        EXPECTS(util::is_sorted(events_));
+        arb_assert(util::is_sorted(events_));
     }
 
     postsynaptic_spike_event front() {
diff --git a/src/event_queue.hpp b/arbor/event_queue.hpp
similarity index 94%
rename from src/event_queue.hpp
rename to arbor/event_queue.hpp
index 0d3fe5535954e7275408bb4b10c2c0bc91f867a2..0a5a24d9e6397c3eb2a6c4ca869a23e1d922f909 100644
--- a/src/event_queue.hpp
+++ b/arbor/event_queue.hpp
@@ -7,13 +7,14 @@
 #include <type_traits>
 #include <utility>
 
-#include <common_types.hpp>
-#include <generic_event.hpp>
-#include <util/meta.hpp>
-#include <util/optional.hpp>
-#include <util/range.hpp>
-#include <util/rangeutil.hpp>
-#include <util/strprintf.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "generic_event.hpp"
+#include "util/meta.hpp"
+#include "util/range.hpp"
+#include "util/rangeutil.hpp"
+#include "util/strprintf.hpp"
 
 namespace arb {
 
diff --git a/src/fvm_layout.cpp b/arbor/fvm_layout.cpp
similarity index 99%
rename from src/fvm_layout.cpp
rename to arbor/fvm_layout.cpp
index 90d6509752d8b9725fe02a2ef2d8b2d79afdf20b..12b5d7caad8278ed77275dbce5919f946395afa0 100644
--- a/src/fvm_layout.cpp
+++ b/arbor/fvm_layout.cpp
@@ -3,13 +3,14 @@
 #include <unordered_set>
 #include <vector>
 
-#include <fvm_layout.hpp>
-#include <util/enumhash.hpp>
-#include <util/maputil.hpp>
-#include <util/meta.hpp>
-#include <util/partition.hpp>
-#include <util/rangeutil.hpp>
-#include <util/transform.hpp>
+#include <arbor/util/enumhash.hpp>
+
+#include "fvm_layout.hpp"
+#include "util/maputil.hpp"
+#include "util/meta.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
+#include "util/transform.hpp"
 
 namespace arb {
 
diff --git a/src/fvm_layout.hpp b/arbor/fvm_layout.hpp
similarity index 93%
rename from src/fvm_layout.hpp
rename to arbor/fvm_layout.hpp
index 22c1cf10e2c8bc3d67e0aa45dd9fc44a216a73fa..c23d0372f741b03214f16268e3ad04c4704960e7 100644
--- a/src/fvm_layout.hpp
+++ b/arbor/fvm_layout.hpp
@@ -1,14 +1,15 @@
 #pragma once
 
-#include <backends/fvm_types.hpp>
-#include <cell.hpp>
-#include <compartment.hpp>
-#include <mechanism.hpp>
-#include <mechinfo.hpp>
-#include <mechcat.hpp>
-#include <util/deduce_return.hpp>
-#include <util/enumhash.hpp>
-#include <util/span.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/mechinfo.hpp>
+#include <arbor/mechcat.hpp>
+#include <arbor/util/enumhash.hpp>
+
+#include "cell.hpp"
+#include "compartment.hpp"
+#include "util/deduce_return.hpp"
+#include "util/span.hpp"
 
 namespace arb {
 
@@ -80,7 +81,7 @@ struct fvm_discretization {
         auto cell_segs = cell_segment_part()[cell_index];
 
         size_type seg = segloc.segment+cell_segs.first;
-        EXPECTS(seg<cell_segs.second);
+        arb_assert(seg<cell_segs.second);
         return segments[seg].cv_by_position(segloc.position);
     }
 };
diff --git a/src/fvm_lowered_cell.hpp b/arbor/fvm_lowered_cell.hpp
similarity index 89%
rename from src/fvm_lowered_cell.hpp
rename to arbor/fvm_lowered_cell.hpp
index 66c04b7f785b89b890f17f681d650e72e2b53391..3b22c5ffb8a5618bc126b5c8da8abdb2316dd498 100644
--- a/src/fvm_lowered_cell.hpp
+++ b/arbor/fvm_lowered_cell.hpp
@@ -3,9 +3,11 @@
 #include <memory>
 #include <vector>
 
+#include <arbor/fvm_types.hpp>
+
 #include <backends.hpp>
 #include <backends/event.hpp>
-#include <backends/fvm_types.hpp>
+#include <backends/threshold_crossing.hpp>
 #include <recipe.hpp>
 #include <sampler_map.hpp>
 #include <util/range.hpp>
@@ -33,8 +35,7 @@ struct fvm_lowered_cell {
         fvm_value_type tfinal,
         fvm_value_type max_dt,
         std::vector<deliverable_event> staged_events,
-        std::vector<sample_event> staged_samples,
-        bool check_physical = false) = 0;
+        std::vector<sample_event> staged_samples) = 0;
 
     virtual fvm_value_type time() const = 0;
 
diff --git a/src/fvm_lowered_cell_impl.cpp b/arbor/fvm_lowered_cell_impl.cpp
similarity index 80%
rename from src/fvm_lowered_cell_impl.cpp
rename to arbor/fvm_lowered_cell_impl.cpp
index f2529be36ab4493dab3a8c0796c65b9b78012c56..fe317e93b18ec909dd33e92dffcd32ca4d4106b3 100644
--- a/src/fvm_lowered_cell_impl.cpp
+++ b/arbor/fvm_lowered_cell_impl.cpp
@@ -1,12 +1,12 @@
 #include <memory>
 #include <stdexcept>
 
-#include <backends.hpp>
-#include <backends/multicore/fvm.hpp>
+#include "backends.hpp"
+#include "backends/multicore/fvm.hpp"
 #ifdef ARB_HAVE_GPU
-#include <backends/gpu/fvm.hpp>
+#include "backends/gpu/fvm.hpp"
 #endif
-#include <fvm_lowered_cell_impl.hpp>
+#include "fvm_lowered_cell_impl.hpp"
 
 namespace arb {
 
diff --git a/src/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
similarity index 93%
rename from src/fvm_lowered_cell_impl.hpp
rename to arbor/fvm_lowered_cell_impl.hpp
index c793659b3382497092819292ce69f0ab2c7e527d..11e9f5356ee1a519f30077e2f9c960faf122ba5d 100644
--- a/src/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -13,21 +13,22 @@
 #include <vector>
 #include <stdexcept>
 
-#include <common_types.hpp>
-#include <builtin_mechanisms.hpp>
-#include <fvm_layout.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <ion.hpp>
-#include <matrix.hpp>
-#include <profiling/profiler.hpp>
-#include <recipe.hpp>
-#include <sampler_map.hpp>
-#include <util/meta.hpp>
-#include <util/range.hpp>
-#include <util/rangeutil.hpp>
-#include <util/transform.hpp>
-
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/ion.hpp>
+
+#include "builtin_mechanisms.hpp"
+#include "fvm_layout.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "matrix.hpp"
+#include "profile/profiler_macro.hpp"
+#include "recipe.hpp"
+#include "sampler_map.hpp"
+#include "util/meta.hpp"
+#include "util/range.hpp"
+#include "util/rangeutil.hpp"
+#include "util/transform.hpp"
+
 
 namespace arb {
 
@@ -51,8 +52,7 @@ public:
         value_type tfinal,
         value_type max_dt,
         std::vector<deliverable_event> staged_events,
-        std::vector<sample_event> staged_samples,
-        bool check_physical = false) override;
+        std::vector<sample_event> staged_samples) override;
 
     value_type time() const override { return tmin_; }
 
@@ -82,6 +82,9 @@ private:
     value_type temperature_ = NAN;
     std::vector<mechanism_ptr> mechanisms_;
 
+    // Non-physical voltage check threshold, 0 => no check.
+    value_type check_voltage_mV = 0;
+
     // Host-side views/copies and local state.
     decltype(backend::host_view(sample_time_)) sample_time_host_;
     decltype(backend::host_view(sample_value_)) sample_value_host_;
@@ -97,7 +100,7 @@ private:
     // Assign tmin_ and call assert_tmin() if assertions on.
     void set_tmin(value_type t) {
         tmin_ = t;
-        EXPECTS((assert_tmin(), true));
+        arb_assert((assert_tmin(), true));
     }
 
     static unsigned dt_steps(value_type t0, value_type t1, value_type dt) {
@@ -137,8 +140,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
     value_type tfinal,
     value_type dt_max,
     std::vector<deliverable_event> staged_events,
-    std::vector<sample_event> staged_samples,
-    bool check_physical)
+    std::vector<sample_event> staged_samples)
 {
     using util::as_const;
 
@@ -155,7 +157,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
     state_->deliverable_events.init(std::move(staged_events));
     sample_events_.init(std::move(staged_samples));
 
-    EXPECTS((assert_tmin(), true));
+    arb_assert((assert_tmin(), true));
     unsigned remaining_steps = dt_steps(tmin_, tfinal, dt_max);
     PL();
 
@@ -227,9 +229,9 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         // Check for non-physical solutions:
 
-        if (check_physical) {
+        if (check_voltage_mV>0) {
             PE(advance_integrate_physicalcheck);
-            assert_voltage_bounded(1000.);
+            assert_voltage_bounded(check_voltage_mV);
             PL();
         }
 
@@ -311,10 +313,14 @@ void fvm_lowered_cell_impl<B>::initialize(
         return cat->instance<backend>(name);
     };
 
+    // Check for physically reasonable membrane voltages?
+
+    check_voltage_mV = global_props.membrane_voltage_limit_mV;
+
     // Discretize cells, build matrix.
 
     fvm_discretization D = fvm_discretize(cells);
-    EXPECTS(D.ncell == ncell);
+    arb_assert(D.ncell == ncell);
     matrix_ = matrix<backend>(D.parent_cv, D.cell_cv_bounds, D.cv_capacitance, D.face_conductance, D.cv_area);
     sample_events_ = sample_event_stream(ncell);
 
diff --git a/src/generic_event.hpp b/arbor/generic_event.hpp
similarity index 100%
rename from src/generic_event.hpp
rename to arbor/generic_event.hpp
diff --git a/src/hardware/affinity.cpp b/arbor/hardware/affinity.cpp
similarity index 96%
rename from src/hardware/affinity.cpp
rename to arbor/hardware/affinity.cpp
index 54b5f47b7e68ed1f7d455597b7ae521f6b97cf39..29ef7ee428f93041afb4912606fb8ed3cc59939b 100644
--- a/src/hardware/affinity.cpp
+++ b/arbor/hardware/affinity.cpp
@@ -1,7 +1,7 @@
 #include <cstdlib>
 #include <vector>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 #ifdef __linux__
 
diff --git a/src/hardware/affinity.hpp b/arbor/hardware/affinity.hpp
similarity index 96%
rename from src/hardware/affinity.hpp
rename to arbor/hardware/affinity.hpp
index 6c58e7bb894995d4b7e8bc7c0c3d6e1de5001d22..8c5cbc59db3f3213eb5a1b6337bf83bcac14b5f1 100644
--- a/src/hardware/affinity.hpp
+++ b/arbor/hardware/affinity.hpp
@@ -3,7 +3,7 @@
 #include <cstdint>
 #include <vector>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 namespace arb {
 namespace hw {
diff --git a/src/hardware/gpu.cpp b/arbor/hardware/gpu.cpp
similarity index 100%
rename from src/hardware/gpu.cpp
rename to arbor/hardware/gpu.cpp
diff --git a/src/hardware/gpu.hpp b/arbor/hardware/gpu.hpp
similarity index 100%
rename from src/hardware/gpu.hpp
rename to arbor/hardware/gpu.hpp
diff --git a/src/hardware/memory.cpp b/arbor/hardware/memory.cpp
similarity index 100%
rename from src/hardware/memory.cpp
rename to arbor/hardware/memory.cpp
diff --git a/src/hardware/memory.hpp b/arbor/hardware/memory.hpp
similarity index 81%
rename from src/hardware/memory.hpp
rename to arbor/hardware/memory.hpp
index b5cb68968d3783a375a217485870b742315f0b0e..1632dbfcadd0a8dd9896312ce635f486dc9b403a 100644
--- a/src/hardware/memory.hpp
+++ b/arbor/hardware/memory.hpp
@@ -12,12 +12,12 @@ namespace hw {
 using memory_size_type = std::int64_t;
 
 // Returns the amount of memory currently allocated in bytes.
-// Returns a negative value on error, or if the operation is not supported on
+// Returns -1 on error, or if the operation is not supported on
 // the target architecture.
 memory_size_type allocated_memory();
 
 // Returns the amount of memory currently allocated on the gpu in bytes.
-// Returns a negative value on error, or if not using the gpu
+// Returns -1 on error, or if not using the gpu.
 memory_size_type gpu_allocated_memory();
 
 } // namespace hw
diff --git a/src/hardware/node_info.cpp b/arbor/hardware/node_info.cpp
similarity index 100%
rename from src/hardware/node_info.cpp
rename to arbor/hardware/node_info.cpp
diff --git a/src/hardware/node_info.hpp b/arbor/hardware/node_info.hpp
similarity index 100%
rename from src/hardware/node_info.hpp
rename to arbor/hardware/node_info.hpp
diff --git a/src/hardware/power.cpp b/arbor/hardware/power.cpp
similarity index 50%
rename from src/hardware/power.cpp
rename to arbor/hardware/power.cpp
index 4819b6307a481dd8e59623bf6c79b787d3a4a22b..7ca8df68ff494e55a2ef8ee8e65aa7d435240e35 100644
--- a/src/hardware/power.cpp
+++ b/arbor/hardware/power.cpp
@@ -2,15 +2,21 @@
 
 #include "power.hpp"
 
+// Currently only supporting Cray PM counters.
+
+#define CRAY_PM_COUNTER_ENERGY "/sys/cray/pm_counters/energy"
+
 namespace arb {
 namespace hw {
 
-#ifdef ARB_HAVE_CRAY
+bool has_energy_measurement() {
+    return static_cast<bool>(std::ifstream(CRAY_PM_COUNTER_ENERGY));
+}
 
 energy_size_type energy() {
     energy_size_type result = energy_size_type(-1);
 
-    std::ifstream fid("/sys/cray/pm_counters/energy");
+    std::ifstream fid(CRAY_PM_COUNTER_ENERGY);
     if (fid) {
         fid >> result;
     }
@@ -18,13 +24,6 @@ energy_size_type energy() {
     return result;
 }
 
-#else
-
-energy_size_type energy() {
-    return -1;
-}
-
-#endif
-
 } // namespace hw
 } // namespace arb
+
diff --git a/src/hardware/power.hpp b/arbor/hardware/power.hpp
similarity index 75%
rename from src/hardware/power.hpp
rename to arbor/hardware/power.hpp
index 2d3c2e593623c74a2a8093fb9c3ca322fc02a585..003a30798b2ad77d4913e25b96c2f063c3e6305d 100644
--- a/src/hardware/power.hpp
+++ b/arbor/hardware/power.hpp
@@ -5,6 +5,9 @@
 namespace arb {
 namespace hw {
 
+// Test at run time whether an energy counter is available to read:
+bool has_energy_measurement();
+
 // Energy in Joules (J)
 using energy_size_type = std::uint64_t;
 
diff --git a/src/io/exporter.hpp b/arbor/io/exporter.hpp
similarity index 88%
rename from src/io/exporter.hpp
rename to arbor/io/exporter.hpp
index 7804d9e4a3d360752a8e9685583d73ee5a7c8159..9370391dc84a1c6f6e3ec551bb241bc53f87afcd 100644
--- a/src/io/exporter.hpp
+++ b/arbor/io/exporter.hpp
@@ -3,8 +3,8 @@
 #include <random>
 #include <string>
 
-#include <common_types.hpp>
-#include <spike.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
 
 namespace arb {
 namespace io {
diff --git a/src/io/exporter_spike_file.hpp b/arbor/io/exporter_spike_file.hpp
similarity index 95%
rename from src/io/exporter_spike_file.hpp
rename to arbor/io/exporter_spike_file.hpp
index 24fc74d6c5b83a7ca5ddec2da06dcdaaf75418a1..7ee80d7a3bbea417845a76da94838cefd66c1d46 100644
--- a/src/io/exporter_spike_file.hpp
+++ b/arbor/io/exporter_spike_file.hpp
@@ -10,10 +10,11 @@
 #include <cstring>
 #include <cstdio>
 
-#include <common_types.hpp>
-#include <io/exporter.hpp>
-#include <util/file.hpp>
-#include <spike.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "io/exporter.hpp"
+#include "util/file.hpp"
 
 namespace arb {
 namespace io {
diff --git a/arbor/io/locked_ostream.cpp b/arbor/io/locked_ostream.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1aa8552170f06be746355406368fbf3907f542ed
--- /dev/null
+++ b/arbor/io/locked_ostream.cpp
@@ -0,0 +1,93 @@
+#include <ostream>
+#include <vector>
+#include <unordered_map>
+#include <memory>
+#include <mutex>
+
+#include "locked_ostream.hpp"
+
+namespace arb {
+namespace io {
+
+// Table of mutexes keyed by stream buffer. Entries hold weak references, so
+// a mutex lives only as long as some locked_ostream shares it.
+using tbl_type = std::unordered_map<std::streambuf*, std::weak_ptr<std::mutex>>;
+
+// The table itself, constructed on first use.
+static tbl_type& g_mex_tbl() {
+    static tbl_type tbl;
+    return tbl;
+}
+
+// Mutex protecting access to g_mex_tbl().
+static std::mutex& g_mex_tbl_mex() {
+    static std::mutex mex;
+    return mex;
+}
+
+// Return the mutex shared by all locked_ostreams over `b`, creating it if no
+// live one is registered. Returns an empty pointer if `b` is null.
+static std::shared_ptr<std::mutex> register_sbuf(std::streambuf* b) {
+    if (!b) {
+        return std::shared_ptr<std::mutex>();
+    }
+
+    std::lock_guard<std::mutex> lock(g_mex_tbl_mex());
+
+    auto& wptr = g_mex_tbl()[b];
+    auto mex = wptr.lock();
+    if (!mex) {
+        mex = std::make_shared<std::mutex>();
+        wptr = mex;
+    }
+    return mex;
+}
+
+// Drop the table entry for `b` if no locked_ostream still holds its mutex.
+static void deregister_sbuf(std::streambuf* b) {
+    if (!b) {
+        return;
+    }
+
+    std::lock_guard<std::mutex> lock(g_mex_tbl_mex());
+
+    auto i = g_mex_tbl().find(b);
+    if (i!=g_mex_tbl().end() && i->second.expired()) {
+        g_mex_tbl().erase(i);
+    }
+}
+
+locked_ostream::locked_ostream(std::streambuf *b):
+    std::ostream(b),
+    mex(register_sbuf(b))
+{}
+
+// Take over the stream buffer and mutex of `other`, leaving `other` with
+// neither.
+locked_ostream::locked_ostream(locked_ostream&& other):
+    std::ostream(std::move(other)),
+    mex(std::move(other.mex))
+{
+    set_rdbuf(other.rdbuf());
+    other.set_rdbuf(nullptr);
+}
+
+locked_ostream::~locked_ostream() {
+    // Release our share of the mutex first so that deregister_sbuf() sees
+    // the entry as unused if this was the last stream over the buffer.
+    mex.reset();
+    deregister_sbuf(rdbuf());
+}
+
+// Lock the mutex associated with this stream's buffer. A stream without a
+// buffer (constructed from null, or moved from) has no mutex; it gets an
+// empty lock that owns nothing.
+std::unique_lock<std::mutex> locked_ostream::guard() {
+    if (!mex) {
+        return std::unique_lock<std::mutex>();
+    }
+    return std::unique_lock<std::mutex>(*mex);
+}
+
+} // namespace io
+} // namespace arb
diff --git a/arbor/io/locked_ostream.hpp b/arbor/io/locked_ostream.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..20ecfc74a6f3052174ea29225b9580d3db178dd0
--- /dev/null
+++ b/arbor/io/locked_ostream.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+// Lockable ostream over a provided streambuf.
+
+#include <ostream>
+#include <memory>
+#include <mutex>
+
+namespace arb {
+namespace io {
+
+struct locked_ostream: std::ostream {
+    locked_ostream(std::streambuf *b);
+    locked_ostream(locked_ostream&& other);
+
+    ~locked_ostream();
+
+    std::unique_lock<std::mutex> guard();
+
+private:
+    std::shared_ptr<std::mutex> mex;
+};
+
+} // namespace io
+} // namespace arb
diff --git a/arbor/io/sepval.hpp b/arbor/io/sepval.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d43fc4c26204ee0cb98bd06ccde1be5ddf6aec17
--- /dev/null
+++ b/arbor/io/sepval.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+// Format a sequence delimited by some separator.
+
+#include <iostream>
+
+namespace arb {
+namespace io {
+
+namespace impl {
+    template <typename Seq, typename Separator>
+    struct sepval {
+        const Seq& seq;
+        Separator sep;
+
+        sepval(const Seq& seq, Separator sep): seq(seq), sep(std::move(sep)) {}
+
+        friend std::ostream& operator<<(std::ostream& out, const sepval& sv) {
+            bool emitsep = false;
+            for (const auto& v: sv.seq) {
+                if (emitsep) out << sv.sep;
+                emitsep = true;
+                out << v;
+            }
+            return out;
+        }
+    };
+}
+
+// Adapt a sequence with arbitrary delimiter.
+
+template <typename Seq, typename Separator>
+impl::sepval<Seq, Separator> sepval(const Seq& seq, Separator sep) {
+    return impl::sepval<Seq, Separator>(seq, std::move(sep));
+}
+
+// Adapt a sequence with delimiter ", ".
+
+template <typename Seq>
+impl::sepval<Seq, const char*> csv(const Seq& seq) {
+    return sepval(seq, ", ");
+}
+
+} // namespace io
+} // namespace arb
+
diff --git a/arbor/io/serialize_hex.cpp b/arbor/io/serialize_hex.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..522f8401a60930f7607fc1c798849b500a5f6d65
--- /dev/null
+++ b/arbor/io/serialize_hex.cpp
@@ -0,0 +1,81 @@
+// Adaptor for hexadecimal output to a std::ostream.
+
+#include <cstddef>
+#include <ostream>
+#include <string>
+
+// Required for endianness macros:
+#include <sys/types.h>
+
+#include "io/serialize_hex.hpp"
+
+namespace arb {
+namespace io {
+
+namespace impl {
+
+    // Byte-order values taken from the endianness macros.
+    enum class endian {
+        little = __ORDER_LITTLE_ENDIAN__,
+        big = __ORDER_BIG_ENDIAN__,
+        native = __BYTE_ORDER__
+    };
+
+    // Write the `h.size` bytes starting at `h.from` to `out` as lower-case
+    // hexadecimal digits, in groups of `h.width` bytes separated by a single
+    // space. On a little-endian host the bytes within each group are emitted
+    // in reverse order. The final group may be shorter than `h.width`.
+    // A width of zero is treated as one. Returns `out`.
+    std::ostream& operator<<(std::ostream& out, const hex_inline_wrap& h) {
+        using std::ptrdiff_t;
+
+        constexpr bool little = endian::native==endian::little;
+        ptrdiff_t width = h.width;
+        if (width<1) {
+            // A zero width would never advance `from` in the loop below.
+            width = 1;
+        }
+
+        const unsigned char* from = h.from;
+        const unsigned char* end = h.from+h.size;
+        std::string buf;
+
+        // Append the two hex digits for one byte to the staging buffer.
+        auto emit = [&buf](unsigned char c) {
+            const char* digit = "0123456789abcdef";
+            buf += digit[(c>>4)&0xf];
+            buf += digit[c&0xf];
+        };
+
+        // Stage output in `buf`, flushing it to `out` once its length plus
+        // `bufmargin` reaches `bufsz` characters.
+        constexpr unsigned bufsz = 512;
+        unsigned bufmargin = 4*width+1;
+
+        buf.reserve(bufsz);
+        while (end-from>width) {
+            if (buf.size()+bufmargin>=bufsz) {
+                out << buf;
+                buf.clear();
+            }
+            for (ptrdiff_t i = 0; i<width; ++i) {
+                emit(little? from[width-i-1]: from[i]);
+            }
+            from += width;
+            buf += ' ';
+        }
+
+        // Final group: it may hold fewer than `width` bytes, so reverse over
+        // the bytes that actually remain instead of over a full group.
+        const ptrdiff_t remaining = end-from;
+        for (ptrdiff_t i = 0; i<remaining; ++i) {
+            emit(little? from[remaining-i-1]: from[i]);
+        }
+
+        out << buf;
+        return out;
+    }
+
+} // namespace impl
+} // namespace io
+} // namespace arb
diff --git a/arbor/io/serialize_hex.hpp b/arbor/io/serialize_hex.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..55783891cb759d3a297c703467fc48fb85aa0c87
--- /dev/null
+++ b/arbor/io/serialize_hex.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+// Adaptor for hexadecimal output to a std::ostream.
+
+#include <iostream>
+
+namespace arb {
+namespace io {
+
+namespace impl {
+    // Wrapper for emitting values on an ostream as a sequence of hex digits.
+    struct hex_inline_wrap {
+        const unsigned char* from;
+        std::size_t size;
+        unsigned width;
+    };
+
+    std::ostream& operator<<(std::ostream&, const hex_inline_wrap&);
+} // namespace impl
+
+// Inline hexadecimal adaptor: group output in `width` bytes.
+
+template <typename T>
+impl::hex_inline_wrap hex_inline(const T& obj, unsigned width = 4) {
+    return impl::hex_inline_wrap{reinterpret_cast<const unsigned char*>(&obj), sizeof obj, width};
+}
+
+// Inline hexadecimal adaptor: print `n` bytes of data from `ptr`, grouping output in `width` bytes.
+
+template <typename T>
+impl::hex_inline_wrap hex_inline_n(const T* ptr, std::size_t n, unsigned width = 4) {
+    return impl::hex_inline_wrap{reinterpret_cast<const unsigned char*>(ptr), n, width};
+}
+
+} // namespace io
+} // namespace arb
+
diff --git a/arbor/io/trace.hpp b/arbor/io/trace.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..159847150fd9010ee22a08a99973be6d22c290a0
--- /dev/null
+++ b/arbor/io/trace.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+// Internal TRACE macros and formatters for debugging during
+// development.
+
+#include <iostream>
+#include <mutex>
+
+// Required for endianness macros:
+#include <sys/types.h>
+
+#include "io/locked_ostream.hpp"
+#include "io/sepval.hpp"
+#include "io/serialize_hex.hpp"
+
+
+// TRACE(expr1 [, expr2 ...])
+//
+// Emit current source location to std::cerr, followed by the
+// literal expressions expr1, ..., and then the values of those expressions.
+//
+// TRACE output to std::cerr is serialized.
+
+#define TRACE(vars...) arb::impl::debug_emit_trace(__FILE__, __LINE__, #vars, ##vars)
+
+
+// DEBUG << ...;
+//
+// Emit arguments to std::cerr followed by a newline.
+// DEBUG output to std::cerr is serialized.
+
+#define DEBUG arb::impl::emit_nl_locked(std::cerr.rdbuf())
+
+
+namespace arb {
+
+namespace impl {
+    // Recursion terminator for debug_emit_csv: nothing left to emit.
+    inline void debug_emit_csv(std::ostream&) {}
+
+    // Write `head` followed by each value in `tail` to `out`, separated by ", ".
+    template <typename Head, typename... Tail>
+    void debug_emit_csv(std::ostream& out, const Head& head, const Tail&... tail) {
+        out << head;
+        if (sizeof...(tail)) {
+            out << ", ";
+        }
+        debug_emit_csv(out, tail...);
+    }
+
+    void debug_emit_trace_leader(std::ostream&, const char* file, int line, const char* vars);
+
+    // Stream over `buf` that takes the lock returned by guard() on construction
+    // and writes std::endl on destruction if it still has a stream buffer.
+    struct emit_nl_locked: public io::locked_ostream {
+        emit_nl_locked(std::streambuf* buf):
+            io::locked_ostream(buf),
+            lock_(this->guard())
+        {}
+
+        emit_nl_locked(emit_nl_locked&&) = default;
+
+        ~emit_nl_locked() {
+            if (rdbuf()) {
+                (*this) << std::endl;
+            }
+        }
+
+    private:
+        std::unique_lock<std::mutex> lock_;
+    };
+
+    // Implementation of TRACE: write the leader for `file`, `line` and
+    // `varlist`, then the values `args` as a comma-separated list, to a
+    // locked stream over std::cerr with 17 digits of precision.
+    template <typename... Args>
+    void debug_emit_trace(const char* file, int line, const char* varlist, const Args&... args) {
+        impl::emit_nl_locked out(std::cerr.rdbuf());
+
+        out.precision(17);
+        impl::debug_emit_trace_leader(out, file, line, varlist);
+        impl::debug_emit_csv(out, args...);
+    }
+} // namespace impl
+
+} // namespace arb
diff --git a/src/lif_cell_description.hpp b/arbor/lif_cell_description.hpp
similarity index 100%
rename from src/lif_cell_description.hpp
rename to arbor/lif_cell_description.hpp
diff --git a/src/lif_cell_group.cpp b/arbor/lif_cell_group.cpp
similarity index 100%
rename from src/lif_cell_group.cpp
rename to arbor/lif_cell_group.cpp
diff --git a/src/lif_cell_group.hpp b/arbor/lif_cell_group.hpp
similarity index 96%
rename from src/lif_cell_group.hpp
rename to arbor/lif_cell_group.hpp
index 6bf52b3617e9241eeaf1628cac2cfb38d3bb240b..d4582a8aad9997c9bc01138dd7aece06589087c1 100644
--- a/src/lif_cell_group.hpp
+++ b/arbor/lif_cell_group.hpp
@@ -1,10 +1,9 @@
 #pragma once
 #include <algorithm>
-#include <threading/timer.hpp>
 #include <cell_group.hpp>
 #include <event_queue.hpp>
 #include <lif_cell_description.hpp>
-#include <profiling/profiler.hpp>
+#include "profile/profiler_macro.hpp"
 #include <recipe.hpp>
 #include <util/unique_any.hpp>
 #include <vector>
diff --git a/src/load_balance.hpp b/arbor/load_balance.hpp
similarity index 62%
rename from src/load_balance.hpp
rename to arbor/load_balance.hpp
index c786e59d85ae2e535a338115c961905413ea1446..0dbb0cf4b86b477da691714a6996e249a41c9d83 100644
--- a/src/load_balance.hpp
+++ b/arbor/load_balance.hpp
@@ -1,7 +1,8 @@
-#include <communication/distributed_context.hpp>
-#include <domain_decomposition.hpp>
-#include <hardware/node_info.hpp>
-#include <recipe.hpp>
+#include <arbor/distributed_context.hpp>
+
+#include "domain_decomposition.hpp"
+#include "hardware/node_info.hpp"
+#include "recipe.hpp"
 
 namespace arb {
 
diff --git a/src/math.hpp b/arbor/math.hpp
similarity index 100%
rename from src/math.hpp
rename to arbor/math.hpp
diff --git a/src/matrix.hpp b/arbor/matrix.hpp
similarity index 95%
rename from src/matrix.hpp
rename to arbor/matrix.hpp
index 949ebc7cbeb3d5df12dc1fd2718ce10bc3fee0ef..bab4c6efc9a45374b40f4c1944f3daf434611439 100644
--- a/src/matrix.hpp
+++ b/arbor/matrix.hpp
@@ -2,9 +2,9 @@
 
 #include <type_traits>
 
-#include <memory/memory.hpp>
+#include <arbor/assert.hpp>
 
-#include <util/debug.hpp>
+#include <memory/memory.hpp>
 #include <util/span.hpp>
 
 namespace arb {
@@ -40,7 +40,7 @@ public:
         cell_index_(ci.begin(), ci.end()),
         state_(pi, ci, cv_capacitance, face_conductance, cv_area)
     {
-        EXPECTS(cell_index_[num_cells()] == index_type(parent_index_.size()));
+        arb_assert(cell_index_[num_cells()] == index_type(parent_index_.size()));
     }
 
     /// the dimension of the matrix (i.e. the number of rows or colums)
diff --git a/src/mc_cell_group.cpp b/arbor/mc_cell_group.cpp
similarity index 92%
rename from src/mc_cell_group.cpp
rename to arbor/mc_cell_group.cpp
index d96f9062942cecba4fb3ece3f9f782da214e5593..6988757ef907baf90177ab1848c1dfeaf06e153f 100644
--- a/src/mc_cell_group.cpp
+++ b/arbor/mc_cell_group.cpp
@@ -2,24 +2,24 @@
 #include <unordered_map>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <cell.hpp>
-#include <cell_group.hpp>
-#include <common_types.hpp>
-#include <cell_group.hpp>
-#include <event_binner.hpp>
-#include <event_queue.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <mc_cell_group.hpp>
-#include <recipe.hpp>
-#include <sampler_map.hpp>
-#include <sampling.hpp>
-#include <spike.hpp>
-#include <util/filter.hpp>
-#include <util/partition.hpp>
-
-#include <profiling/profiler.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "backends/event.hpp"
+#include "cell.hpp"
+#include "cell_group.hpp"
+#include "cell_group.hpp"
+#include "event_binner.hpp"
+#include "event_queue.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "mc_cell_group.hpp"
+#include "profile/profiler_macro.hpp"
+#include "recipe.hpp"
+#include "sampler_map.hpp"
+#include "sampling.hpp"
+#include "util/filter.hpp"
+#include "util/partition.hpp"
 
 namespace arb {
 
@@ -153,7 +153,7 @@ void mc_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& e
     PL();
 
     // Run integration and collect samples, spikes.
-    auto result = lowered_->integrate(ep.tfinal, dt, staged_events_, std::move(sample_events), util::is_debug_mode());
+    auto result = lowered_->integrate(ep.tfinal, dt, staged_events_, std::move(sample_events));
 
     // For each sampler callback registered in `call_info`, construct the
     // vector of sample entries from the lowered cell sample times and values
diff --git a/src/mc_cell_group.hpp b/arbor/mc_cell_group.hpp
similarity index 81%
rename from src/mc_cell_group.hpp
rename to arbor/mc_cell_group.hpp
index e98fc1596b06522dc26f69105f7ff4a8e4e0bf92..2609858b8f689c889c393c4f770a7d23e48c58ea 100644
--- a/src/mc_cell_group.hpp
+++ b/arbor/mc_cell_group.hpp
@@ -6,25 +6,25 @@
 #include <unordered_map>
 #include <vector>
 
-#include <backends/event.hpp>
-#include <cell_group.hpp>
-#include <cell.hpp>
-#include <common_types.hpp>
-#include <event_binner.hpp>
-#include <event_queue.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <recipe.hpp>
-#include <sampler_map.hpp>
-#include <sampling.hpp>
-#include <spike.hpp>
-#include <util/debug.hpp>
-#include <util/double_buffer.hpp>
-#include <util/filter.hpp>
-#include <util/partition.hpp>
-#include <util/range.hpp>
-#include <util/unique_any.hpp>
-
-#include <profiling/profiler.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "backends/event.hpp"
+#include "cell.hpp"
+#include "cell_group.hpp"
+#include "event_binner.hpp"
+#include "event_queue.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "profile/profiler_macro.hpp"
+#include "recipe.hpp"
+#include "sampler_map.hpp"
+#include "sampling.hpp"
+#include "util/double_buffer.hpp"
+#include "util/filter.hpp"
+#include "util/partition.hpp"
+#include "util/range.hpp"
+#include "util/unique_any.hpp"
 
 namespace arb {
 
diff --git a/src/mechcat.cpp b/arbor/mechcat.cpp
similarity index 98%
rename from src/mechcat.cpp
rename to arbor/mechcat.cpp
index 83360802a9cf7867c490c4d7e751bf2bb5253084..112317715241cd51a0eb32b1fbecb4c34d78da9c 100644
--- a/src/mechcat.cpp
+++ b/arbor/mechcat.cpp
@@ -3,9 +3,10 @@
 #include <string>
 #include <vector>
 
-#include <mechcat.hpp>
-#include <util/maputil.hpp>
-#include <util/make_unique.hpp>
+#include <arbor/mechcat.hpp>
+
+#include "util/maputil.hpp"
+#include "util/make_unique.hpp"
 
 namespace arb {
 
diff --git a/src/memory/allocator.hpp b/arbor/memory/allocator.hpp
similarity index 74%
rename from src/memory/allocator.hpp
rename to arbor/memory/allocator.hpp
index 3774cd30fd5a794a84676e354d5445122203e95a..67983d7da588547cc326eed06c3ff16a25bb1ca1 100644
--- a/src/memory/allocator.hpp
+++ b/arbor/memory/allocator.hpp
@@ -2,14 +2,7 @@
 
 #include <limits>
 
-#ifdef ARB_HAVE_GPU
-#include <cuda.h>
-#include <cuda_runtime.h>
-#endif
-#ifdef WITH_KNL
-#include <hbwmalloc.h>
-#endif
-
+#include "cuda_wrappers.hpp"
 #include "definitions.hpp"
 #include "util.hpp"
 
@@ -96,48 +89,6 @@ namespace impl {
         }
     };
 
-#ifdef WITH_KNL
-    namespace knl {
-        // allocate memory with alignment specified as a template parameter
-        // returns nullptr on failure
-        template <typename T, size_type alignment=minimum_possible_alignment<T>()>
-        T* hbw_malloc(size_type size) {
-            // double check that alignment is a multiple of sizeof(void*),
-            // which is a prerequisite for posix_memalign()
-            static_assert( !(alignment%sizeof(void*)),
-                    "alignment is not a multiple of sizeof(void*)");
-            static_assert( is_power_of_two(alignment),
-                    "alignment is not a power of two");
-            void *ptr;
-            int result = hbw_posix_memalign(&ptr, alignment, size*sizeof(T));
-            if(result) {
-                return nullptr;
-            }
-            return reinterpret_cast<T*>(ptr);
-        }
-
-        template <size_type Alignment>
-        class hbw_policy {
-        public:
-            void *allocate_policy(size_type size) {
-                return reinterpret_cast<void *>(hbw_malloc<char, Alignment>(size));
-            }
-
-            void free_policy(void *ptr) {
-                hbw_free(ptr);
-            }
-
-            static constexpr size_type alignment() {
-                return Alignment;
-            }
-            static constexpr bool is_malloc_compatible() {
-                return true;
-            }
-        };
-    }
-#endif
-
-#ifdef ARB_HAVE_GPU
     namespace cuda {
         template <size_type Alignment>
         class pinned_policy {
@@ -152,13 +103,9 @@ namespace impl {
                 }
 
                 // register the memory with CUDA
-                auto status
-                    = cudaHostRegister(ptr, size, cudaHostRegisterPortable);
-
-                if(status != cudaSuccess) {
-                    LOG_ERROR("memory:: unable to register host memory with with cudaHostRegister");
+                if (!cuda_host_register(ptr, size)) {
                     free(ptr);
-                    return nullptr;
+                    ptr = nullptr;
                 }
 
                 return ptr;
@@ -168,7 +115,7 @@ namespace impl {
                 if (!ptr) {
                     return;
                 }
-                cudaHostUnregister(ptr);
+                cuda_host_unregister(ptr);
                 free(ptr);
             }
 
@@ -191,13 +138,7 @@ namespace impl {
                 if (!n) {
                     return nullptr;
                 }
-                void* ptr;
-                auto status = cudaMallocManaged(&ptr, n);
-                if (status != cudaSuccess) {
-                    LOG_ERROR("memory:: unable to allocate managed memory");
-                    ptr = nullptr;
-                }
-                return ptr;
+                return cuda_malloc_managed(n);
             }
 
             static constexpr size_type alignment() {
@@ -210,32 +151,18 @@ namespace impl {
             }
 
             void free_policy(void* p) {
-                if (p) {
-                    cudaFree(p);
-                }
+                cuda_free(p);
             }
         };
 
         class device_policy {
         public:
             void *allocate_policy(size_type size) {
-                void* ptr = nullptr;
-                auto status = cudaMalloc(&ptr, size);
-                if(status != cudaSuccess) {
-                    LOG_ERROR("CUDA: unable to allocate "+std::to_string(size)+" bytes");
-                    ptr = nullptr;
-                }
-
-                return ptr;
+                return cuda_malloc(size);
             }
 
             void free_policy(void *ptr) {
-                if(ptr) {
-                    auto status = cudaFree(ptr);
-                    if(status != cudaSuccess) {
-                        LOG_ERROR("CUDA: unable to free memory");
-                    }
-                }
+                cuda_free(ptr);
             }
 
             // memory allocated using cudaMalloc has alignment of 256 bytes
@@ -247,7 +174,6 @@ namespace impl {
             }
         };
     } // namespace cuda
-#endif // #ifdef ARB_HAVE_GPU
 } // namespace impl
 
 template<typename T, typename Policy >
@@ -325,7 +251,6 @@ namespace util {
         }
     };
 
-#ifdef ARB_HAVE_GPU
     template <size_t Alignment>
     struct type_printer<impl::cuda::pinned_policy<Alignment>>{
         static std::string print() {
@@ -348,7 +273,6 @@ namespace util {
             return std::string("managed_policy");
         }
     };
-#endif
 
     template <typename T, typename Policy>
     struct type_printer<allocator<T,Policy>>{
@@ -365,13 +289,6 @@ namespace util {
 template <class T, size_t alignment=impl::minimum_possible_alignment<T>()>
 using aligned_allocator = allocator<T, impl::aligned_policy<alignment>>;
 
-#ifdef WITH_KNL
-// align with 512 bit vector register size
-template <class T, size_t alignment=(512/8)>
-using hbw_allocator = allocator<T, impl::knl::hbw_policy<alignment>>;
-#endif
-
-#ifdef ARB_HAVE_GPU
 // For pinned and allocation set the default alignment to correspond to
 // the alignment of 1024 bytes, because pinned memory is allocated at
 // page boundaries. It is allocated at page boundaries (typically 4k),
@@ -386,7 +303,5 @@ using managed_allocator = allocator<T, impl::cuda::managed_policy<alignment>>;
 template <class T, size_t alignment=256>
 using cuda_allocator = allocator<T, impl::cuda::device_policy>;
 
-#endif
-
 } // namespace memory
 } // namespace arb
diff --git a/src/memory/array.hpp b/arbor/memory/array.hpp
similarity index 99%
rename from src/memory/array.hpp
rename to arbor/memory/array.hpp
index 4382b9653aeed084c935ac34312370ee7141e2c1..e2915e6ff332abb483d96a1f9b519d9d632d5ac8 100644
--- a/src/memory/array.hpp
+++ b/arbor/memory/array.hpp
@@ -10,7 +10,8 @@
 #include <iostream>
 #include <type_traits>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+
 #include <util/range.hpp>
 
 #include "definitions.hpp"
@@ -228,7 +229,7 @@ public:
         // Only valid for contiguous range, but we can't test that at compile time.
         // Can check though that taking &*b+n = &*e where n = e-b, while acknowledging
         // this is not fail safe.
-        EXPECTS(&*b+(e-b)==&*e);
+        arb_assert(&*b+(e-b)==&*e);
 
         using V = typename std::iterator_traits<iterator>::value_type;
         coordinator_.copy(const_array_view<V, host_coordinator<V, aligned_allocator<V>>>(&*b, e-b), view_type(*this));
diff --git a/src/memory/array_view.hpp b/arbor/memory/array_view.hpp
similarity index 96%
rename from src/memory/array_view.hpp
rename to arbor/memory/array_view.hpp
index df89aae97d30cf768aba822f0c963e1d42c498c6..3c187761892611d37767816d3d2f4da9597957e2 100644
--- a/src/memory/array_view.hpp
+++ b/arbor/memory/array_view.hpp
@@ -5,10 +5,11 @@
 #include <type_traits>
 #include <vector>
 
+#include <arbor/assert.hpp>
+
 #include "definitions.hpp"
 #include "util.hpp"
 #include "range_limits.hpp"
-#include <util/debug.hpp>
 
 namespace arb {
 namespace memory{
@@ -163,7 +164,7 @@ public:
     explicit array_view(array_view& other, size_type n) :
         pointer_(other.data()), size_(n)
     {
-        EXPECTS(n<=other.size());
+        arb_assert(n<=other.size());
         #ifdef VERBOSE
         std::cout << util::green("array_view(array_view, size_type)")
                   << "\n  this  " << util::pretty_printer<array_view>::print(*this)
@@ -180,14 +181,14 @@ public:
     /// access half open sub-range using two indexes [left, right)
     view_type operator()(size_type left, size_type right) {
         #ifndef NDEBUG
-        EXPECTS(right<=size_ && left<=right);
+        arb_assert(right<=size_ && left<=right);
         #endif
         return view_type(pointer_+left, right-left);
     }
 
     const_view_type operator()(size_type left, size_type right) const {
         #ifndef NDEBUG
-        EXPECTS(right<=size_ && left<=right);
+        arb_assert(right<=size_ && left<=right);
         #endif
         return view_type(pointer_+left, right-left);
     }
@@ -195,14 +196,14 @@ public:
     /// access half open sub-range using one index and one-past-the-end [left, end)
     view_type operator()(size_type left, end_type) {
         #ifndef NDEBUG
-        EXPECTS(left<=size_);
+        arb_assert(left<=size_);
         #endif
         return view_type(pointer_+left, size_-left);
     }
 
     const_view_type operator()(size_type left, end_type) const {
         #ifndef NDEBUG
-        EXPECTS(left<=size_);
+        arb_assert(left<=size_);
         #endif
         return view_type(pointer_+left, size_-left);
     }
@@ -260,14 +261,14 @@ public:
     // return a reference type provided by Coordinator
     reference operator[] (size_type i) {
         #ifndef NDEBUG
-        EXPECTS(i<size_);
+        arb_assert(i<size_);
         #endif
         return coordinator_.make_reference(pointer_+i);
     }
 
     const_reference operator[] (size_type i) const {
         #ifndef NDEBUG
-        EXPECTS(i<size_);
+        arb_assert(i<size_);
         #endif
         return coordinator_.make_reference(pointer_+i);
     }
@@ -367,13 +368,13 @@ public:
     explicit const_array_view(view_type& other, size_type n) :
         pointer_(other.data()), size_(n)
     {
-        EXPECTS(n<=other.size());
+        arb_assert(n<=other.size());
     }
 
     explicit const_array_view(const_array_view& other, size_type n) :
         pointer_(other.data()), size_(n)
     {
-        EXPECTS(n<=other.size());
+        arb_assert(n<=other.size());
     }
 
     explicit const_array_view() {
@@ -385,7 +386,7 @@ public:
     /// access half open sub-range using two indexes [left, right)
     const_view_type operator()(size_type left, size_type right) const {
 #ifndef NDEBUG
-        EXPECTS(right<=size_ && left<=right);
+        arb_assert(right<=size_ && left<=right);
 #endif
         return const_view_type(pointer_+left, right-left);
     }
@@ -393,7 +394,7 @@ public:
     /// access half open sub-range using one index and one-past-the-end [left, end)
     const_view_type operator()(size_type left, end_type) const {
 #ifndef NDEBUG
-        EXPECTS(left<=size_);
+        arb_assert(left<=size_);
 #endif
         return const_view_type(pointer_+left, size_-left);
     }
@@ -439,7 +440,7 @@ public:
     // return a reference type provided by Coordinator
     const_reference operator[] (size_type i) const {
         #ifndef NDEBUG
-        EXPECTS(i<size_);
+        arb_assert(i<size_);
         #endif
         return coordinator_.make_reference(pointer_+i);
     }
diff --git a/src/memory/copy.hpp b/arbor/memory/copy.hpp
similarity index 95%
rename from src/memory/copy.hpp
rename to arbor/memory/copy.hpp
index 3c8a84312efbd29f6d0cc312332cdcf77137046d..9d864376abfc5a1f536bb52821affc068c9fc35a 100644
--- a/src/memory/copy.hpp
+++ b/arbor/memory/copy.hpp
@@ -2,7 +2,7 @@
 
 #include <type_traits>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
 
 #include "wrappers.hpp"
 
@@ -12,7 +12,7 @@ namespace memory {
 template <typename LHS, typename RHS>
 void copy(LHS&& from, RHS&& to) {
 #ifndef NDEBUG
-    EXPECTS(from.size() == to.size());
+    arb_assert(from.size() == to.size());
 #endif
 #ifdef VERBOSE
     std::cerr
diff --git a/arbor/memory/cuda_wrappers.cpp b/arbor/memory/cuda_wrappers.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7e694d7e347bb2847987444ee9d7b2ee6b680af7
--- /dev/null
+++ b/arbor/memory/cuda_wrappers.cpp
@@ -0,0 +1,146 @@
+// Definitions of the CUDA runtime wrappers declared in cuda_wrappers.hpp.
+//
+// When ARB_HAVE_GPU is defined each wrapper forwards to the CUDA runtime and
+// logs any error that it reports. Otherwise stubs with the same signatures
+// are compiled; they only log that there is no CUDA support.
+
+#include <cstdlib>
+#include <string>
+
+// Included so that the definitions below are checked against the declarations.
+#include "cuda_wrappers.hpp"
+#include "util.hpp"
+
+#ifdef ARB_HAVE_GPU
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+// Log msg together with the CUDA error string, prefixed by the function name.
+#define LOG_CUDA_ERROR(error, msg)\
+LOG_ERROR("memory:: "+std::string(__func__)+" "+std::string((msg))+": "+cudaGetErrorString(error))
+
+namespace arb {
+namespace memory {
+
+using std::to_string;
+
+// Copy n bytes from device memory to device memory; logs and aborts on failure.
+void cuda_memcpy_d2d(void* dest, const void* src, std::size_t n) {
+    if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyDeviceToDevice)) {
+        LOG_CUDA_ERROR(error, "n="+to_string(n));
+        abort();
+    }
+}
+
+// Copy n bytes from device memory to host memory; logs and aborts on failure.
+void cuda_memcpy_d2h(void* dest, const void* src, std::size_t n) {
+    if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyDeviceToHost)) {
+        LOG_CUDA_ERROR(error, "n="+to_string(n));
+        abort();
+    }
+}
+
+// Copy n bytes from host memory to device memory; logs and aborts on failure.
+void cuda_memcpy_h2d(void* dest, const void* src, std::size_t n) {
+    if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyHostToDevice)) {
+        LOG_CUDA_ERROR(error, "n="+to_string(n));
+        abort();
+    }
+}
+
+// Register size bytes of host memory at ptr with CUDA.
+// Returns ptr on success, or nullptr after logging the error.
+void* cuda_host_register(void* ptr, std::size_t size) {
+    if (auto error = cudaHostRegister(ptr, size, cudaHostRegisterPortable)) {
+        LOG_CUDA_ERROR(error, "unable to register host memory");
+        return nullptr;
+    }
+    return ptr;
+}
+
+// Unregister the host memory at ptr; the CUDA status is not checked.
+void cuda_host_unregister(void* ptr) {
+    cudaHostUnregister(ptr);
+}
+
+// Allocate n bytes with cudaMalloc; returns nullptr after logging on failure.
+void* cuda_malloc(std::size_t n) {
+    void* ptr = nullptr;
+
+    if (auto error = cudaMalloc(&ptr, n)) {
+        LOG_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes");
+        ptr = nullptr;
+    }
+    return ptr;
+}
+
+// Allocate n bytes with cudaMallocManaged; returns nullptr after logging on failure.
+void* cuda_malloc_managed(std::size_t n) {
+    void* ptr = nullptr;
+
+    if (auto error = cudaMallocManaged(&ptr, n)) {
+        LOG_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes");
+        ptr = nullptr;
+    }
+    return ptr;
+}
+
+// Release ptr with cudaFree; a failure is logged and execution continues.
+void cuda_free(void* ptr) {
+    if (auto error = cudaFree(ptr)) {
+        LOG_CUDA_ERROR(error, "");
+    }
+}
+
+} // namespace memory
+} // namespace arb
+
+#else
+
+// Log that the calling function was used in a build without CUDA support.
+#define NOCUDA \
+LOG_ERROR("memory:: "+std::string(__func__)+"(): no CUDA support")
+
+namespace arb {
+namespace memory {
+
+void cuda_memcpy_d2d(void* dest, const void* src, std::size_t n) {
+    NOCUDA;
+}
+
+void cuda_memcpy_d2h(void* dest, const void* src, std::size_t n) {
+    NOCUDA;
+}
+
+void cuda_memcpy_h2d(void* dest, const void* src, std::size_t n) {
+    NOCUDA;
+}
+
+void* cuda_host_register(void* ptr, std::size_t size) {
+    NOCUDA;
+    return nullptr;
+}
+
+void cuda_host_unregister(void* ptr) {
+    NOCUDA;
+}
+
+void* cuda_malloc(std::size_t n) {
+    NOCUDA;
+    return nullptr;
+}
+
+void* cuda_malloc_managed(std::size_t n) {
+    NOCUDA;
+    return nullptr;
+}
+
+void cuda_free(void* ptr) {
+    NOCUDA;
+}
+
+} // namespace memory
+} // namespace arb
+
+#endif // def ARB_HAVE_GPU
diff --git a/arbor/memory/cuda_wrappers.hpp b/arbor/memory/cuda_wrappers.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..04e014acb774bea0084b3e7340fa4cf12bd55983
--- /dev/null
+++ b/arbor/memory/cuda_wrappers.hpp
@@ -0,0 +1,34 @@
+#pragma once
+
+// Thin wrappers around the CUDA runtime calls used by the memory library.
+//
+// The declarations carry no CUDA types, so this header does not itself
+// include any CUDA header. The definitions live in cuda_wrappers.cpp, which
+// compiles stub versions that only log an error when ARB_HAVE_GPU is not
+// defined.
+
+#include <cstddef>
+
+namespace arb {
+namespace memory {
+
+// Copy n bytes from src to dest; the suffix names the direction
+// (d = device, h = host). Argument order follows memcpy: dest first.
+void cuda_memcpy_d2d(void* dest, const void* src, std::size_t n);
+void cuda_memcpy_d2h(void* dest, const void* src, std::size_t n);
+void cuda_memcpy_h2d(void* dest, const void* src, std::size_t n);
+
+// Register/unregister size bytes of host memory at ptr with CUDA.
+// cuda_host_register returns ptr on success and a null pointer otherwise.
+void* cuda_host_register(void* ptr, std::size_t size);
+void cuda_host_unregister(void* ptr);
+
+// Allocate n bytes of device or managed memory; null pointer on failure.
+void* cuda_malloc(std::size_t n);
+void* cuda_malloc_managed(std::size_t n);
+
+// Release memory with cudaFree.
+void cuda_free(void* ptr);
+
+} // namespace memory
+} // namespace arb
diff --git a/src/memory/definitions.hpp b/arbor/memory/definitions.hpp
similarity index 100%
rename from src/memory/definitions.hpp
rename to arbor/memory/definitions.hpp
diff --git a/src/memory/device_coordinator.hpp b/arbor/memory/device_coordinator.hpp
similarity index 77%
rename from src/memory/device_coordinator.hpp
rename to arbor/memory/device_coordinator.hpp
index bb8303f11ee157ca6b9bec0988b0f856179de868..812684f4e44721413a6019180be80bce595bd6a2 100644
--- a/src/memory/device_coordinator.hpp
+++ b/arbor/memory/device_coordinator.hpp
@@ -3,13 +3,13 @@
 #include <cstdint>
 #include <exception>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
 
 #include "allocator.hpp"
 #include "array.hpp"
+#include "cuda_wrappers.hpp"
 #include "definitions.hpp"
 #include "fill.hpp"
-#include "gpu.hpp"
 #include "util.hpp"
 
 namespace arb {
@@ -56,13 +56,8 @@ public:
 
     operator T() const {
         T tmp;
-        auto success
-            = cudaMemcpy(&tmp, pointer_, sizeof(T), cudaMemcpyDeviceToHost);
-        if(success != cudaSuccess) {
-            LOG_ERROR("cudaMemcpy(d2h, " + std::to_string(sizeof(T)) + ") " + cudaGetErrorString(success));
-            abort();
-        }
-        return T(tmp);
+        cuda_memcpy_d2h(&tmp, pointer_, sizeof(T));
+        return tmp;
     }
 
 protected:
@@ -80,25 +75,15 @@ public:
 
     device_reference(pointer p) : pointer_(p) {}
 
-    device_reference& operator = (const T& value) {
-        auto success =
-            cudaMemcpy(pointer_, &value, sizeof(T), cudaMemcpyHostToDevice);
-        if(success != cudaSuccess) {
-            LOG_ERROR("cudaMemcpy(h2d, " + std::to_string(sizeof(T)) + ") " + cudaGetErrorString(success));
-            abort();
-        }
+    device_reference& operator=(const T& value) {
+        cuda_memcpy_h2d(pointer_, &value, sizeof(T));
         return *this;
     }
 
     operator T() const {
         T tmp;
-        auto success =
-            cudaMemcpy(&tmp, pointer_, sizeof(T), cudaMemcpyDeviceToHost);
-        if(success != cudaSuccess) {
-            LOG_ERROR("cudaMemcpy(d2h, " + std::to_string(sizeof(T)) + ") " + cudaGetErrorString(success));
-            abort();
-        }
-        return T(tmp);
+        cuda_memcpy_d2h(&tmp, pointer_, sizeof(T));
+        return tmp;
     }
 
 private:
@@ -156,21 +141,16 @@ public:
 
     // copy memory from one gpu range to another
     void copy(const_view_type from, view_type to) {
-    //template<typename Alloc1, typename Alloc2>
-    //void copy(
-        //const_array_view<value_type, device_coordinator<value_type, Alloc1>> from,
-        //array_view<value_type, device_coordinator<value_type, Alloc2>> to)
-    //{
         #ifdef VERBOSE
         std::cerr << util::type_printer<device_coordinator>::print()
                   << util::blue("::copy") << "(size=" << from.size() << ") "
                   << util::print_pointer(from.data()) << " -> "
                   << util::print_pointer(to.data()) << "\n";
         #endif
-        EXPECTS(from.size()==to.size());
-        EXPECTS(!from.overlaps(to));
+        arb_assert(from.size()==to.size());
+        arb_assert(!from.overlaps(to));
 
-        gpu::memcpy_d2d(from.data(), to.data(), from.size());
+        cuda_memcpy_d2d(to.data(), from.data(), from.size()*sizeof(value_type));
     }
 
     // copy memory from gpu to host
@@ -185,9 +165,9 @@ public:
                   << util::print_pointer(from.data()) << " -> "
                   << util::print_pointer(to.data()) << "\n";
         #endif
-        EXPECTS(from.size()==to.size());
+        arb_assert(from.size()==to.size());
 
-        gpu::memcpy_d2h(from.data(), to.data(), from.size());
+        cuda_memcpy_d2h(to.data(), from.data(), from.size()*sizeof(value_type));
     }
 
     // copy memory from host to gpu
@@ -202,9 +182,9 @@ public:
                   << util::print_pointer(from.data()) << " -> "
                   << util::print_pointer(to.data()) << "\n";
         #endif
-        EXPECTS(from.size()==to.size());
+        arb_assert(from.size()==to.size());
 
-        gpu::memcpy_h2d(from.data(), to.data(), from.size());
+        cuda_memcpy_h2d(to.data(), from.data(), from.size()*sizeof(value_type));
     }
 
     // copy from pinned memory to device
@@ -218,7 +198,7 @@ public:
         std::cerr << util::type_printer<device_coordinator>::print()
                   << util::blue("::copy") << "(size=" << from.size() << ") " << from.data() << " -> " << to.data() << "\n";
         #endif
-        EXPECTS(from.size()==to.size());
+        arb_assert(from.size()==to.size());
 
         #ifdef VERBOSE
         using oType = array_view< value_type, host_coordinator< value_type, pinned_allocator< value_type, alignment>>>;
@@ -229,16 +209,7 @@ public:
                   << util::print_pointer(to.data()) << "\n";
         #endif
 
-        auto status = cudaMemcpy(
-                reinterpret_cast<void*>(to.begin()),
-                reinterpret_cast<const void*>(from.begin()),
-                from.size()*sizeof(value_type),
-                cudaMemcpyHostToDevice
-        );
-        if(status != cudaSuccess) {
-            LOG_ERROR("cudaMemcpy(h2d, " + std::to_string(sizeof(T)*from.size()) + ") " + cudaGetErrorString(status));
-            abort();
-        }
+        cuda_memcpy_h2d(to.begin(), from.begin(), from.size()*sizeof(value_type));
     }
 
     // generates compile time error if there is an attempt to copy from memory
diff --git a/src/memory/fill.cu b/arbor/memory/fill.cu
similarity index 100%
rename from src/memory/fill.cu
rename to arbor/memory/fill.cu
diff --git a/src/memory/fill.hpp b/arbor/memory/fill.hpp
similarity index 100%
rename from src/memory/fill.hpp
rename to arbor/memory/fill.hpp
diff --git a/src/memory/host_coordinator.hpp b/arbor/memory/host_coordinator.hpp
similarity index 93%
rename from src/memory/host_coordinator.hpp
rename to arbor/memory/host_coordinator.hpp
index 607fb954b55f75613e32d2e0a238554d42cff0e2..19f770df6d9cbe1ba00b98ca3fc84af43900476c 100644
--- a/src/memory/host_coordinator.hpp
+++ b/arbor/memory/host_coordinator.hpp
@@ -4,17 +4,14 @@
 #include <memory>
 #include <string>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
 
+#include "cuda_wrappers.hpp"
 #include "definitions.hpp"
 #include "array.hpp"
 #include "allocator.hpp"
 #include "util.hpp"
 
-#ifdef ARB_HAVE_GPU
-#include "gpu.hpp"
-#endif
-
 namespace arb {
 namespace memory {
 
@@ -22,10 +19,8 @@ namespace memory {
 template <typename T, class Allocator>
 class host_coordinator;
 
-#ifdef ARB_HAVE_GPU
 template <typename T, class Allocator>
 class device_coordinator;
-#endif
 
 namespace util {
     template <typename T, typename Allocator>
@@ -108,8 +103,8 @@ public:
         const_array_view<value_type, host_coordinator<value_type, Allocator1>> from,
         array_view<value_type, host_coordinator<value_type, Allocator2>> to)
     {
-        EXPECTS(from.size()==to.size());
-        EXPECTS(!from.overlaps(to));
+        arb_assert(from.size()==to.size());
+        arb_assert(!from.overlaps(to));
 
         #ifdef VERBOSE
         using c1 = host_coordinator<value_type, Allocator1>;
@@ -123,14 +118,13 @@ public:
         std::copy(from.begin(), from.end(), to.begin());
     }
 
-#ifdef ARB_HAVE_GPU
     // copy memory from device to host
     template <class Alloc>
     void copy(
         const_array_view<value_type, device_coordinator<value_type, Alloc>> from,
         view_type to)
     {
-        EXPECTS(from.size()==to.size());
+        arb_assert(from.size()==to.size());
 
         #ifdef VERBOSE
         std::cerr << util::type_printer<host_coordinator>::print()
@@ -140,7 +134,7 @@ public:
                   << util::print_pointer(to.data()) << std::endl;
         #endif
 
-        gpu::memcpy_d2h(from.data(), to.data(), from.size());
+        cuda_memcpy_d2h(to.data(), from.data(), from.size()*sizeof(value_type));
     }
 
     // copy memory from host to device
@@ -149,7 +143,7 @@ public:
         const_view_type from,
         array_view<value_type, device_coordinator<value_type, Alloc>> to)
     {
-        EXPECTS(from.size()==to.size());
+        arb_assert(from.size()==to.size());
 
         #ifdef VERBOSE
         std::cerr << util::type_printer<host_coordinator>::print()
@@ -159,9 +153,8 @@ public:
                   << util::print_pointer(to.data()) << std::endl;
         #endif
 
-        gpu::memcpy_h2d(from.data(), to.data(), from.size());
+        cuda_memcpy_h2d(to.data(), from.data(), from.size()*sizeof(value_type));
     }
-#endif
 
     // set all values in a range to val
     void set(view_type rng, value_type val) {
diff --git a/src/memory/memory.hpp b/arbor/memory/memory.hpp
similarity index 86%
rename from src/memory/memory.hpp
rename to arbor/memory/memory.hpp
index 3404ef7ca8b3973f240192d1c4b2af6acb3e5cbc..7c2aa871db4595b368a7cb0a84f1dcf412de7200 100644
--- a/src/memory/memory.hpp
+++ b/arbor/memory/memory.hpp
@@ -5,10 +5,7 @@
 #include "array.hpp"
 #include "definitions.hpp"
 #include "host_coordinator.hpp"
-
-#ifdef ARB_HAVE_GPU
 #include "device_coordinator.hpp"
-#endif
 
 namespace arb {
 namespace memory {
@@ -30,7 +27,6 @@ std::ostream& operator<< (std::ostream& o, host_view<T> const& v) {
     return o;
 }
 
-#ifdef ARB_HAVE_GPU
 // specialization for pinned vectors. Use a host_coordinator, because memory is
 // in the host memory space, and all of the helpers (copy, set, etc) are the
 // same with and without page locked memory
@@ -59,15 +55,6 @@ std::ostream& operator<<(std::ostream& o, const_device_view<T> v) {
     for (; i<v.size()-1; ++i) o << v[i] << ", ";
     return o << v[i];
 }
-#endif
-
-#ifdef WITH_KNL
-// specialization for HBW memory on KNL
-template <typename T>
-using hwb_vector = array<T, host_coordinator<T, hwb_allocator<T>>>;
-template <typename T>
-using hwb_view = array_view<T, host_coordinator<T, hwb_allocator<T>>>;
-#endif
 
 } // namespace memory
 } // namespace arb
diff --git a/src/memory/range_limits.hpp b/arbor/memory/range_limits.hpp
similarity index 100%
rename from src/memory/range_limits.hpp
rename to arbor/memory/range_limits.hpp
diff --git a/arbor/memory/util.cpp b/arbor/memory/util.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..371b738b6c7c63b24974e2ee3779ed92bb329283
--- /dev/null
+++ b/arbor/memory/util.cpp
@@ -0,0 +1,23 @@
+#include <iostream>
+#include <string>
+
+#include "util/unwind.hpp"
+#include "util.hpp"
+
+namespace arb {
+namespace memory {
+namespace util {
+
+// Write a "runtime error" report to std::cerr: a backtrace, then the source
+// location given by file and line, then msg indented on a line of its own.
+void log_error(const char* file, int line, const std::string& msg) {
+    std::ostream& err = std::cerr;
+
+    err << arb::util::backtrace();
+    err << red("runtime error") << " @ " << white(file) << ":" << line;
+    err << "\n    " << msg << std::endl;
+}
+
+} // namespace util
+} // namespace memory
+} // namespace arb
diff --git a/src/memory/util.hpp b/arbor/memory/util.hpp
similarity index 85%
rename from src/memory/util.hpp
rename to arbor/memory/util.hpp
index 9d5cdcf3988f973f31053ba4587c36738a56b352..86d8dbd36ab2f0afa8b8ea424274afdfe2ca7aed 100644
--- a/src/memory/util.hpp
+++ b/arbor/memory/util.hpp
@@ -4,9 +4,7 @@
 #include <sstream>
 #include <vector>
 
-#include <util/debug.hpp>
-
-#define LOG_ERROR(msg) arb::util::backtrace().print(); util::log_error(__FILE__, __LINE__, msg)
+#define LOG_ERROR(msg) util::log_error(__FILE__, __LINE__, msg)
 
 namespace arb {
 namespace memory {
@@ -79,11 +77,7 @@ std::string print_pointer(const T* ptr) {
     return yellow(s.str());
 }
 
-inline void log_error(const char* file, int line, std::string msg) {
-    std::cerr
-        << red("runtime error") << " @ "
-        << white(file) << ":" << line << "\n    " << msg << std::endl;
-}
+void log_error(const char* file, int line, const std::string& msg);
 
 } // namespace util
 } // namespace memory
diff --git a/src/memory/wrappers.hpp b/arbor/memory/wrappers.hpp
similarity index 96%
rename from src/memory/wrappers.hpp
rename to arbor/memory/wrappers.hpp
index 0371370927c05b03cae5450e4c18ab134124dc02..f51daa032855a40bc243bd46d6ceb05dd92f12bf 100644
--- a/src/memory/wrappers.hpp
+++ b/arbor/memory/wrappers.hpp
@@ -5,11 +5,6 @@
 
 #include <memory/memory.hpp>
 
-#ifdef ARB_HAVE_GPU
-#include <cuda.h>
-#include <cuda_runtime.h>
-#endif
-
 namespace arb {
 namespace memory {
 
@@ -95,7 +90,6 @@ namespace util {
         return is_on_host<typename std::decay<T>::type>::value;
     }
 
-    #ifdef ARB_HAVE_GPU
     template <typename T>
     struct is_on_gpu : std::false_type {};
 
@@ -112,7 +106,6 @@ namespace util {
     constexpr bool is_on_gpu_v() {
         return is_on_gpu<typename std::decay<T>::type>::value;
     }
-    #endif
 }
 
 
@@ -131,7 +124,6 @@ auto on_host(const C& c) -> decltype(make_const_view(c)) {
     return make_const_view(c);
 }
 
-#ifdef ARB_HAVE_GPU
 template <
     typename C,
     typename = typename std::enable_if<util::is_on_gpu_v<C>()>::type
@@ -158,7 +150,6 @@ auto on_gpu(const C& c) -> device_vector<typename C::value_type> {
     using T = typename C::value_type;
     return device_vector<T>(make_const_view(c));
 }
-#endif
 
 } // namespace memory
 } // namespace arb
diff --git a/src/merge_events.cpp b/arbor/merge_events.cpp
similarity index 97%
rename from src/merge_events.cpp
rename to arbor/merge_events.cpp
index 937a61a74561ccedd8049c3215a066d6b907e4ef..fcd31dc3831d4451b9a469f00faaa446d27c4eb0 100644
--- a/src/merge_events.cpp
+++ b/arbor/merge_events.cpp
@@ -10,7 +10,7 @@
 #include <util/filter.hpp>
 #include <util/span.hpp>
 #include <util/unique_any.hpp>
-#include <profiling/profiler.hpp>
+#include "profile/profiler_macro.hpp"
 
 namespace arb {
 
@@ -34,9 +34,9 @@ tourney_tree::tourney_tree(std::vector<event_generator>& input):
     n_lanes_(input_.size())
 {
     // Must have at least 1 queue
-    EXPECTS(n_lanes_);
+    arb_assert(n_lanes_);
     // Maximum value in unsigned limits how many queues we can have
-    EXPECTS(n_lanes_<(1u<<(sizeof(unsigned)*8u-1u)));
+    arb_assert(n_lanes_<(1u<<(sizeof(unsigned)*8u-1u)));
 
     leaves_ = next_power_2(n_lanes_);
     nodes_ = 2u*(leaves_-1u)+1u; // 2*l-1 with overflow protection
@@ -167,7 +167,7 @@ void merge_events(time_type t0, time_type t1,
         //           generators to be delivered in the time interval [t₀, t₁).
         //  Step 2 : Use std::merge to append events in lc and events with
         //           delivery times in the interval [t₁, ∞).
-        EXPECTS(generators.size()>2u);
+        arb_assert(generators.size()>2u);
 
         PE(communication_enqueue_setup);
         // Make an event generator with all the events in events.
diff --git a/src/merge_events.hpp b/arbor/merge_events.hpp
similarity index 98%
rename from src/merge_events.hpp
rename to arbor/merge_events.hpp
index df1596ccd9be123688d3760dd50b2ef782042f5c..eb0d2873a66abb90aa0e5215e274553fe29450a4 100644
--- a/src/merge_events.hpp
+++ b/arbor/merge_events.hpp
@@ -5,7 +5,7 @@
 
 #include <event_generator.hpp>
 #include <event_queue.hpp>
-#include <profiling/profiler.hpp>
+#include "profile/profiler_macro.hpp"
 
 namespace arb {
 
diff --git a/src/morphology.cpp b/arbor/morphology.cpp
similarity index 100%
rename from src/morphology.cpp
rename to arbor/morphology.cpp
diff --git a/src/morphology.hpp b/arbor/morphology.hpp
similarity index 100%
rename from src/morphology.hpp
rename to arbor/morphology.hpp
diff --git a/src/partition_load_balance.cpp b/arbor/partition_load_balance.cpp
similarity index 94%
rename from src/partition_load_balance.cpp
rename to arbor/partition_load_balance.cpp
index 9be17786619f01ad8f5881498fbdcf10d35fcf38..9cff1bd27e9456b38a53bf3683378ccbe6a900d6 100644
--- a/src/partition_load_balance.cpp
+++ b/arbor/partition_load_balance.cpp
@@ -1,8 +1,9 @@
-#include <communication/distributed_context.hpp>
-#include <domain_decomposition.hpp>
-#include <hardware/node_info.hpp>
-#include <recipe.hpp>
-#include <util/enumhash.hpp>
+#include <arbor/distributed_context.hpp>
+#include <arbor/util/enumhash.hpp>
+
+#include "domain_decomposition.hpp"
+#include "hardware/node_info.hpp"
+#include "recipe.hpp"
 
 namespace arb {
 
diff --git a/src/point.hpp b/arbor/point.hpp
similarity index 100%
rename from src/point.hpp
rename to arbor/point.hpp
diff --git a/arbor/profile/clock.cpp b/arbor/profile/clock.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd73d4aae733f46ef0a54c13a946b6928596b792
--- /dev/null
+++ b/arbor/profile/clock.cpp
@@ -0,0 +1,34 @@
+#define _POSIX_C_SOURCE 200809L
+#include <time.h>
+
+// Keep implementation out of header in order to avoid
+// global namespace pollution from <time.h>.
+
+#include <arbor/profile/clock.hpp>
+
+namespace arb {
+namespace profile {
+
+// Read the given POSIX clock and return its value in nanoseconds.
+// Returns static_cast<tick_type>(-1) if clock_gettime reports failure.
+inline tick_type posix_clock_gettime_ns(clockid_t clock) {
+    timespec ts;
+    if (clock_gettime(clock, &ts)) {
+        return static_cast<tick_type>(-1);
+    }
+
+    // According to SUS, we can assume tv_nsec is in [0, 1e9).
+
+    tick_type seconds = ts.tv_sec;
+    tick_type nanoseconds = 1000000000UL*seconds+(tick_type)ts.tv_nsec;
+
+    return nanoseconds;
+}
+
+// Value of CLOCK_MONOTONIC in nanoseconds, as read by posix_clock_gettime_ns.
+tick_type posix_clock_gettime_monotonic_ns() {
+    return posix_clock_gettime_ns(CLOCK_MONOTONIC);
+}
+
+} // namespace profile
+} // namespace arb
diff --git a/src/profiling/memory_meter.cpp b/arbor/profile/memory_meter.cpp
similarity index 83%
rename from src/profiling/memory_meter.cpp
rename to arbor/profile/memory_meter.cpp
index 611eeac2f99274be99d6e68381e08c48fae2c4f1..6109d506c2395911db7d63145bf0bf727c6c6004 100644
--- a/src/profiling/memory_meter.cpp
+++ b/arbor/profile/memory_meter.cpp
@@ -1,18 +1,13 @@
 #include <string>
 #include <vector>
 
-#include <util/config.hpp>
-#include <hardware/memory.hpp>
+#include <arbor/profile/meter.hpp>
 
+#include "hardware/memory.hpp"
 #include "memory_meter.hpp"
-#include "meter.hpp"
 
 namespace arb {
-namespace util {
-
-//
-//  memory_meter
-//
+namespace profile {
 
 class memory_meter: public meter {
 protected:
@@ -43,18 +38,16 @@ public:
 };
 
 meter_ptr make_memory_meter() {
-    if (not config::has_memory_measurement) {
+    if (hw::allocated_memory()==-1) {
         return nullptr;
     }
     return meter_ptr(new memory_meter());
 }
 
-//
-//  gpu_memory_meter
-//
 
 // The gpu memory meter specializes the reading and name methods of the basic
 // memory_meter.
+
 class gpu_memory_meter: public memory_meter {
 public:
     std::string name() override {
@@ -67,11 +60,11 @@ public:
 };
 
 meter_ptr make_gpu_memory_meter() {
-    if (not config::has_cuda) {
+    if (hw::gpu_allocated_memory()==-1) {
         return nullptr;
     }
     return meter_ptr(new gpu_memory_meter());
 }
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/memory_meter.hpp b/arbor/profile/memory_meter.hpp
similarity index 60%
rename from src/profiling/memory_meter.hpp
rename to arbor/profile/memory_meter.hpp
index 4242e3c9bacd6e021a7cb5c7fdad94c20bf210e5..4bf0f9107b5e259292e9935eb2cd274aa3118937 100644
--- a/src/profiling/memory_meter.hpp
+++ b/arbor/profile/memory_meter.hpp
@@ -1,12 +1,12 @@
 #pragma once
 
-#include "meter.hpp"
+#include <arbor/profile/meter.hpp>
 
 namespace arb {
-namespace util {
+namespace profile {
 
 meter_ptr make_memory_meter();
 meter_ptr make_gpu_memory_meter();
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/meter_manager.cpp b/arbor/profile/meter_manager.cpp
similarity index 82%
rename from src/profiling/meter_manager.cpp
rename to arbor/profile/meter_manager.cpp
index 34d50b8a9ead2e4dce90bde73624e2a1ab463cbd..525420355817a237e419f12e8928092a13200326 100644
--- a/src/profiling/meter_manager.cpp
+++ b/arbor/profile/meter_manager.cpp
@@ -1,16 +1,21 @@
-#include <communication/distributed_context.hpp>
-#include <algorithms.hpp>
-#include <util/hostname.hpp>
-#include <util/strprintf.hpp>
-#include <util/rangeutil.hpp>
-#include <json/json.hpp>
-
-#include "meter_manager.hpp"
+#include <arbor/profile/timer.hpp>
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/profile/meter_manager.hpp>
+
 #include "memory_meter.hpp"
 #include "power_meter.hpp"
 
+#include "algorithms.hpp"
+#include "util/hostname.hpp"
+#include "util/strprintf.hpp"
+#include "util/rangeutil.hpp"
+
 namespace arb {
-namespace util {
+namespace profile {
+
+using timer_type = timer<>;
+using util::strprintf;
 
 measurement::measurement(std::string n, std::string u,
                          const std::vector<double>& readings,
@@ -43,7 +48,7 @@ meter_manager::meter_manager(const distributed_context* ctx): glob_ctx_(ctx) {
 };
 
 void meter_manager::start() {
-    EXPECTS(!started_);
+    arb_assert(!started_);
 
     started_ = true;
 
@@ -60,11 +65,10 @@ void meter_manager::start() {
 
 
 void meter_manager::checkpoint(std::string name) {
-    EXPECTS(started_);
+    arb_assert(started_);
 
     // Record the time taken on this domain since the last checkpoint
-    auto end_time = timer_type::tic();
-    times_.push_back(timer_type::difference(start_time_, end_time));
+    times_.push_back(timer<>::toc(start_time_));
 
     // Update meters
     checkpoint_names_.push_back(std::move(name));
@@ -74,7 +78,7 @@ void meter_manager::checkpoint(std::string name) {
 
     // Synchronize all domains before setting start time for the next interval
     glob_ctx_->barrier();
-    start_time_ = timer_type::tic();
+    start_time_ = timer<>::tic();
 }
 
 const std::vector<std::unique_ptr<meter>>& meter_manager::meters() const {
@@ -93,19 +97,6 @@ const distributed_context* meter_manager::context() const {
     return glob_ctx_;
 }
 
-nlohmann::json to_json(const measurement& mnt) {
-    nlohmann::json measurements;
-    for (const auto& m: mnt.measurements) {
-        measurements.push_back(m);
-    }
-
-    return {
-        {"name", mnt.name},
-        {"units", mnt.units},
-        {"measurements", measurements}
-    };
-}
-
 // Build a report of meters, for use at the end of a simulation
 // for output to file or analysis.
 meter_report make_meter_report(const meter_manager& manager) {
@@ -116,14 +107,14 @@ meter_report make_meter_report(const meter_manager& manager) {
     // Add the times to the meter outputs
     report.meters.push_back(measurement("time", "s", manager.times(), ctx));
 
-    // Gather the meter outputs into a json Array
+    // Gather the meter outputs.
     for (auto& m: manager.meters()) {
         report.meters.push_back(
             measurement(m->name(), m->units(), m->measurements(), ctx));
     }
 
     // Gather a vector with the names of the node that each rank is running on.
-    auto host = hostname();
+    auto host = util::hostname();
     auto hosts = ctx->gather(host? *host: "unknown", 0);
     report.hosts = hosts;
 
@@ -139,15 +130,6 @@ meter_report make_meter_report(const meter_manager& manager) {
     return report;
 }
 
-nlohmann::json to_json(const meter_report& report) {
-    return {
-        {"checkpoints", report.checkpoints},
-        {"num_domains", report.num_domains},
-        {"meters", util::transform_view(report.meters, [](measurement const& m){return to_json(m);})},
-        {"hosts", report.hosts},
-    };
-}
-
 // Print easy to read report of meters to a stream.
 std::ostream& operator<<(std::ostream& o, const meter_report& report) {
     o << "\n---- meters -------------------------------------------------------------------------------\n";
diff --git a/src/profiling/meters.md b/arbor/profile/meters.md
similarity index 100%
rename from src/profiling/meters.md
rename to arbor/profile/meters.md
diff --git a/src/profiling/power_meter.cpp b/arbor/profile/power_meter.cpp
similarity index 81%
rename from src/profiling/power_meter.cpp
rename to arbor/profile/power_meter.cpp
index 9432253e929c161c6e29fc2b8f66eac2a8d4671a..aa5adf02de1b7e41a7015cfac256cc1a52fa46f0 100644
--- a/src/profiling/power_meter.cpp
+++ b/arbor/profile/power_meter.cpp
@@ -1,13 +1,12 @@
 #include <string>
 #include <vector>
 
-#include "meter.hpp"
+#include <arbor/profile/meter.hpp>
 
-#include <util/config.hpp>
-#include <hardware/power.hpp>
+#include "hardware/power.hpp"
 
 namespace arb {
-namespace util {
+namespace profile {
 
 class power_meter: public meter {
     std::vector<hw::energy_size_type> readings_;
@@ -37,11 +36,11 @@ public:
 };
 
 meter_ptr make_power_meter() {
-    if (not config::has_power_measurement) {
+    if (!arb::hw::has_energy_measurement()) {
         return nullptr;
     }
     return meter_ptr(new power_meter());
 }
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/power_meter.hpp b/arbor/profile/power_meter.hpp
similarity index 51%
rename from src/profiling/power_meter.hpp
rename to arbor/profile/power_meter.hpp
index f781964fd7baf30f72cfc8794fe71c1d512610a3..fdf5f0ad851fa1d1b4557885a842e7b9fca5de52 100644
--- a/src/profiling/power_meter.hpp
+++ b/arbor/profile/power_meter.hpp
@@ -1,11 +1,11 @@
 #pragma once
 
-#include "meter.hpp"
+#include <arbor/profile/meter.hpp>
 
 namespace arb {
-namespace util {
+namespace profile {
 
 meter_ptr make_power_meter();
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/profiler.cpp b/arbor/profile/profiler.cpp
similarity index 97%
rename from src/profiling/profiler.cpp
rename to arbor/profile/profiler.cpp
index 0403a9953a9eaa18f3a69f44f471cda2f42fbad4..7a9ff84edf3a1a42f00eb49f89594695afdb3619 100644
--- a/src/profiling/profiler.cpp
+++ b/arbor/profile/profiler.cpp
@@ -1,16 +1,18 @@
 #include <cstdio>
+#include <mutex>
 #include <ostream>
 
-#include <util/span.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/profile/profiler.hpp>
 
-#include "profiler.hpp"
+#include "threading/threading.hpp"
+#include "util/span.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
-namespace util {
+namespace profile {
 
-using timer_type = arb::threading::timer;
-using time_point = timer_type::time_point;
+using timer_type = timer<>;
+using util::make_span;
 
 #ifdef ARB_HAVE_PROFILING
 namespace {
@@ -56,7 +58,7 @@ class recorder {
     // If set to npos, no region is being timed.
     region_id_type index_ = npos;
 
-    time_point start_time_;
+    tick_type start_time_;
 
     // One accumulator for call count and wall time for each region.
     std::vector<profile_accumulator> accumulators_;
@@ -237,7 +239,6 @@ profile profiler::results() const {
 
 profile_node make_profile_tree(const profile& p) {
     using std::vector;
-    using util::make_span;
     using util::assign_from;
     using util::transform_view;
 
@@ -331,8 +332,6 @@ void profiler_enter(region_id_type region_id) {
 std::ostream& operator<<(std::ostream& o, const profile& prof) {
     char buf[80];
 
-    using util::make_span;
-
     auto tree = make_profile_tree(prof);
 
     snprintf(buf, util::size(buf), "_p_ %-20s%12s%12s%12s%8s", "REGION", "CALLS", "THREAD", "WALL", "\%");
@@ -357,5 +356,5 @@ std::ostream& operator<<(std::ostream& o, const profile&) {return o;}
 
 #endif // ARB_HAVE_PROFILING
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/arbor/profile/profiler_macro.hpp b/arbor/profile/profiler_macro.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7565c5b63b6a46a8ecdda2c85beca74a1ef48ac0
--- /dev/null
+++ b/arbor/profile/profiler_macro.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <arbor/profile/profiler.hpp>
+
+#ifdef ARB_HAVE_PROFILING
+
+    // enter a profiling region
+    #define PE(name) \
+        { \
+            static std::size_t region_id_ = arb::profile::profiler_region_id(#name); \
+            arb::profile::profiler_enter(region_id_); \
+        }
+
+    // leave a profiling region
+    #define PL arb::profile::profiler_leave
+
+#else
+
+    #define PE(name)
+    #define PL()
+
+#endif
+
diff --git a/src/recipe.hpp b/arbor/recipe.hpp
similarity index 95%
rename from src/recipe.hpp
rename to arbor/recipe.hpp
index a03518fd69734476e61af57702c84e7376280bb5..1ce9453d9594e02563fe515151debd871c334d3e 100644
--- a/src/recipe.hpp
+++ b/arbor/recipe.hpp
@@ -5,10 +5,11 @@
 #include <unordered_map>
 #include <stdexcept>
 
-#include <cell.hpp>
-#include <common_types.hpp>
-#include <event_generator.hpp>
-#include <util/unique_any.hpp>
+#include <arbor/common_types.hpp>
+
+#include "cell.hpp"
+#include "event_generator.hpp"
+#include "util/unique_any.hpp"
 
 namespace arb {
 
diff --git a/src/sampler_map.hpp b/arbor/sampler_map.hpp
similarity index 92%
rename from src/sampler_map.hpp
rename to arbor/sampler_map.hpp
index 7d5fc5c8084b24a79161429b1382f6b69430d789..cd6a157666660f000f573e8bb2a84aa1d4020525 100644
--- a/src/sampler_map.hpp
+++ b/arbor/sampler_map.hpp
@@ -9,11 +9,12 @@
 #include <mutex>
 #include <unordered_map>
 
-#include <common_types.hpp>
-#include <sampling.hpp>
-#include <schedule.hpp>
-#include <util/deduce_return.hpp>
-#include <util/transform.hpp>
+#include <arbor/common_types.hpp>
+
+#include "sampling.hpp"
+#include "schedule.hpp"
+#include "util/deduce_return.hpp"
+#include "util/transform.hpp"
 
 namespace arb {
 
diff --git a/src/sampling.hpp b/arbor/sampling.hpp
similarity index 92%
rename from src/sampling.hpp
rename to arbor/sampling.hpp
index aa2c82a8ed0b6f2f96219c5950b949c6172ce571..0433763317596005829b53dabae1c94700d14c59 100644
--- a/src/sampling.hpp
+++ b/arbor/sampling.hpp
@@ -3,8 +3,9 @@
 #include <cstddef>
 #include <functional>
 
-#include <common_types.hpp>
-#include <util/any_ptr.hpp>
+#include <arbor/common_types.hpp>
+
+#include "util/any_ptr.hpp"
 
 namespace arb {
 
diff --git a/src/schedule.cpp b/arbor/schedule.cpp
similarity index 94%
rename from src/schedule.cpp
rename to arbor/schedule.cpp
index 6fd82067720887e33e683f51fdcc44cc9acf5214..18880296422e2d9e4ec4d10268e41d0263c9e52c 100644
--- a/src/schedule.cpp
+++ b/arbor/schedule.cpp
@@ -3,8 +3,9 @@
 #include <utility>
 #include <vector>
 
-#include <common_types.hpp>
-#include <schedule.hpp>
+#include <arbor/common_types.hpp>
+
+#include "schedule.hpp"
 
 // Implementations for specific schedules.
 
diff --git a/src/schedule.hpp b/arbor/schedule.hpp
similarity index 95%
rename from src/schedule.hpp
rename to arbor/schedule.hpp
index 3f7788c2c74337861975a4c540cb4942a1d606e3..66086a841b1090fd97a1853354a2c601d1b9d7ed 100644
--- a/src/schedule.hpp
+++ b/arbor/schedule.hpp
@@ -5,10 +5,11 @@
 #include <random>
 #include <vector>
 
-#include <common_types.hpp>
-#include <util/compat.hpp>
-#include <util/debug.hpp>
-#include <util/meta.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "util/meta.hpp"
 
 // Time schedules for probe–sampler associations.
 
@@ -106,7 +107,7 @@ public:
         start_index_(0),
         times_(std::begin(seq), compat::end(seq))
     {
-        EXPECTS(std::is_sorted(times_.begin(), times_.end()));
+        arb_assert(std::is_sorted(times_.begin(), times_.end()));
     }
 
     void reset() {
@@ -133,7 +134,7 @@ public:
     poisson_schedule_impl(time_type tstart, time_type mean_dt, const RandomNumberEngine& rng):
         tstart_(tstart), exp_(1./mean_dt), rng_(rng), reset_state_(rng), next_(tstart)
     {
-        EXPECTS(tstart_>=0);
+        arb_assert(tstart_>=0);
         step();
     }
 
diff --git a/src/segment.hpp b/arbor/segment.hpp
similarity index 96%
rename from src/segment.hpp
rename to arbor/segment.hpp
index 43f8b21ebf7f69218f3ca29decf0ffd2589096c9..32a2a7f59da9ed171ea6d87c19ced59cea8aac81 100644
--- a/src/segment.hpp
+++ b/arbor/segment.hpp
@@ -6,15 +6,16 @@
 #include <unordered_map>
 #include <vector>
 
-#include <algorithms.hpp>
-#include <common_types.hpp>
-#include <compartment.hpp>
-#include <math.hpp>
-#include <morphology.hpp>
-#include <mechinfo.hpp>
-#include <point.hpp>
-#include <util/make_unique.hpp>
-#include <util/maputil.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/mechinfo.hpp>
+
+#include "algorithms.hpp"
+#include "compartment.hpp"
+#include "math.hpp"
+#include "morphology.hpp"
+#include "point.hpp"
+#include "util/make_unique.hpp"
+#include "util/maputil.hpp"
 
 namespace arb {
 
@@ -272,7 +273,7 @@ public:
     cable_segment(section_kind k, std::vector<value_type> r, std::vector<value_type> lens):
         segment(k), radii_(std::move(r)), lengths_(std::move(lens))
     {
-        EXPECTS(kind_==section_kind::dendrite || kind_==section_kind::axon);
+        arb_assert(kind_==section_kind::dendrite || kind_==section_kind::axon);
     }
 
     cable_segment(section_kind k, value_type r1, value_type r2, value_type len):
@@ -284,7 +285,7 @@ public:
     cable_segment(section_kind k, std::vector<value_type> r, std::vector<point_type> p):
         segment(k), radii_(std::move(r)), locations_(std::move(p))
     {
-        EXPECTS(kind_==section_kind::dendrite || kind_==section_kind::axon);
+        arb_assert(kind_==section_kind::dendrite || kind_==section_kind::axon);
         update_lengths();
     }
 
diff --git a/src/simd/approx.hpp b/arbor/simd/approx.hpp
similarity index 100%
rename from src/simd/approx.hpp
rename to arbor/simd/approx.hpp
diff --git a/src/simd/avx.hpp b/arbor/simd/avx.hpp
similarity index 100%
rename from src/simd/avx.hpp
rename to arbor/simd/avx.hpp
diff --git a/src/simd/avx512.hpp b/arbor/simd/avx512.hpp
similarity index 100%
rename from src/simd/avx512.hpp
rename to arbor/simd/avx512.hpp
diff --git a/src/simd/generic.hpp b/arbor/simd/generic.hpp
similarity index 100%
rename from src/simd/generic.hpp
rename to arbor/simd/generic.hpp
diff --git a/src/simd/implbase.hpp b/arbor/simd/implbase.hpp
similarity index 100%
rename from src/simd/implbase.hpp
rename to arbor/simd/implbase.hpp
diff --git a/src/simd/native.hpp b/arbor/simd/native.hpp
similarity index 100%
rename from src/simd/native.hpp
rename to arbor/simd/native.hpp
diff --git a/src/simd/simd.hpp b/arbor/simd/simd.hpp
similarity index 99%
rename from src/simd/simd.hpp
rename to arbor/simd/simd.hpp
index 02d1ad62beb413997574c224e524a4e02ecbe4c2..b863a283324ceaf6990036c028644d80a2772c97 100644
--- a/src/simd/simd.hpp
+++ b/arbor/simd/simd.hpp
@@ -7,7 +7,6 @@
 #include <simd/implbase.hpp>
 #include <simd/generic.hpp>
 #include <simd/native.hpp>
-#include <common_types.hpp>
 
 namespace arb {
 namespace simd {
diff --git a/src/simd/simd_io.hpp b/arbor/simd/simd_io.hpp
similarity index 100%
rename from src/simd/simd_io.hpp
rename to arbor/simd/simd_io.hpp
diff --git a/src/simple_sampler.hpp b/arbor/simple_sampler.hpp
similarity index 89%
rename from src/simple_sampler.hpp
rename to arbor/simple_sampler.hpp
index 5961c9da0d6d2d46fae97d34aa1319bc0a1bc0c8..8d85ab809084f67c6d0556a98adeb7b874a45247 100644
--- a/src/simple_sampler.hpp
+++ b/arbor/simple_sampler.hpp
@@ -7,14 +7,13 @@
 
 #include <vector>
 
-#include <common_types.hpp>
-#include <sampling.hpp>
-#include <util/any_ptr.hpp>
-#include <util/deduce_return.hpp>
-#include <util/span.hpp>
-#include <util/transform.hpp>
-
-#include <iostream>
+#include <arbor/common_types.hpp>
+
+#include "sampling.hpp"
+#include "util/any_ptr.hpp"
+#include "util/deduce_return.hpp"
+#include "util/span.hpp"
+#include "util/transform.hpp"
 
 namespace arb {
 
diff --git a/src/simulation.cpp b/arbor/simulation.cpp
similarity index 87%
rename from src/simulation.cpp
rename to arbor/simulation.cpp
index ca5beb589cd4009b1778c0514d5484dadaf13886..0fbde656c28450fd0c59fd1b5f60c1f9bc630cdd 100644
--- a/src/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -1,3 +1,4 @@
+#include <memory>
 #include <set>
 #include <vector>
 
@@ -8,17 +9,40 @@
 #include <merge_events.hpp>
 #include <simulation.hpp>
 #include <recipe.hpp>
+#include <thread_private_spike_store.hpp>
+#include <util/double_buffer.hpp>
 #include <util/filter.hpp>
 #include <util/span.hpp>
 #include <util/unique_any.hpp>
-#include <profiling/profiler.hpp>
+#include "profile/profiler_macro.hpp"
 
 namespace arb {
 
+class spike_double_buffer {
+    util::double_buffer<thread_private_spike_store> buffer_;
+
+public:
+    // Convenience functions that map the two spike buffers onto the
+    // appropriate integration interval.
+    //
+    // To overlap communication and computation, integration intervals of
+    // size Delta/2 are used, where Delta is the minimum delay in the global
+    // system.
+    // From the frame of reference of the current integration period there
+    // are three intervals: previous, current and future. Only the first two
+    // have a spike buffer; exchange() is called when moving to a new interval:
+    //      current:  spikes generated in the current interval
+    //      previous: spikes generated in the preceding interval
+
+    thread_private_spike_store& current()  { return buffer_.get(); }
+    thread_private_spike_store& previous() { return buffer_.other(); }
+    void exchange() { buffer_.exchange(); }
+};
+
 simulation::simulation(const recipe& rec,
                        const domain_decomposition& decomp,
                        const distributed_context* ctx):
-    context_(ctx),
+    local_spikes_(new spike_double_buffer{}),
     communicator_(rec, decomp, ctx)
 {
     const auto num_local_cells = communicator_.num_local_cells();
@@ -68,6 +92,8 @@ simulation::simulation(const recipe& rec,
     event_lanes_[1].resize(num_local_cells);
 }
 
+simulation::~simulation() = default;
+
 void simulation::reset() {
     t_ = 0.;
 
@@ -97,8 +123,8 @@ void simulation::reset() {
 
     communicator_.reset();
 
-    current_spikes().clear();
-    previous_spikes().clear();
+    local_spikes_->current().clear();
+    local_spikes_->previous().clear();
 }
 
 time_type simulation::run(time_type tfinal, time_type dt) {
@@ -121,7 +147,7 @@ time_type simulation::run(time_type tfinal, time_type dt) {
                     communicator_.group_queue_range(i));
                 group->advance(epoch_, dt, queues);
                 PE(advance_spikes);
-                current_spikes().insert(group->spikes());
+                local_spikes_->current().insert(group->spikes());
                 group->clear_spikes();
                 PL();
             });
@@ -133,7 +159,7 @@ time_type simulation::run(time_type tfinal, time_type dt) {
     // integration period at the latest.
     auto exchange = [&] () {
         PE(communication_exchange_gatherlocal);
-        auto local_spikes = previous_spikes().gather();
+        auto local_spikes = local_spikes_->previous().gather();
         PL();
         auto global_spikes = communicator_.exchange(local_spikes);
 
@@ -155,11 +181,11 @@ time_type simulation::run(time_type tfinal, time_type dt) {
     epoch_ = epoch(0, tuntil);
     setup_events(t_, tuntil, 1);
     while (t_<tfinal) {
-        local_spikes_.exchange();
+        local_spikes_->exchange();
 
         // empty the spike buffers for the current integration period.
         // these buffers will store the new spikes generated in update_cells.
-        current_spikes().clear();
+        local_spikes_->current().clear();
 
         // run the tasks, overlapping if the threading model and number of
         // available threads permits it.
@@ -175,7 +201,7 @@ time_type simulation::run(time_type tfinal, time_type dt) {
     }
 
     // Run the exchange one last time to ensure that all spikes are output to file.
-    local_spikes_.exchange();
+    local_spikes_->exchange();
     exchange();
 
     return t_;
diff --git a/src/simulation.hpp b/arbor/simulation.hpp
similarity index 67%
rename from src/simulation.hpp
rename to arbor/simulation.hpp
index 0acf7419ac05b95526397653f59ea6e52c5fc6c4..9c1787429bf40ad61e69063e5658008be51fe0df 100644
--- a/src/simulation.hpp
+++ b/arbor/simulation.hpp
@@ -1,25 +1,27 @@
 #pragma once
 
 #include <array>
+#include <memory>
 #include <unordered_map>
 #include <vector>
 
-#include <backends.hpp>
-#include <cell_group.hpp>
-#include <common_types.hpp>
-#include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
-#include <domain_decomposition.hpp>
-#include <epoch.hpp>
-#include <recipe.hpp>
-#include <sampling.hpp>
-#include <thread_private_spike_store.hpp>
-#include <util/nop.hpp>
-#include <util/handle_set.hpp>
-#include <util/unique_any.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
+
+#include "backends.hpp"
+#include "cell_group.hpp"
+#include "communication/communicator.hpp"
+#include "domain_decomposition.hpp"
+#include "epoch.hpp"
+#include "recipe.hpp"
+#include "sampling.hpp"
+#include "util/nop.hpp"
+#include "util/handle_set.hpp"
 
 namespace arb {
 
+class spike_double_buffer;
+
 class simulation {
 public:
     using spike_export_function = std::function<void(const std::vector<spike>&)>;
@@ -58,6 +60,8 @@ public:
     // are to be delivered at or after the current simulation time.
     void inject_events(const pse_vector& events);
 
+    ~simulation();
+
 private:
     // Private helper function that sets up the event lanes for an epoch.
     // See comments on implementation for more information.
@@ -67,9 +71,6 @@ private:
 
     std::size_t num_groups() const;
 
-    // communication context
-    const distributed_context* context_;
-
     // keep track of information about the current integration interval
     epoch epoch_;
 
@@ -80,8 +81,7 @@ private:
     // one set of event_generators for each local cell
     std::vector<std::vector<event_generator>> event_generators_;
 
-    using local_spike_store_type = thread_private_spike_store;
-    util::double_buffer<local_spike_store_type> local_spikes_;
+    std::unique_ptr<spike_double_buffer> local_spikes_;
 
     spike_export_function global_export_callback_ = util::nop_function;
     spike_export_function local_export_callback_ = util::nop_function;
@@ -93,21 +93,6 @@ private:
 
     communicator communicator_;
 
-    // Convenience functions that map the spike buffers onto the appropriate
-    // integration interval.
-    //
-    // To overlap communication and computation, integration intervals of
-    // size Delta/2 are used, where Delta is the minimum delay in the global
-    // system.
-    // From the frame of reference of the current integration period we
-    // define three intervals: previous, current and future
-    // Then we define the following :
-    //      current_spikes : spikes generated in the current interval
-    //      previous_spikes: spikes generated in the preceding interval
-
-    local_spike_store_type& current_spikes()  { return local_spikes_.get(); }
-    local_spike_store_type& previous_spikes() { return local_spikes_.other(); }
-
     // Pending events to be delivered.
     std::array<std::vector<pse_vector>, 2> event_lanes_;
     std::vector<pse_vector> pending_events_;
diff --git a/src/spike_source_cell.hpp b/arbor/spike_source_cell.hpp
similarity index 100%
rename from src/spike_source_cell.hpp
rename to arbor/spike_source_cell.hpp
diff --git a/src/spike_source_cell_group.cpp b/arbor/spike_source_cell_group.cpp
similarity index 96%
rename from src/spike_source_cell_group.cpp
rename to arbor/spike_source_cell_group.cpp
index 1cbdaade5e1b9e230fff25b1d8eb3e80173d91bf..0bef83033e042e5f03979fe2db44939d1565c5b5 100644
--- a/src/spike_source_cell_group.cpp
+++ b/arbor/spike_source_cell_group.cpp
@@ -1,8 +1,9 @@
 #include <exception>
 
 #include <cell_group.hpp>
-#include <profiling/profiler.hpp>
+#include "profile/profiler_macro.hpp"
 #include <recipe.hpp>
+#include <spike_source_cell.hpp>
 #include <spike_source_cell_group.hpp>
 #include <time_sequence.hpp>
 
diff --git a/src/spike_source_cell_group.hpp b/arbor/spike_source_cell_group.hpp
similarity index 95%
rename from src/spike_source_cell_group.hpp
rename to arbor/spike_source_cell_group.hpp
index 6f81367f54e7fcbb69ac20aa670f47183426d70b..d4d5249790c0575f8015bb24501d23ec622a1373 100644
--- a/src/spike_source_cell_group.hpp
+++ b/arbor/spike_source_cell_group.hpp
@@ -6,10 +6,6 @@
 
 namespace arb {
 
-struct spike_source_cell {
-    time_seq seq;
-};
-
 class spike_source_cell_group: public cell_group {
 public:
     spike_source_cell_group(std::vector<cell_gid_type> gids, const recipe& rec);
diff --git a/src/swcio.cpp b/arbor/swcio.cpp
similarity index 99%
rename from src/swcio.cpp
rename to arbor/swcio.cpp
index 7a3c2720d7a956597c4394a0edea321b149c81db..02f49f0d6f1a5618239ec85a6cb2c16be1d5e089 100644
--- a/src/swcio.cpp
+++ b/arbor/swcio.cpp
@@ -5,12 +5,13 @@
 #include <sstream>
 #include <unordered_set>
 
+#include <arbor/assert.hpp>
+
 #include <algorithms.hpp>
 #include <cell.hpp>
 #include <morphology.hpp>
 #include <point.hpp>
 #include <swcio.hpp>
-#include <util/debug.hpp>
 
 namespace arb {
 namespace io {
diff --git a/src/swcio.hpp b/arbor/swcio.hpp
similarity index 98%
rename from src/swcio.hpp
rename to arbor/swcio.hpp
index ee347995e896ed66f235d959f4f6e9f08a49bf4c..849962938abfacdc284893a9946acb52f9c605ea 100644
--- a/src/swcio.hpp
+++ b/arbor/swcio.hpp
@@ -8,10 +8,11 @@
 #include <unordered_set>
 #include <vector>
 
+#include <arbor/assert.hpp>
+
 #include <algorithms.hpp>
 #include <morphology.hpp>
 #include <point.hpp>
-#include <util/debug.hpp>
 
 namespace arb {
 namespace io {
@@ -125,7 +126,7 @@ morphology swc_as_morphology(const RandomAccessSequence& swc_records) {
     auto parent_branch_index = algorithms::tree_reduce(swc_parent_index, branch_index);
 
     // sanity check
-    EXPECTS(parent_branch_index.size() == branch_index.size() - 1);
+    arb_assert(parent_branch_index.size() == branch_index.size() - 1);
 
     // Add the soma first; then the segments
     const auto& soma = swc_records[0];
diff --git a/arbor/thread_private_spike_store.cpp b/arbor/thread_private_spike_store.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..883d234cc0d9aad94a1b56dfdf37652170731e0b
--- /dev/null
+++ b/arbor/thread_private_spike_store.cpp
@@ -0,0 +1,45 @@
+#include <vector>
+
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "thread_private_spike_store.hpp"
+
+namespace arb {
+
+struct local_spike_store_type {
+    threading::enumerable_thread_specific<std::vector<spike>> buffers_;
+};
+
+thread_private_spike_store::thread_private_spike_store():
+    impl_(new local_spike_store_type)
+{}
+
+thread_private_spike_store::~thread_private_spike_store() {}
+
+// Concatenate every thread's buffer into one vector; the buffers are only read.
+std::vector<spike> thread_private_spike_store::gather() const {
+    std::vector<spike>::size_type num_spikes = 0; // vector's own size type: no narrowing
+    for (auto& b: impl_->buffers_) {
+        num_spikes += b.size();
+    }
+    std::vector<spike> spikes;
+    spikes.reserve(num_spikes); // single allocation for the appends below
+    for (auto& b: impl_->buffers_) {
+        // Append: inserting at begin() would shift every spike gathered so far.
+        spikes.insert(spikes.end(), b.begin(), b.end());
+    }
+    return spikes;
+}
+
+std::vector<spike>& thread_private_spike_store::get() {
+    return impl_->buffers_.local();
+}
+
+void thread_private_spike_store::clear() {
+    for (auto& b: impl_->buffers_) {
+        b.clear();
+    }
+}
+
+} // namespace arb
diff --git a/arbor/thread_private_spike_store.hpp b/arbor/thread_private_spike_store.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9e1829efb594460586637e2e598d1bc9a1a891ad
--- /dev/null
+++ b/arbor/thread_private_spike_store.hpp
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <arbor/common_types.hpp>
+#include <arbor/spike.hpp>
+
+#include "threading/threading.hpp"
+
+namespace arb {
+
+struct local_spike_store_type;
+
+/// Handles the complexity of managing thread private buffers of spikes.
+/// Internally stores one thread private buffer of spikes for each hardware thread.
+/// This can be accessed directly using the get() method, which returns a reference to
+/// the thread private buffer of the calling thread.
+/// The insert() method appends a vector of spikes to the calling thread's buffer,
+/// and the gather() method collates all of the buffers into a single vector.
+class thread_private_spike_store {
+public :
+    thread_private_spike_store();
+    ~thread_private_spike_store();
+
+    /// Collate all of the individual buffers into a single vector of spikes.
+    /// Does not modify the buffer contents.
+    std::vector<spike> gather() const;
+
+    /// Return a reference to the thread private buffer of the calling thread.
+    std::vector<spike>& get();
+
+    /// Clear all of the thread private buffers.
+    void clear();
+
+    /// Append the passed spikes to the end of the thread private buffer of the
+    /// calling thread.
+    void insert(const std::vector<spike>& spikes) {
+        auto& buff = get();
+        buff.insert(buff.end(), spikes.begin(), spikes.end());
+    }
+
+private :
+    /// Thread private storage for accumulating spikes; opaque type defined in the .cpp.
+    std::unique_ptr<local_spike_store_type> impl_;
+};
+
+} // namespace arb
diff --git a/arbor/threadinfo.cpp b/arbor/threadinfo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..42df2933fd7dcb3d9ced5825ae5ee4f1df1c5d66
--- /dev/null
+++ b/arbor/threadinfo.cpp
@@ -0,0 +1,17 @@
+#include <string>
+
+#include <arbor/threadinfo.hpp>
+
+#include "threading/threading.hpp"
+
+namespace arb {
+
+int num_threads() {
+    return threading::num_threads();
+}
+
+std::string thread_implementation() {
+    return threading::description();
+}
+
+} // namespace arb
diff --git a/src/threading/cthread.cpp b/arbor/threading/cthread.cpp
similarity index 100%
rename from src/threading/cthread.cpp
rename to arbor/threading/cthread.cpp
diff --git a/src/threading/cthread.hpp b/arbor/threading/cthread.hpp
similarity index 56%
rename from src/threading/cthread.hpp
rename to arbor/threading/cthread.hpp
index da70a2b54d6490eb8e74fd179de7eee3f2b4dc34..b2a6142be763205ad9d6c99e37d4f61e2f12ed07 100644
--- a/src/threading/cthread.hpp
+++ b/arbor/threading/cthread.hpp
@@ -1,9 +1,5 @@
 #pragma once
 
-#if !defined(ARB_HAVE_CTHREAD)
-    #error "this header can only be loaded if ARB_HAVE_CTHREAD is set"
-#endif
-
 // task_group definition
 #include "cthread_impl.hpp"
 
diff --git a/src/threading/cthread_impl.hpp b/arbor/threading/cthread_impl.hpp
similarity index 99%
rename from src/threading/cthread_impl.hpp
rename to arbor/threading/cthread_impl.hpp
index fcdaa676278297e66b64672f7922f7cc7f4f647a..52e84be05f9d2778ee860b8c1695a673240c2e7e 100644
--- a/src/threading/cthread_impl.hpp
+++ b/arbor/threading/cthread_impl.hpp
@@ -17,14 +17,12 @@
 
 #include <cstdlib>
 
-#include "timer.hpp"
-
 namespace arb {
 namespace threading {
+inline namespace cthread {
 
 // Forward declare task_group at bottom of this header
 class task_group;
-using arb::threading::impl::timer;
 
 namespace impl {
 
@@ -267,5 +265,6 @@ inline std::size_t thread_id() {
     return impl::task_pool::get_global_task_pool().get_current_thread();
 }
 
+} // namespace cthread
 } // namespace threading
 } // namespace arb
diff --git a/src/threading/cthread_sort.hpp b/arbor/threading/cthread_sort.hpp
similarity index 89%
rename from src/threading/cthread_sort.hpp
rename to arbor/threading/cthread_sort.hpp
index 32630e74eaa53b5731ffac81776d07994307015b..631b2117831d0980fb44078e32853ea4a8f89a29 100644
--- a/src/threading/cthread_sort.hpp
+++ b/arbor/threading/cthread_sort.hpp
@@ -2,6 +2,7 @@
 
 namespace arb {
 namespace threading {
+inline namespace cthread {
 
 template <typename RandomIt>
 void sort(RandomIt begin, RandomIt end) {
@@ -18,5 +19,6 @@ void sort(Container& c) {
     std::sort(std::begin(c), std::end(c));
 }
 
+} // namespace cthread
 } // namespace threading
 } // namespace arb
diff --git a/src/threading/serial.hpp b/arbor/threading/serial.hpp
similarity index 94%
rename from src/threading/serial.hpp
rename to arbor/threading/serial.hpp
index 857898e313af1469fcee2d70aa4f4b09243139a1..103818f636488599eba7095daf7e3871191f7cbb 100644
--- a/src/threading/serial.hpp
+++ b/arbor/threading/serial.hpp
@@ -1,21 +1,14 @@
 #pragma once
 
-#if !defined(ARB_HAVE_SERIAL)
-    #error "this header can only be loaded if ARB_HAVE_SERIAL is set"
-#endif
-
 #include <algorithm>
 #include <array>
 #include <chrono>
 #include <string>
 #include <vector>
 
-#include "timer.hpp"
-
 namespace arb {
 namespace threading {
-
-using arb::threading::impl::timer;
+inline namespace serial {
 
 ///////////////////////////////////////////////////////////////////////
 // types
@@ -123,6 +116,7 @@ public:
     {}
 };
 
+} // namespace serial
 } // namespace threading
 } // namespace arb
 
diff --git a/arbor/threading/tbb.hpp b/arbor/threading/tbb.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b4c2af94ae8eebc959e422813c68c3ea7c1d7615
--- /dev/null
+++ b/arbor/threading/tbb.hpp
@@ -0,0 +1,78 @@
+#pragma once
+
+// TBB threading back end: aliases and thin wrappers in arb::threading
+// around the corresponding ::tbb facilities.
+
+#include <atomic>
+#include <cstddef>
+#include <iterator>
+#include <string>
+
+#include <tbb/tbb.h>
+#include <tbb/tbb_stddef.h>
+#include <tbb/compat/thread>
+#include <tbb/enumerable_thread_specific.h>
+
+namespace arb {
+namespace threading {
+inline namespace tbb {
+
+// Thread-local storage container, taken directly from TBB.
+template <typename T>
+using enumerable_thread_specific = ::tbb::enumerable_thread_specific<T>;
+
+// Parallel loop over the integer range given by left and right;
+// the arguments are forwarded unchanged to ::tbb::parallel_for.
+struct parallel_for {
+    template <typename F>
+    static void apply(int left, int right, F f) {
+        ::tbb::parallel_for(left, right, f);
+    }
+};
+
+// Describe the back end: "TBBv" followed by the TBB runtime interface version.
+inline std::string description() {
+    return "TBBv" + std::to_string(::tbb::TBB_runtime_interface_version());
+}
+
+// Always true for this back end.
+constexpr bool multithreaded() { return true; }
+
+// Vector and task group types, taken directly from TBB.
+template <typename T>
+using parallel_vector = ::tbb::concurrent_vector<T>;
+
+using task_group = ::tbb::task_group;
+
+// Return a per-thread index. Indices are drawn from a shared atomic counter
+// the first time a thread calls this function and are cached thread-locally.
+inline
+std::size_t thread_id() {
+    static std::atomic<std::size_t> num_threads(0);
+    thread_local std::size_t thread_id = num_threads++;
+    return thread_id;
+}
+
+// Sort the range [begin, end) with ::tbb::parallel_sort.
+template <typename RandomIt>
+void sort(RandomIt begin, RandomIt end) {
+    ::tbb::parallel_sort(begin, end);
+}
+
+// As above, ordering elements with the comparison function comp.
+template <typename RandomIt, typename Compare>
+void sort(RandomIt begin, RandomIt end, Compare comp) {
+    ::tbb::parallel_sort(begin, end, comp);
+}
+
+// Sort a whole container. std::begin/std::end are used, as in the cthread
+// back end's sort, so that built-in arrays are accepted as well.
+template <typename Container>
+void sort(Container& c) {
+    ::tbb::parallel_sort(std::begin(c), std::end(c));
+}
+
+} // namespace tbb
+} // namespace threading
+} // namespace arb
+
diff --git a/src/threading/threading.cpp b/arbor/threading/threading.cpp
similarity index 98%
rename from src/threading/threading.cpp
rename to arbor/threading/threading.cpp
index c1cd5f2f98c2ee3a7e564794d7f01462d9a645e0..9f9855037a1eafe4a129ca2a5efe596fb521127d 100644
--- a/src/threading/threading.cpp
+++ b/arbor/threading/threading.cpp
@@ -3,7 +3,7 @@
 #include <regex>
 #include <string>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 #include <hardware/affinity.hpp>
 
 #include "threading.hpp"
diff --git a/src/threading/threading.hpp b/arbor/threading/threading.hpp
similarity index 85%
rename from src/threading/threading.hpp
rename to arbor/threading/threading.hpp
index 2e90ac36219eab4465e32639d5b7a08e6d485249..8150ca6afca4be71ae155134bcf479ddf37ca7da 100644
--- a/src/threading/threading.hpp
+++ b/arbor/threading/threading.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 namespace arb {
 namespace threading {
@@ -26,10 +26,16 @@ size_t num_threads();
 } // namespace arb
 
 #if defined(ARB_HAVE_TBB)
-    #include "tbb.hpp"
+
+#include "tbb.hpp"
+
 #elif defined(ARB_HAVE_CTHREAD)
-    #include "cthread.hpp"
+
+#include "cthread.hpp"
+
 #else
-    #define ARB_HAVE_SERIAL
-    #include "serial.hpp"
+
+#define ARB_HAVE_SERIAL
+#include "serial.hpp"
+
 #endif
diff --git a/src/time_sequence.hpp b/arbor/time_sequence.hpp
similarity index 98%
rename from src/time_sequence.hpp
rename to arbor/time_sequence.hpp
index 9b01d0238e2d91b6286b4ef893d881caa9e7aa22..d9845599cf1388c67bf233a2b71536a4575732e8 100644
--- a/src/time_sequence.hpp
+++ b/arbor/time_sequence.hpp
@@ -4,10 +4,11 @@
 #include <memory>
 #include <random>
 
-#include <common_types.hpp>
-#include <event_queue.hpp>
-#include <util/meta.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+
+#include "event_queue.hpp"
+#include "util/meta.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 
diff --git a/src/tree.hpp b/arbor/tree.hpp
similarity index 98%
rename from src/tree.hpp
rename to arbor/tree.hpp
index 75a9cac24a742398a82c19dff57959e0980302f3..af0fb4bbee783cde19114480247d6006cb9700eb 100644
--- a/src/tree.hpp
+++ b/arbor/tree.hpp
@@ -5,10 +5,11 @@
 #include <numeric>
 #include <vector>
 
-#include <algorithms.hpp>
-#include <common_types.hpp>
-#include <memory/memory.hpp>
-#include <util/span.hpp>
+#include <arbor/common_types.hpp>
+
+#include "algorithms.hpp"
+#include "memory/memory.hpp"
+#include "util/span.hpp"
 
 namespace arb {
 
@@ -59,7 +60,7 @@ public:
         }
 
         // an empty parent_index implies a single-compartment/segment cell
-        EXPECTS(parent_index.size()!=0u);
+        arb_assert(parent_index.size()!=0u);
 
         init(parent_index.size());
         memory::copy(parent_index, parents_);
diff --git a/src/util/any.hpp b/arbor/util/any.hpp
similarity index 100%
rename from src/util/any.hpp
rename to arbor/util/any.hpp
diff --git a/src/util/any_ptr.hpp b/arbor/util/any_ptr.hpp
similarity index 91%
rename from src/util/any_ptr.hpp
rename to arbor/util/any_ptr.hpp
index e7c1534f4ff643ecb6246d64b96bfb3466c71436..a8a05bc7aee1e570710eaae65d7045effa82d498 100644
--- a/src/util/any_ptr.hpp
+++ b/arbor/util/any_ptr.hpp
@@ -22,8 +22,7 @@
 #include <cstddef>
 #include <type_traits>
 
-#include <util/lexcmp_def.hpp>
-#include <util/meta.hpp>
+#include <arbor/util/lexcmp_def.hpp>
 
 namespace arb {
 namespace util {
@@ -53,7 +52,7 @@ struct any_ptr {
         type_ptr_ = &typeid(T*);
     }
 
-    template <typename T, typename = enable_if_t<std::is_pointer<T>::value>>
+    template <typename T, typename = typename std::enable_if<std::is_pointer<T>::value>::type>
     T as() const noexcept {
         if (std::is_same<T, void*>::value) {
             return (T)ptr_;
@@ -86,7 +85,7 @@ private:
 };
 
 // Order, compare by pointer value:
-DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(any_ptr, (a.as<void*>()), (b.as<void*>()))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(any_ptr, (a.as<void*>()), (b.as<void*>()))
 
 // Overload `util::any_cast` for these pointers.
 template <typename T>
diff --git a/src/util/config.hpp b/arbor/util/config.hpp
similarity index 100%
rename from src/util/config.hpp
rename to arbor/util/config.hpp
diff --git a/src/util/counter.hpp b/arbor/util/counter.hpp
similarity index 100%
rename from src/util/counter.hpp
rename to arbor/util/counter.hpp
diff --git a/src/util/cycle.hpp b/arbor/util/cycle.hpp
similarity index 100%
rename from src/util/cycle.hpp
rename to arbor/util/cycle.hpp
diff --git a/src/util/deduce_return.hpp b/arbor/util/deduce_return.hpp
similarity index 100%
rename from src/util/deduce_return.hpp
rename to arbor/util/deduce_return.hpp
diff --git a/src/util/double_buffer.hpp b/arbor/util/double_buffer.hpp
similarity index 98%
rename from src/util/double_buffer.hpp
rename to arbor/util/double_buffer.hpp
index 9a56a521eb88ce68f5ed005af0eab4d130b0a0d3..f45200984f9194323ae24dbd26abc563fcccd523 100644
--- a/src/util/double_buffer.hpp
+++ b/arbor/util/double_buffer.hpp
@@ -3,7 +3,7 @@
 #include <array>
 #include <atomic>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
 
 namespace arb {
 namespace util {
diff --git a/src/util/either.hpp b/arbor/util/either.hpp
similarity index 99%
rename from src/util/either.hpp
rename to arbor/util/either.hpp
index 4adc4c2b96f163a20e06fdbead406dff92ef148d..8b40b73510c8df206563001e72ce69e0df5857d4 100644
--- a/src/util/either.hpp
+++ b/arbor/util/either.hpp
@@ -11,8 +11,9 @@
 #include <stdexcept>
 #include <utility>
 
+#include <arbor/util/uninitialized.hpp>
+
 #include "util/meta.hpp"
-#include "util/uninitialized.hpp"
 
 namespace arb {
 namespace util {
diff --git a/src/util/file.hpp b/arbor/util/file.hpp
similarity index 100%
rename from src/util/file.hpp
rename to arbor/util/file.hpp
diff --git a/src/util/filter.hpp b/arbor/util/filter.hpp
similarity index 99%
rename from src/util/filter.hpp
rename to arbor/util/filter.hpp
index 1db4d6d5a2f1b04ca3e85f02db19ee948840bcb1..c8ff441987a6838cf5b05e3523c5b7024b8d53d8 100644
--- a/src/util/filter.hpp
+++ b/arbor/util/filter.hpp
@@ -8,11 +8,12 @@
 #include <memory>
 #include <type_traits>
 
+#include <arbor/assert.hpp>
+
 #include <util/iterutil.hpp>
 #include <util/meta.hpp>
 #include <util/range.hpp>
 
-#include <util/debug.hpp>
 
 namespace arb {
 namespace util {
diff --git a/src/util/handle_set.hpp b/arbor/util/handle_set.hpp
similarity index 100%
rename from src/util/handle_set.hpp
rename to arbor/util/handle_set.hpp
diff --git a/src/util/hostname.cpp b/arbor/util/hostname.cpp
similarity index 94%
rename from src/util/hostname.cpp
rename to arbor/util/hostname.cpp
index a88316d9fd5c2a0f5f1c66fe1da08b092c1f171b..7b335de98d85bbca787e3a42f0e030cc68206ec1 100644
--- a/src/util/hostname.cpp
+++ b/arbor/util/hostname.cpp
@@ -1,6 +1,6 @@
 #include <string>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 #include "hostname.hpp"
 
diff --git a/src/util/hostname.hpp b/arbor/util/hostname.hpp
similarity index 85%
rename from src/util/hostname.hpp
rename to arbor/util/hostname.hpp
index 7034fd51046214c2977bce2e6789395b2006c8bb..4eaaf7f72e026f41d268b59223cd996d26dc928e 100644
--- a/src/util/hostname.hpp
+++ b/arbor/util/hostname.hpp
@@ -2,7 +2,7 @@
 
 #include <string>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 namespace arb {
 namespace util {
diff --git a/src/util/index_into.hpp b/arbor/util/index_into.hpp
similarity index 96%
rename from src/util/index_into.hpp
rename to arbor/util/index_into.hpp
index 41ed48ea05656b6c98ac322ca16fdc99941858ab..cb31503dcb0ec14996a2042519e815da98c0c279 100644
--- a/src/util/index_into.hpp
+++ b/arbor/util/index_into.hpp
@@ -16,8 +16,9 @@
 #include <iterator>
 #include <type_traits>
 
-#include <util/compat.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+#include <arbor/util/compat.hpp>
+
 #include <util/meta.hpp>
 #include <util/range.hpp>
 
@@ -50,7 +51,7 @@ struct index_into_iterator {
     }
 
     index_into_iterator& operator++() {
-        EXPECTS(sup!=sup_end);
+        arb_assert(sup!=sup_end);
 
         ++sub;
         align_fwd();
@@ -116,7 +117,7 @@ private:
             --sup;
         }
 
-        EXPECTS(*sub==*sup);
+        arb_assert(*sub==*sup);
     }
 };
 
diff --git a/src/util/indirect.hpp b/arbor/util/indirect.hpp
similarity index 100%
rename from src/util/indirect.hpp
rename to arbor/util/indirect.hpp
diff --git a/src/util/ioutil.hpp b/arbor/util/ioutil.hpp
similarity index 100%
rename from src/util/ioutil.hpp
rename to arbor/util/ioutil.hpp
diff --git a/src/util/iterutil.hpp b/arbor/util/iterutil.hpp
similarity index 98%
rename from src/util/iterutil.hpp
rename to arbor/util/iterutil.hpp
index 2d3478c0979ca4d3b96cf511008bc4cdc1679dc6..0b37365df515995fcc092c2c5ee64f589ae1ab8e 100644
--- a/src/util/iterutil.hpp
+++ b/arbor/util/iterutil.hpp
@@ -10,8 +10,9 @@
 #include <type_traits>
 #include <utility>
 
-#include <util/compat.hpp>
-#include <util/meta.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "util/meta.hpp"
 
 namespace arb {
 namespace util {
diff --git a/src/util/make_unique.hpp b/arbor/util/make_unique.hpp
similarity index 100%
rename from src/util/make_unique.hpp
rename to arbor/util/make_unique.hpp
diff --git a/src/util/maputil.hpp b/arbor/util/maputil.hpp
similarity index 99%
rename from src/util/maputil.hpp
rename to arbor/util/maputil.hpp
index f6dd91b7aff0fbdf14e1d7e35caadf3e661303ba..223b400ba7e06ca1ecf3d77188a721ed8587bf7b 100644
--- a/src/util/maputil.hpp
+++ b/arbor/util/maputil.hpp
@@ -8,7 +8,7 @@
 
 #include <util/deduce_return.hpp>
 #include <util/meta.hpp>
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 #include <util/transform.hpp>
 
 // Convenience views, algorithms for maps and map-like containers.
diff --git a/src/util/meta.hpp b/arbor/util/meta.hpp
similarity index 99%
rename from src/util/meta.hpp
rename to arbor/util/meta.hpp
index e4ed6bbccfe72088791e11c954d39066af44ed68..da50ce2a31b84ab58944f395da4e84ce5a63e4ff 100644
--- a/src/util/meta.hpp
+++ b/arbor/util/meta.hpp
@@ -6,8 +6,9 @@
 #include <iterator>
 #include <type_traits>
 
-#include <util/compat.hpp>
-#include <util/deduce_return.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "util/deduce_return.hpp"
 
 namespace arb {
 namespace util {
diff --git a/src/util/nop.hpp b/arbor/util/nop.hpp
similarity index 100%
rename from src/util/nop.hpp
rename to arbor/util/nop.hpp
diff --git a/src/util/padded_alloc.hpp b/arbor/util/padded_alloc.hpp
similarity index 100%
rename from src/util/padded_alloc.hpp
rename to arbor/util/padded_alloc.hpp
diff --git a/src/util/partition.hpp b/arbor/util/partition.hpp
similarity index 96%
rename from src/util/partition.hpp
rename to arbor/util/partition.hpp
index 1eccafe479e2a3e598fb506e3c1728de84f5be42..0f1324a0d32bff475460ccf62499b7f5e1e49fe2 100644
--- a/src/util/partition.hpp
+++ b/arbor/util/partition.hpp
@@ -1,13 +1,14 @@
 #pragma once
 
+#include <algorithm>
 #include <iterator>
 #include <stdexcept>
 #include <type_traits>
 
-#include <util/either.hpp>
-#include <util/meta.hpp>
-#include <util/partition_iterator.hpp>
-#include <util/range.hpp>
+#include "util/either.hpp"
+#include "util/meta.hpp"
+#include "util/partition_iterator.hpp"
+#include "util/range.hpp"
 
 namespace arb {
 namespace util {
@@ -43,7 +44,7 @@ public:
 
     template <typename Seq>
     partition_range(const Seq& s): base{std::begin(s), upto(std::begin(s), std::end(s))} {
-        EXPECTS(is_valid());
+        arb_assert(is_valid());
     }
 
     // explicitly check invariants
diff --git a/src/util/partition_iterator.hpp b/arbor/util/partition_iterator.hpp
similarity index 100%
rename from src/util/partition_iterator.hpp
rename to arbor/util/partition_iterator.hpp
diff --git a/src/util/path.cpp b/arbor/util/path.cpp
similarity index 100%
rename from src/util/path.cpp
rename to arbor/util/path.cpp
diff --git a/src/util/path.hpp b/arbor/util/path.hpp
similarity index 99%
rename from src/util/path.hpp
rename to arbor/util/path.hpp
index dd1725c88f8a8e94a9ac2e8ece96f0d1c0cb174d..d11cd1b73e1e44458540849760db78cf7ae71d1e 100644
--- a/src/util/path.hpp
+++ b/arbor/util/path.hpp
@@ -22,9 +22,10 @@
 #include <string>
 #include <iostream>
 #include <utility>
+#include <vector>
 
-#include <util/meta.hpp>
-#include <util/rangeutil.hpp>
+#include "util/meta.hpp"
+#include "util/rangeutil.hpp"
 
 namespace arb {
 namespace util {
diff --git a/src/util/pprintf.hpp b/arbor/util/pprintf.hpp
similarity index 100%
rename from src/util/pprintf.hpp
rename to arbor/util/pprintf.hpp
diff --git a/src/util/range.hpp b/arbor/util/range.hpp
similarity index 99%
rename from src/util/range.hpp
rename to arbor/util/range.hpp
index 7e36e6d136c07f58161d0a7a0016ba59af3aaff0..078af71b4157367799183da0b90d8e31b8b55fbb 100644
--- a/src/util/range.hpp
+++ b/arbor/util/range.hpp
@@ -30,8 +30,9 @@
 #include <tbb/tbb_stddef.h>
 #endif
 
+#include <arbor/assert.hpp>
+
 #include <util/counter.hpp>
-#include <util/debug.hpp>
 #include <util/either.hpp>
 #include <util/iterutil.hpp>
 #include <util/meta.hpp>
diff --git a/src/util/rangeutil.hpp b/arbor/util/rangeutil.hpp
similarity index 100%
rename from src/util/rangeutil.hpp
rename to arbor/util/rangeutil.hpp
diff --git a/src/util/scope_exit.hpp b/arbor/util/scope_exit.hpp
similarity index 100%
rename from src/util/scope_exit.hpp
rename to arbor/util/scope_exit.hpp
diff --git a/src/util/sentinel.hpp b/arbor/util/sentinel.hpp
similarity index 97%
rename from src/util/sentinel.hpp
rename to arbor/util/sentinel.hpp
index 961a4b44bed52d136b8cd362eed01251cd09303d..110dc5676e3446a0fea2f1c3ae18834795d58bec 100644
--- a/src/util/sentinel.hpp
+++ b/arbor/util/sentinel.hpp
@@ -29,22 +29,22 @@ class sentinel_iterator {
     arb::util::either<I, S> e_;
 
     I& iter() {
-        EXPECTS(!is_sentinel());
+        arb_assert(!is_sentinel());
         return e_.template unsafe_get<0>();
     }
 
     const I& iter() const {
-        EXPECTS(!is_sentinel());
+        arb_assert(!is_sentinel());
         return e_.template unsafe_get<0>();
     }
 
     S& sentinel() {
-        EXPECTS(is_sentinel());
+        arb_assert(is_sentinel());
         return e_.template unsafe_get<1>();
     }
 
     const S& sentinel() const {
-        EXPECTS(is_sentinel());
+        arb_assert(is_sentinel());
         return e_.template unsafe_get<1>();
     }
 
diff --git a/src/util/span.hpp b/arbor/util/span.hpp
similarity index 100%
rename from src/util/span.hpp
rename to arbor/util/span.hpp
diff --git a/src/util/strprintf.hpp b/arbor/util/strprintf.hpp
similarity index 100%
rename from src/util/strprintf.hpp
rename to arbor/util/strprintf.hpp
diff --git a/src/util/transform.hpp b/arbor/util/transform.hpp
similarity index 100%
rename from src/util/transform.hpp
rename to arbor/util/transform.hpp
diff --git a/src/util/unique_any.hpp b/arbor/util/unique_any.hpp
similarity index 100%
rename from src/util/unique_any.hpp
rename to arbor/util/unique_any.hpp
diff --git a/src/util/unwind.cpp b/arbor/util/unwind.cpp
similarity index 79%
rename from src/util/unwind.cpp
rename to arbor/util/unwind.cpp
index d74abdb8d99cb3c33ea32d06afa4bd3262fa747e..5303c9024dc4d205b874e6994aeb27b51cffc7f7 100644
--- a/src/util/unwind.cpp
+++ b/arbor/util/unwind.cpp
@@ -64,6 +64,21 @@ std::string demangle(std::string s) {
     return s;
 }
 
+// Write one "0x<position> <name>" line per frame, stopping after the frame named "main".
+std::ostream& operator<<(std::ostream& out, const backtrace& trace) {
+    for (auto& f: trace.frames_) {
+        char loc_str[64];
+        snprintf(loc_str, sizeof(loc_str), "0x%lx", f.position);
+        out << loc_str << " " << f.name << "\n";
+        if (f.name=="main") break;
+    }
+    return out; // required: flowing off the end of a non-void function is undefined behaviour
+}
+
+#if 0
+// Temporarily disabled: automatic writing of stack traces to disk
+// needs to be run-time configurable.
+
 void backtrace::print(bool stop_at_main) const {
     using namespace arb::memory::util;
 
@@ -85,12 +100,26 @@ void backtrace::print(bool stop_at_main) const {
     std::cerr << "           View a brief summary of the backtrace by running \"scripts/print_backtrace " << fname << " -b\".\n";
     std::cerr << "           Run \"scripts/print_backtrace -h\" for more options.\n";
 }
+#endif
 
 } // namespace util
 } // namespace arb
 
 #else
-arb::util::backtrace::backtrace() {}
-void arb::util::backtrace::print(bool) const {}
+
+namespace arb {
+namespace util {
+
+backtrace::backtrace() {}
+
+std::ostream& operator<<(std::ostream& out, const backtrace& trace) {
+    return out;
+}
+
+//void arb::util::backtrace::print(bool) const {}
+
+} // namespace util
+} // namespace arb
+
 #endif
 
diff --git a/src/util/unwind.hpp b/arbor/util/unwind.hpp
similarity index 93%
rename from src/util/unwind.hpp
rename to arbor/util/unwind.hpp
index e9480e14b7977308073d6fb9611c6d18d2ac23ea..deb8d937a278fdab0ab772bd3b6f7583f4d54801 100644
--- a/src/util/unwind.hpp
+++ b/arbor/util/unwind.hpp
@@ -28,6 +28,8 @@ public:
     void print(bool stop_at_main=true) const;
     const std::vector<source_location>& frames() const { return frames_; }
 
+    friend std::ostream& operator<<(std::ostream&, const backtrace&);
+
 private:
     std::vector<source_location> frames_;
 };
diff --git a/arbor/version.cpp b/arbor/version.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..de8ba1833d513d858f2b6c706e48592642b1f5cf
--- /dev/null
+++ b/arbor/version.cpp
@@ -0,0 +1,6 @@
+#include <arbor/version.hpp>
+
+namespace arb {
+const char version[] = ARB_VERSION;     // ARB_VERSION is not defined in this file; presumably supplied by the header or the build — confirm
+const char source_id[] = ARB_SOURCE_ID; // likewise for ARB_SOURCE_ID
+} // namespace arb
diff --git a/aux/CMakeLists.txt b/aux/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91de62074d4251e373a14ce8f9e20d3b34a20717
--- /dev/null
+++ b/aux/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Auxiliary library target arbor-aux, built from json_meter.cpp with output name arboraux.
+add_library(arbor-aux json_meter.cpp)
+set_target_properties(arbor-aux PROPERTIES OUTPUT_NAME arboraux)
+
+# Consumers get this directory on their include path and link ext-json and arbor transitively.
+target_include_directories(arbor-aux INTERFACE .)
+target_link_libraries(arbor-aux PUBLIC ext-json arbor)
diff --git a/aux/json_meter.cpp b/aux/json_meter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ec9e8d965a684d0f66ecc74d74a94f3d1b9a198c
--- /dev/null
+++ b/aux/json_meter.cpp
@@ -0,0 +1,33 @@
+#include <arbor/profile/meter_manager.hpp>
+#include <nlohmann/json.hpp>
+
+namespace aux {
+
+// Helper: build the JSON object {"name", "units", "measurements"} for a single measurement.
+static nlohmann::json to_json(const arb::profile::measurement& mnt) {
+    nlohmann::json measurements;
+    for (const auto& m: mnt.measurements) {
+        measurements.push_back(m);
+    }
+    return {
+        {"name", mnt.name},
+        {"units", mnt.units},
+        {"measurements", measurements}
+    };
+}
+
+// Build the JSON object {"checkpoints", "num_domains", "meters", "hosts"} for a whole report.
+nlohmann::json to_json(const arb::profile::meter_report& report) {
+    nlohmann::json json_meters;
+    for (const auto& mnt: report.meters) {
+        json_meters.push_back(to_json(mnt));
+    }
+    return {
+        {"checkpoints", report.checkpoints},
+        {"num_domains", report.num_domains},
+        {"meters", json_meters},
+        {"hosts", report.hosts},
+    };
+}
+
+} // namespace aux
diff --git a/aux/json_meter.hpp b/aux/json_meter.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..27489393de72a3b234658968de0de2a4d2e78b5d
--- /dev/null
+++ b/aux/json_meter.hpp
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <arbor/profile/meter_manager.hpp>
+#include <nlohmann/json.hpp>
+
+namespace aux {
+
+// Serialize a meter report as a JSON value; defined in json_meter.cpp.
+nlohmann::json to_json(const arb::profile::meter_report&);
+
+} // namespace aux
diff --git a/src/tinyopt.hpp b/aux/tinyopt.hpp
similarity index 79%
rename from src/tinyopt.hpp
rename to aux/tinyopt.hpp
index ca4366677f1a940075695382735f735f7c05c40f..058bd2ccb2d38e24cfa79290b0c62f2bc3a22f6b 100644
--- a/src/tinyopt.hpp
+++ b/aux/tinyopt.hpp
@@ -9,12 +9,14 @@
 #include <utility>
 #include <vector>
 
-#include <util/meta.hpp>
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
-namespace arb {
 namespace to {
 
+using arb::util::optional;
+using arb::util::nullopt;
+using arb::util::just;
+
 struct parse_opt_error: public std::runtime_error {
     parse_opt_error(const std::string& s): std::runtime_error(s) {}
     parse_opt_error(const char *arg, const std::string& s):
@@ -38,11 +40,11 @@ void usage(const char* argv0, const std::string& usage_str, const std::string& p
 
 template <typename V>
 struct default_parser {
-    util::optional<V> operator()(const std::string& text) const {
+    optional<V> operator()(const std::string& text) const {
         V v;
         std::istringstream stream(text);
         stream >> v;
-        return stream? util::just(v): util::nullopt;
+        return stream? just(v): nullopt;
     }
 };
 
@@ -54,11 +56,11 @@ public:
     template <typename KeywordPairs>
     keyword_parser(const KeywordPairs& pairs): map_(std::begin(pairs), std::end(pairs)) {}
 
-    util::optional<V> operator()(const std::string& text) const {
+    optional<V> operator()(const std::string& text) const {
         for (const auto& p: map_) {
             if (text==p.first) return p.second;
         }
-        return util::nullopt;
+        return nullopt;
     }
 };
 
@@ -67,12 +69,12 @@ auto keywords(const KeywordPairs& pairs) -> keyword_parser<decltype(std::begin(p
     return keyword_parser<decltype(std::begin(pairs)->second)>(pairs);
 }
 
-template <typename V = std::string, typename P = default_parser<V>, typename = util::enable_if_t<!std::is_same<V, void>::value>>
-util::optional<V> parse_opt(char **& argp, char shortopt, const char* longopt=nullptr, const P& parse = P{}) {
+template <typename V = std::string, typename P = default_parser<V>, typename = typename std::enable_if<!std::is_same<V, void>::value>::type>
+optional<V> parse_opt(char **& argp, char shortopt, const char* longopt=nullptr, const P& parse = P{}) {
     const char* arg = argp[0];
 
     if (!arg || arg[0]!='-') {
-        return util::nullopt;
+        return nullopt;
     }
 
     std::string text;
@@ -91,7 +93,7 @@ util::optional<V> parse_opt(char **& argp, char shortopt, const char* longopt=nu
             argp += 1;
         }
         else {
-            return util::nullopt;
+            return nullopt;
         }
     }
     else if (shortopt && arg[1]==shortopt && arg[2]==0) {
@@ -100,7 +102,7 @@ util::optional<V> parse_opt(char **& argp, char shortopt, const char* longopt=nu
         argp += 2;
     }
     else {
-        return util::nullopt;
+        return nullopt;
     }
 
     auto v = parse(text);
@@ -109,9 +111,9 @@ util::optional<V> parse_opt(char **& argp, char shortopt, const char* longopt=nu
     return v;
 }
 
-util::optional<void> parse_opt(char **& argp, char shortopt, const char* longopt) {
+optional<void> parse_opt(char **& argp, char shortopt, const char* longopt) {
     if (!*argp || *argp[0]!='-') {
-        return util::nullopt;
+        return nullopt;
     }
     else if (argp[0][1]=='-' && longopt && !std::strcmp(argp[0]+2, longopt)) {
         ++argp;
@@ -122,10 +124,8 @@ util::optional<void> parse_opt(char **& argp, char shortopt, const char* longopt
         return true;
     }
     else {
-        return util::nullopt;
+        return nullopt;
     }
 }
 
-
-} // namespace to;
-} // namespace arb
+} // namespace to
diff --git a/aux/with_mpi.hpp b/aux/with_mpi.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f9c8cb706d716d5900cd647368b605cad75b084c
--- /dev/null
+++ b/aux/with_mpi.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <mpi.h>
+
+#include <arbor/communication/mpi_error.hpp>
+
+// Scope guard for MPI.
+//
+// The constructors call MPI_Init_thread requesting MPI_THREAD_SERIALIZED and
+// throw arb::mpi_error if the call fails or a lower thread level is provided.
+// If fatal_errors is false, the MPI_COMM_WORLD error handler is then set to
+// MPI_ERRORS_RETURN. The destructor calls MPI_Finalize.
+struct with_mpi {
+    with_mpi(int& argc, char**& argv, bool fatal_errors = true) {
+        init(&argc, &argv, fatal_errors);
+    }
+
+    with_mpi(bool fatal_errors = true) {
+        init(nullptr, nullptr, fatal_errors);
+    }
+
+    // Not copyable: a copy's destructor would call MPI_Finalize a second time.
+    with_mpi(const with_mpi&) = delete;
+    with_mpi& operator=(const with_mpi&) = delete;
+
+    ~with_mpi() {
+        MPI_Finalize();
+    }
+
+private:
+    void init(int* argcp, char*** argvp, bool fatal_errors) {
+        int provided;
+        int ev = MPI_Init_thread(argcp, argvp, MPI_THREAD_SERIALIZED, &provided);
+        if (ev) {
+            throw arb::mpi_error(ev, "MPI_Init_thread");
+        }
+        else if (provided<MPI_THREAD_SERIALIZED) {
+            throw arb::mpi_error(MPI_ERR_OTHER, "MPI_Init_thread: MPI_THREAD_SERIALIZED unsupported");
+        }
+
+        if (!fatal_errors) {
+            MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
+        }
+    }
+};
diff --git a/cmake/CompilerOptions.cmake b/cmake/CompilerOptions.cmake
index b892417f4cae18d41081eeef2e14ef818a308dd1..84cba10115b627dff0bb729e06a535f91a949ff2 100644
--- a/cmake/CompilerOptions.cmake
+++ b/cmake/CompilerOptions.cmake
@@ -1,9 +1,7 @@
 # Compiler-aware compiler options
 
 set(CXXOPT_DEBUG "-g")
-set(CXXOPT_PTHREAD "-pthread")
 set(CXXOPT_CXX11 "-std=c++11")
-set(CXXOPT_WALL "-Wall")
 
 # CMake (at least sometimes) misidentifies XL 13 for Linux as Clang.
 if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
@@ -17,59 +15,65 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 endif()
 
 if(CMAKE_CXX_COMPILER_ID MATCHES "XL")
-    # Disable 'missing-braces' warning: this will inappropriately
-    # flag initializations such as
-    #     std::array<int,3> a={1,2,3};
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-missing-braces")
-
     # CMake, bless its soul, likes to insert this unsupported flag. Hilarity ensues.
     string(REPLACE "-qhalt=e" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
 endif()
 
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set(CXXOPT_KNL "-march=knl")
-    set(CXXOPT_AVX2 "-mavx2 -mfma")
-    set(CXXOPT_AVX512 "-mavx512f -mavx512cd")
-
-    # Disable 'missing-braces' warning: this will inappropriately
-    # flag initializations such as
-    #     std::array<int,3> a={1,2,3};
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-missing-braces")
-
-    # Disable 'potentially-evaluated-expression' warning: this warns
-    # on expressions of the form `typeid(expr)` when `expr` has side
-    # effects.
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-potentially-evaluated-expression")
-
-    # Clang is erroneously warning that T is an 'unused type alias' in code like this:
-    # struct X {
-    #     using T = decltype(expression);
-    #     T x;
-    # };
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-unused-local-typedef")
-
-    # Ignore warning if string passed to snprintf is not a string literal.
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-format-security")
-endif()
+# Warning options: disable specific spurious warnings as required.
 
-if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-    # Disable 'maybe-uninitialized' warning: this will be raised
-    # inappropriately in some uses of util::optional<T> when T
-    # is a primitive type.
-    set(CXXOPT_WALL "${CXXOPT_WALL} -Wno-maybe-uninitialized")
-endif()
+set(CXXOPT_WALL
+    -Wall
+
+    # XL C:
+    #
+    # * Disable 'missing-braces' warning: this will inappropriately
+    #   flag initializations such as
+    #       std::array<int,3> a={1,2,3};
+
+    $<IF:$<CXX_COMPILER_ID:XL>,-Wno-missing-braces,>
+
+    # Clang (the compiler id test is an exact match, so AppleClang is listed too):
+    #
+    # * Disable 'potentially-evaluated-expression' warning: this warns
+    #   on expressions of the form `typeid(expr)` when `expr` has side
+    #   effects.
+
+    $<IF:$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>,-Wno-potentially-evaluated-expression,>
+
+    # * Clang erroneously warns that T is an 'unused type alias'
+    #   in code like this:
+    #       struct X {
+    #           using T = decltype(expression);
+    #           T x;
+    #       };
 
-if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
+    $<IF:$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>,-Wno-unused-local-typedef,>
+
+    # * Ignore warning if string passed to snprintf is not a string literal.
+
+    $<IF:$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>,-Wno-format-security,>
+
+    # GCC:
+    #
+    # * Disable 'maybe-uninitialized' warning: this will be raised
+    #   inappropriately in some uses of util::optional<T> when T
+    #   is a primitive type.
+
+    $<IF:$<CXX_COMPILER_ID:GNU>,-Wno-maybe-uninitialized,>
+
+    # Intel:
+    #
     # Disable warning for unused template parameter
     # this is raised by a templated function in the json library.
-    set(CXXOPT_WALL "${CXXOPT_WALL} -wd488")
-endif()
 
-# Set CXXOPT_ARCH in parent scope according to requested architecture.
+    $<IF:$<CXX_COMPILER_ID:Intel>,-wd488,>)
+
+
+# Set ${optvar} in parent scope according to requested architecture.
 # Architectures are given by the same names that GCC uses for its
 # -mcpu or -march options.
 
-function(set_arch_target arch)
+function(set_arch_target optvar arch)
     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU")
         # Correct compiler option unfortunately depends upon the target architecture family.
         # Extract this information from running the configured compiler with --verbose.
@@ -87,9 +91,9 @@ function(set_arch_target arch)
         # Use -mcpu for all supported targets _except_ for x86, where it should be -march.
 
         if(target_model MATCHES "x86" OR target_model MATCHES "amd64")
-            set(CXXOPT_ARCH "-march=${arch}")
+            set(arch_opt "-march=${arch}")
         else()
-            set(CXXOPT_ARCH "-mcpu=${arch}")
+            set(arch_opt "-mcpu=${arch}")
         endif()
 
     elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
@@ -124,9 +128,9 @@ function(set_arch_target arch)
         endif()
 
         if(tune)
-            set(CXXOPT_ARCH "-x${arch};-mtune=${tune}")
+            set(arch_opt "-x${arch};-mtune=${tune}")
         else()
-            set(CXXOPT_ARCH "-x${arch}")
+            set(arch_opt "-x${arch}")
         endif()
 
     elseif(CMAKE_CXX_COMPILER_ID MATCHES "XL")
@@ -136,11 +140,11 @@ function(set_arch_target arch)
         # xlC, gcc, and clang all recognize power8 and power9 as architecture keywords.
 
         if(arch MATCHES "native")
-            set(CXXOPT_ARCH "-qarch=auto")
+            set(arch_opt "-qarch=auto")
         else()
-            set(CXXOPT_ARCH "-mcpu=${arch}")
+            set(arch_opt "-mcpu=${arch}")
         endif()
     endif()
 
-    set(CXXOPT_ARCH "${CXXOPT_ARCH}" PARENT_SCOPE)
+    set("${optvar}" "${arch_opt}" PARENT_SCOPE)
 endfunction()
diff --git a/cmake/ErrorTarget.cmake b/cmake/ErrorTarget.cmake
index 6e4a836bd5e4a14d750366eac17ab20799af3315..4eaca8e0994f87dc8d4859a5a13e295932fde32e 100644
--- a/cmake/ErrorTarget.cmake
+++ b/cmake/ErrorTarget.cmake
@@ -3,9 +3,21 @@
 #   comment : the COMMENT string for the real target, e.g. "Building the Sphinx documentation"
 #   message : the error message
 
-function(add_error_target name comment message)
-    add_custom_target(${name}
-        COMMAND echo "  Error: ${message}."
-        COMMAND exit 1
-        COMMENT "${comment}")
+function(add_error_target target comment error_message)
+    if(NOT TARGET "${target}")  # Leave an already-defined target of this name untouched.
+        add_custom_target("${target}"
+            COMMAND echo "  Error: ${error_message}."  # Print the reason when the target is built.
+            COMMAND exit 1  # Non-zero exit status so that building the target fails.
+            COMMENT "${comment}")
+    endif()
 endfunction()
+
+macro(add_target_if condition target comment error_message)  # No-op target if condition holds, error target otherwise.
+    if(${condition})  # The macro argument is substituted textually, then evaluated by if().
+        add_custom_target("${target}"
+            COMMAND true  # Succeeds without doing any work.
+            COMMENT "${comment}")
+    else()
+        add_error_target("${target}" "${comment}" "${error_message}")
+    endif()
+endmacro()
diff --git a/cmake/FindJulia.cmake b/cmake/FindJulia.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..b9c91377b60dd2676b5b87e0c1e3e516eafb4c46
--- /dev/null
+++ b/cmake/FindJulia.cmake
@@ -0,0 +1,37 @@
+# Find Julia executable and check presence of packages.
+#
+# Sets the following variables:
+#
+#   Julia_FOUND       - True if Julia is found together with requested components.
+#   Julia_EXECUTABLE  - Path to Julia executable, or "Julia_EXECUTABLE-NOTFOUND" if not found.
+#
+#   Julia_<component>_FOUND - True if the Julia package <component> is found.
+#
+# The Julia_EXECUTABLE and Julia_<component>_FOUND variables are cached.
+
+include(FindPackageHandleStandardArgs)
+
+if(NOT Julia_FOUND)
+    find_program(Julia_EXECUTABLE julia)
+    if(Julia_EXECUTABLE)
+        foreach(component ${Julia_FIND_COMPONENTS})
+            set(_found_var "Julia_${component}_FOUND")
+            if(NOT ${_found_var})
+                execute_process(
+                    COMMAND ${Julia_EXECUTABLE} -e "using ${component}"
+                    RESULT_VARIABLE _result
+                    OUTPUT_QUIET
+                    ERROR_QUIET)
+                if(_result EQUAL 0)  # Exit status 0: `julia -e "using <component>"` succeeded.
+                    set(${_found_var} TRUE CACHE BOOL "Found Julia component ${component}" FORCE)
+                else()  # Any other status: record the component as missing.
+                    set(${_found_var} FALSE CACHE BOOL "Found Julia component ${component}" FORCE)
+                endif()
+            endif()
+        endforeach()
+    endif()
+
+    find_package_handle_standard_args(Julia
+        REQUIRED_VARS Julia_EXECUTABLE
+        HANDLE_COMPONENTS)
+endif()
diff --git a/cmake/FindTBB.cmake b/cmake/FindTBB.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..1b4b85fad2a0fd9d8e7fcc950762f485b18508f3
--- /dev/null
+++ b/cmake/FindTBB.cmake
@@ -0,0 +1,60 @@
+# Find the Intel Thread Building Blocks library
+#
+# Sets the following variables:
+#
+#  TBB_FOUND               - True if libtbb and libtbbmalloc found.
+#  TBB_LIBRARIES           - Paths to libtbb and libtbbmalloc.
+#  TBB_INCLUDE_DIR         - Base directory for tbb/ includes.
+# 
+# Generates the import library target TBB::tbb if found.
+#
+# The default search path can be overridden by setting the
+# CMake variable TBB_ROOT_DIR or the environment variables
+# TBBROOT or TBB_ROOT.
+
+if(NOT TBB_FOUND)
+    find_package(Threads REQUIRED)
+
+    set(_tbb_search_path ${TBB_ROOT_DIR} $ENV{TBBROOT} $ENV{TBB_ROOT})
+    set(_tbb_lib_suffixes lib/intel64/gcc4.7 lib/intel64/gcc4.4 lib/gcc4.7 lib/gcc4.4 lib/android lib/mic lib)
+
+    macro(_tbb_findlib libname)
+        find_library(_lib${libname} ${libname}
+            PATHS ${_tbb_search_path} NO_DEFAULT_PATH
+            PATH_SUFFIXES ${_tbb_lib_suffixes})
+        find_library(_lib${libname} ${libname}
+            PATH_SUFFIXES ${_tbb_lib_suffixes})
+    endmacro()
+
+    _tbb_findlib(tbb)
+    _tbb_findlib(tbbmalloc)
+
+    find_path(TBB_INCLUDE_DIR tbb/tbb.h PATHS ${_tbb_search_path} NO_DEFAULT_PATH PATH_SUFFIXES include)
+    find_path(TBB_INCLUDE_DIR tbb/tbb.h)
+
+    include(FindPackageHandleStandardArgs)
+    find_package_handle_standard_args(TBB DEFAULT_MSG TBB_INCLUDE_DIR _libtbb _libtbbmalloc)
+
+    if(TBB_FOUND)
+        set(TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIR})
+        set(TBB_LIBRARIES ${_libtbb} ${_libtbbmalloc})
+        if(NOT TARGET TBB::tbb)  # Define the imported target only once.
+            if("${_libtbb}" MATCHES "\\.a$")  # "\\." is a literal dot: only a trailing ".a" selects STATIC.
+                add_library(TBB::tbb STATIC IMPORTED GLOBAL)
+            else()
+                add_library(TBB::tbb SHARED IMPORTED GLOBAL)
+            endif()
+            set_target_properties(TBB::tbb PROPERTIES
+                    IMPORTED_LOCATION "${_libtbb}"
+                    INTERFACE_LINK_LIBRARIES "${_libtbbmalloc};Threads::Threads;${CMAKE_DL_LIBS}"  # One ;-list value: PROPERTIES takes name/value pairs.
+                    INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}"
+            )
+        endif()
+    endif()
+    mark_as_advanced(TBB_INCLUDE_DIR)
+
+    unset(_tbb_search_path)
+    unset(_tbb_lib_suffixes)
+    unset(_libtbb)
+    unset(_libtbbmalloc)
+endif()
diff --git a/cmake/FindThreadsCudaFix.cmake b/cmake/FindThreadsCudaFix.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..44906f0e94886450901c1e8c24814cbac27791ff
--- /dev/null
+++ b/cmake/FindThreadsCudaFix.cmake
@@ -0,0 +1,17 @@
+# FindThreads improperly passes -pthread to nvcc instead of e.g. -Xcompiler=-pthread.
+# (see: https://gitlab.kitware.com/cmake/cmake/issues/18008)
+
+function(find_threads_cuda_fix)  # Rewrites the Threads::Threads compile options for CUDA builds; takes no arguments.
+    if(TARGET Threads::Threads)  # Nothing to patch unless the imported target exists.
+        get_property(_languages GLOBAL PROPERTY ENABLED_LANGUAGES)
+        if("CUDA" IN_LIST _languages)  # Only act when CUDA is an enabled language.
+            get_property(_threads_options TARGET Threads::Threads PROPERTY INTERFACE_COMPILE_OPTIONS)
+            if(_threads_options STREQUAL "-pthread")  # Patch only when the property is exactly this single flag.
+                set_property(TARGET Threads::Threads
+                    PROPERTY INTERFACE_COMPILE_OPTIONS
+                    $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>  # CUDA sources: pass the flag through -Xcompiler.
+                    $<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>)  # All other languages: keep plain -pthread.
+            endif()
+        endif()
+    endif()
+endfunction()
diff --git a/cmake/FindUnwind.cmake b/cmake/FindUnwind.cmake
index 35fd9c2a19b8f5d3f7cf5086c4e6a105eb3fda23..a6ddd341a8805746d3c8fecd020e844a8ced4e78 100644
--- a/cmake/FindUnwind.cmake
+++ b/cmake/FindUnwind.cmake
@@ -1,18 +1,20 @@
 # Find the libunwind library
 #
-#  UNWIND_FOUND       - True if libunwind was found
-#  UNWIND_LIBRARIES   - The libraries needed to use libunwind
-#  UNWIND_INCLUDE_DIR - Location of unwind.h and libunwind.h
+#  Unwind_FOUND       - True if libunwind was found
+#  Unwind_LIBRARIES   - The libraries needed to use libunwind
+#  Unwind_INCLUDE_DIR - Location of unwind.h and libunwind.h
 #
-# The environment and cmake variables UNWIND_ROOT and UNWIND_ROOT_DIR
+# The environment and cmake variables Unwind_ROOT and Unwind_ROOT_DIR
 # respectively can be used to help CMake finding the library if it
 # is not installed in any of the usual locations.
+#
+# Registers "Unwind::unwind" as an import library.
 
-if(NOT UNWIND_FOUND)
-    set(UNWIND_SEARCH_DIR ${UNWIND_ROOT_DIR} $ENV{UNWIND_ROOT})
+if(NOT Unwind_FOUND)
+    set(Unwind_SEARCH_DIR ${Unwind_ROOT_DIR} $ENV{Unwind_ROOT})
 
-    find_path(UNWIND_INCLUDE_DIR libunwind.h
-        HINTS ${UNWIND_SEARCH_DIR}
+    find_path(Unwind_INCLUDE_DIR libunwind.h
+        HINTS ${Unwind_SEARCH_DIR}
         PATH_SUFFIXES include
     )
 
@@ -20,32 +22,44 @@ if(NOT UNWIND_FOUND)
     # a target-specific library libunwind-target.so/a.
     # This code sets the "target" string above in libunwind_arch.
     if (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
-        set(libunwind_arch "arm")
+        set(_libunwind_arch "arm")
     elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
-        set(libunwind_arch "x86_64")
+        set(_libunwind_arch "x86_64")
     elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$")
-        set(libunwind_arch "x86")
+        set(_libunwind_arch "x86")
     endif()
 
-    find_library(unwind_library_generic unwind
-        HINTS ${UNWIND_SEARCH_DIR}
+    find_library(_unwind_library_generic unwind
+        HINTS ${Unwind_SEARCH_DIR}
         PATH_SUFFIXES lib64 lib
     )
 
-    find_library(unwind_library_target unwind-${libunwind_arch}
-        HINTS ${UNWIND_SEARCH_DIR}
+    find_library(_unwind_library_target unwind-${_libunwind_arch}  # _libunwind_arch is set from CMAKE_SYSTEM_PROCESSOR earlier in this module.
+        HINTS ${Unwind_SEARCH_DIR}
         PATH_SUFFIXES lib64 lib
     )
 
-    set(UNWIND_LIBRARIES ${unwind_library_generic} ${unwind_library_target})
+    set(Unwind_LIBRARIES ${_unwind_library_generic} ${_unwind_library_target})
 
     include(FindPackageHandleStandardArgs)
-    find_package_handle_standard_args(UNWIND DEFAULT_MSG UNWIND_INCLUDE_DIR UNWIND_LIBRARIES)
+    find_package_handle_standard_args(Unwind DEFAULT_MSG Unwind_INCLUDE_DIR Unwind_LIBRARIES)
+
+    mark_as_advanced(Unwind_LIBRARIES Unwind_INCLUDE_DIR)
 
-    mark_as_advanced(UNWIND_LIBRARIES UNWIND_INCLUDE_DIR)
+    if(Unwind_FOUND)
+        set(Unwind_INCLUDE_DIRS ${Unwind_INCLUDE_DIR})
+        if(NOT TARGET Unwind::unwind)
+            add_library(Unwind::unwind UNKNOWN IMPORTED)
+            set_target_properties(Unwind::unwind PROPERTIES
+                    IMPORTED_LOCATION "${_unwind_library_generic}"
+                    INTERFACE_LINK_LIBRARIES "${_unwind_library_target}"
+                    INTERFACE_INCLUDE_DIRECTORIES "${Unwind_INCLUDE_DIR}"
+            )
+        endif()
+    endif()
 
-    unset(unwind_search_dir)
-    unset(unwind_library_generic)
-    unset(unwind_library_target)
-    unset(libunwind_arch)
+    unset(Unwind_SEARCH_DIR)  # The search path is stored under this name, not _unwind_search_dir.
+    unset(_unwind_library_generic)
+    unset(_unwind_library_target)
+    unset(_libunwind_arch)
 endif()
diff --git a/cmake/GitSubmodule.cmake b/cmake/GitSubmodule.cmake
index b693f37072898ad0fe7aa8e5ddcbd51f7ecc420a..b64b6efed4787d70b67f9472ceaa8fde49647b72 100644
--- a/cmake/GitSubmodule.cmake
+++ b/cmake/GitSubmodule.cmake
@@ -8,7 +8,8 @@ function(check_git_submodule name path)
     set(success_var "${name}_avail")
     set(${success_var} ON PARENT_SCOPE)
 
-    if(NOT EXISTS "${path}/.git")
+    get_filename_component(dotgit "${path}/.git" ABSOLUTE)
+    if(NOT EXISTS ${dotgit})
         message(
             "\nThe git submodule for ${name} is not available.\n"
             "To check out all submodules use the following commands:\n"
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index 7ca6bd5abde2d1ce4183060ef3f5a19c1a29748e..d90dcb80c675b9078cdcf4b7e64f39d01f94effd 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -1,27 +1,29 @@
-# a static path is required to avoid warning messages from sphinx-build
-file(MAKE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/static")
-
-# configure target for the sphinx-generated html docs
 find_package(Sphinx)
+add_target_if(SPHINX_FOUND
+    check-sphinx
+    "Finding Sphinx"
+    "Sphinx must be installed to build documentation")
+
+set(html_dir "${CMAKE_CURRENT_BINARY_DIR}/html")
+set(doctree_dir "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
+
+get_property(theme_path TARGET ext-sphinx_rtd_theme PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
+string(REPLACE ";" "," theme_path "${theme_path}")
+
+add_custom_target(html
+    COMMAND
+        ${SPHINX_EXECUTABLE}
+        -b html
+        -d ${doctree_dir}
+        -D "html_theme_path=${theme_path}"
+        -q                          # Quiet: no output other than errors and warnings.
+        ${CMAKE_CURRENT_SOURCE_DIR} # Source directory
+        ${html_dir}                 # Output directory
+    DEPENDS check-sphinx ext-sphinx_rtd_theme
+    COMMENT
+        "Generating Sphinx documentation")
 
-set(DOCS_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
-set(DOCS_DOC_TREE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
-if (SPHINX_FOUND)
-    add_custom_target(docs
-        COMMAND
-            ${SPHINX_EXECUTABLE}
-            -b html
-            -d "${DOCS_DOC_TREE_DIR}"
-            -q # Quiet: no output other than errors and warnings.
-            "${CMAKE_CURRENT_SOURCE_DIR}" # Source
-            "${DOCS_BUILD_DIR}" # Output
-        COMMENT
-            "Generating Sphinx documentation")
-else()
-    add_error_target(docs
-        "Generating Sphinx documentation"
-        "Sphinx must be installed to build documentation")
-endif()
+# Remove generated documentation when make clean is run.
+set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${doctree_dir}" "${html_dir}")
 
-# remove generated documentation when make clean is run
-set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${DOCS_BUILD_DIR}")
+install(DIRECTORY ${html_dir} DESTINATION ${CMAKE_INSTALL_DOCDIR} OPTIONAL)
diff --git a/doc/conf.py b/doc/conf.py
index e81cb603bed70a431153222676f2c871f60822c0..73060fb68ad252125efe5ca0e6074127f3a5c4d0 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,167 +1,17 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-#
-# Arbor documentation build configuration file, created by
-# sphinx-quickstart on Wed Jul 26 18:29:23 2017.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
 
 def setup(app):
     app.add_stylesheet('custom.css')
 
-# -- General configuration ------------------------------------------------
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.todo',
-    'sphinx.ext.mathjax']
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
+extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax']
 source_suffix = '.rst'
-
-# The master toctree document.
 master_doc = 'index'
 
-# General information about the project.
 project = 'Arbor'
 copyright = '2017, ETHZ & FZ Julich'
 author = 'ETHZ & FZ Julich'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = '0.1'
-# The full version, including alpha/beta/rc tags.
-release = '0.1'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['build', 'Thumbs.db', '.DS_Store', 'rtd_theme']
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
 
-
-# -- Options for HTML output ----------------------------------------------
-
-# Use the ReadTheDocs theme
 html_theme = "sphinx_rtd_theme"
-html_theme_path = ["rtd_theme", ]
-
-# html_theme_options = {}
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['static']
-
-# Custom sidebar templates, must be a dictionary that maps document names
-# to template names.
-#
-# This is required for the alabaster theme
-# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
-#html_sidebars = {
-#    '**': [
-#        'about.html',
-#        'navigation.html',
-#        'relations.html',  # needs 'show_related': True theme option to display
-#        'searchbox.html',
-#        'donate.html',
-#    ]
-#}
-
-
-# -- Options for HTMLHelp output ------------------------------------------
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Arbordoc'
-
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-    # The paper size ('letterpaper' or 'a4paper').
-    #
-    # 'papersize': 'letterpaper',
-
-    # The font size ('10pt', '11pt' or '12pt').
-    #
-    # 'pointsize': '10pt',
-
-    # Additional stuff for the LaTeX preamble.
-    #
-    # 'preamble': '',
-
-    # Latex figure (float) alignment
-    #
-    # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'Arbor.tex', 'Arbor Documentation',
-     'ETHZ \\& FZ Julich', 'manual'),
-]
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'arbor', 'Arbor Documentation',
-     [author], 1)
-]
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'Arbor', 'Arbor Documentation',
-     author, 'Arbor', 'One line description of project.',
-     'Miscellaneous'),
-]
diff --git a/example/bench/CMakeLists.txt b/example/bench/CMakeLists.txt
index e5bf57c647328a8a36a381a4a4e7907ee66d649a..1423c792f55be187b026912f311cfe857b4326a6 100644
--- a/example/bench/CMakeLists.txt
+++ b/example/bench/CMakeLists.txt
@@ -1,15 +1,6 @@
-add_executable(bench.exe bench.cpp recipe.cpp parameters.cpp)
+add_executable(bench bench.cpp recipe.cpp parameters.cpp)
 
-target_link_libraries(bench.exe LINK_PUBLIC ${ARB_LIBRARIES})
-target_link_libraries(bench.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})
+target_link_libraries(bench PRIVATE arbor arbor-aux ext-tclap ext-json)
 
-if(ARB_WITH_MPI)
-    target_link_libraries(bench.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
-    set_property(TARGET bench.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-endif()
-
-set_target_properties(
-    bench.exe
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example"
-)
+# TODO: resolve public headers
+target_link_libraries(bench PRIVATE arbor-private-headers)
diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp
index 58b806337424794acaa25a8eb6c1cb4cacc0dffb..4f5bff6ab6a6aa9f7fbe6a48a2adbdc097b54f0e 100644
--- a/example/bench/bench.cpp
+++ b/example/bench/bench.cpp
@@ -6,16 +6,19 @@
 #include <iomanip>
 #include <iostream>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <arbor/profile/meter_manager.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
 
-#include <common_types.hpp>
-#include <communication/distributed_context.hpp>
 #include <hardware/node_info.hpp>
 #include <load_balance.hpp>
-#include <profiling/meter_manager.hpp>
 #include <simulation.hpp>
 #include <util/ioutil.hpp>
 
+#include "json_meter.hpp"
+
 #include "recipe.hpp"
 
 using namespace arb;
@@ -35,7 +38,7 @@ int main(int argc, char** argv) {
 
         std::cout << params << "\n";
 
-        util::meter_manager meters(&context);
+        profile::meter_manager meters(&context);
         meters.start();
 
         // Create an instance of our recipe.
@@ -56,18 +59,18 @@ int main(int argc, char** argv) {
         meters.checkpoint("model-run");
 
         // write meters
-        auto report = util::make_meter_report(meters);
+        auto report = profile::make_meter_report(meters);
         std::cout << report << "\n";
 
         if (is_root==0) {
             std::ofstream fid;
             fid.exceptions(std::ios_base::badbit | std::ios_base::failbit);
             fid.open("meters.json");
-            fid << std::setw(1) << util::to_json(report) << "\n";
+            fid << std::setw(1) << aux::to_json(report) << "\n";
         }
 
         // output profile and diagnostic feedback
-        auto profile = util::profiler_summary();
+        auto profile = profile::profiler_summary();
         std::cout << profile << "\n";
 
         std::cout << "there were " << sim.num_spikes() << " spikes\n";
diff --git a/example/bench/parameters.cpp b/example/bench/parameters.cpp
index dea52a7003823d32482aa9836ec5515212703ae4..74328feae748023d7ac9773050fc138b5774ce6c 100644
--- a/example/bench/parameters.cpp
+++ b/example/bench/parameters.cpp
@@ -2,9 +2,9 @@
 #include <fstream>
 #include <string>
 
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
 #include "parameters.hpp"
 
diff --git a/example/bench/parameters.hpp b/example/bench/parameters.hpp
index 52c0d16418753c11b521896dd2ae101487f2c03d..9f6fdc9fd8a7177b13d2a09ac40ac0ead3aeb428 100644
--- a/example/bench/parameters.hpp
+++ b/example/bench/parameters.hpp
@@ -3,7 +3,7 @@
 #include <ostream>
 #include <string>
 
-#include <common_types.hpp>
+#include <arbor/common_types.hpp>
 
 using arb::time_type;
 
diff --git a/example/bench/recipe.cpp b/example/bench/recipe.cpp
index c3358c226d1c8cb43a24bf8bba5694d77724ce4f..50787c08d43be3a1828c0c23a0a8dd2073243570 100644
--- a/example/bench/recipe.cpp
+++ b/example/bench/recipe.cpp
@@ -1,7 +1,8 @@
 #include <random>
 
+#include <arbor/common_types.hpp>
+
 #include <benchmark_cell.hpp>
-#include <common_types.hpp>
 #include <time_sequence.hpp>
 
 #include "recipe.hpp"
diff --git a/example/bench/recipe.hpp b/example/bench/recipe.hpp
index 76a1a84f5e2f2f29a068e07bee8bf00343e16682..f06952be902c6f0f60930aa4a70825386f1c661b 100644
--- a/example/bench/recipe.hpp
+++ b/example/bench/recipe.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
-#include <common_types.hpp>
+#include <arbor/common_types.hpp>
+
 #include <recipe.hpp>
 
 #include "parameters.hpp"
diff --git a/example/brunel/CMakeLists.txt b/example/brunel/CMakeLists.txt
index dcb5855c3704dad665dd00f8d83f98bcd074dc40..15ed1ee7d71288e1bf8a2e4e7130058c3a60ee7c 100644
--- a/example/brunel/CMakeLists.txt
+++ b/example/brunel/CMakeLists.txt
@@ -1,23 +1,8 @@
-set(HEADERS
-    io.hpp
-    partitioner.hpp
-)
-set(MINIAPP_SOURCES
+add_executable(brunel-miniapp
     brunel_miniapp.cpp
-    io.cpp
-)
+    io.cpp)
 
-add_executable(brunel_miniapp.exe ${MINIAPP_SOURCES} ${HEADERS})
+target_link_libraries(brunel-miniapp PRIVATE arbor arbor-aux ext-tclap)
 
-target_link_libraries(brunel_miniapp.exe LINK_PUBLIC ${ARB_LIBRARIES})
-target_link_libraries(brunel_miniapp.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})
-
-if(ARB_WITH_MPI)
-    target_link_libraries(brunel_miniapp.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
-    set_property(TARGET brunel_miniapp.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-endif()
-
-set_target_properties(brunel_miniapp.exe
-   PROPERTIES
-   RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example"
-)
+# TODO: resolve public headers
+target_link_libraries(brunel-miniapp PRIVATE arbor-private-headers)
diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp
index e324c8f64bb753239036658ac9fbf2810f244214..6ddbebebd7b0487e9c85b4a7123b75da02b0bdd3 100644
--- a/example/brunel/brunel_miniapp.cpp
+++ b/example/brunel/brunel_miniapp.cpp
@@ -6,24 +6,27 @@
 #include <set>
 #include <vector>
 
-#include <common_types.hpp>
-#include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
-#include <event_generator.hpp>
-#include <hardware/gpu.hpp>
-#include <hardware/node_info.hpp>
-#include <io/exporter_spike_file.hpp>
-#include <json/json.hpp>
-#include <lif_cell_description.hpp>
-#include <profiling/profiler.hpp>
-#include <profiling/meter_manager.hpp>
-#include <recipe.hpp>
-#include <simulation.hpp>
-#include <threading/threading.hpp>
-#include <util/config.hpp>
-#include <util/debug.hpp>
-#include <util/ioutil.hpp>
-#include <util/nop.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
+#include <arbor/profile/meter_manager.hpp>
+#include <arbor/profile/profiler.hpp>
+#include <arbor/threadinfo.hpp>
+#include <arbor/version.hpp>
+
+#include "json_meter.hpp"
+#ifdef ARB_MPI_ENABLED
+#include "with_mpi.hpp"
+#endif
+
+#include "communication/communicator.hpp"
+#include "event_generator.hpp"
+#include "hardware/gpu.hpp"
+#include "hardware/node_info.hpp"
+#include "io/exporter_spike_file.hpp"
+#include "lif_cell_description.hpp"
+#include "recipe.hpp"
+#include "simulation.hpp"
+#include "util/ioutil.hpp"
 
 #include "partitioner.hpp"
 #include "io.hpp"
@@ -191,17 +194,17 @@ int main(int argc, char** argv) {
     distributed_context context;
 
     try {
-#ifdef ARB_HAVE_MPI
-        mpi::scoped_guard guard(&argc, &argv);
+#ifdef ARB_MPI_ENABLED
+        with_mpi guard(argc, argv, false);
         context = mpi_context(MPI_COMM_WORLD);
 #endif
-        arb::util::meter_manager meters(&context);
+        arb::profile::meter_manager meters(&context);
         meters.start();
         std::cout << util::mask_stream(context.id()==0);
         // read parameters
         io::cl_options options = io::read_options(argc, argv, context.id()==0);
         hw::node_info nd;
-        nd.num_cpu_cores = threading::num_threads();
+        nd.num_cpu_cores = arb::num_threads();
         nd.num_gpus = hw::num_gpus()>0? 1: 0;
         banner(nd, &context);
 
@@ -277,16 +280,16 @@ int main(int argc, char** argv) {
         meters.checkpoint("model-simulate");
 
         // output profile and diagnostic feedback
-        std::cout << util::profiler_summary() << "\n";
+        std::cout << profile::profiler_summary() << "\n";
         std::cout << "\nThere were " << sim.num_spikes() << " spikes\n";
 
-        auto report = util::make_meter_report(meters);
+        auto report = profile::make_meter_report(meters);
         std::cout << report;
         if (context.id()==0) {
             std::ofstream fid;
             fid.exceptions(std::ios_base::badbit | std::ios_base::failbit);
             fid.open("meters.json");
-            fid << std::setw(1) << util::to_json(report) << "\n";
+            fid << std::setw(1) << aux::to_json(report) << "\n";
         }
     }
     catch (io::usage_error& e) {
@@ -308,7 +311,7 @@ void banner(hw::node_info nd, const distributed_context* ctx) {
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
     std::cout << "  - threads     : " << nd.num_cpu_cores
-              << " (" << threading::description() << ")\n";
+              << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
 }
diff --git a/example/brunel/io.cpp b/example/brunel/io.cpp
index afb406761a6f35ed76aa62bfdbeff1f4d1d8afba..c4940320f5b813c1f248d0d97e6b8d2a9b5ac046 100644
--- a/example/brunel/io.cpp
+++ b/example/brunel/io.cpp
@@ -6,9 +6,11 @@
 #include <sstream>
 #include <string>
 #include <type_traits>
+
 #include <tclap/CmdLine.h>
-#include <util/meta.hpp>
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "util/meta.hpp"
 #include "io.hpp"
 
 // Let TCLAP understand value arguments that are of an optional type.
diff --git a/example/brunel/io.hpp b/example/brunel/io.hpp
index 45c0c65c7d1b90c4b7201866a82281d0dc3d535e..deeedb5fd4b291f578e2f7ed1f4650aaf2c8248f 100644
--- a/example/brunel/io.hpp
+++ b/example/brunel/io.hpp
@@ -4,9 +4,9 @@
 #include <string>
 #include <utility>
 #include <vector>
-#include <common_types.hpp>
-#include <util/optional.hpp>
-#include <util/path.hpp>
+
+#include <arbor/common_types.hpp>
+#include <arbor/util/optional.hpp>
 
 namespace arb {
 namespace io {
diff --git a/example/brunel/partitioner.hpp b/example/brunel/partitioner.hpp
index f73e1dee58b677fe7acccf441db1663af80d0218..1aaae53ae1665af17092a1759f64c775ec204cd4 100644
--- a/example/brunel/partitioner.hpp
+++ b/example/brunel/partitioner.hpp
@@ -1,7 +1,8 @@
-#include <communication/distributed_context.hpp>
-#include <domain_decomposition.hpp>
-#include <hardware/node_info.hpp>
-#include <recipe.hpp>
+#include <arbor/distributed_context.hpp>
+
+#include "domain_decomposition.hpp"
+#include "hardware/node_info.hpp"
+#include "recipe.hpp"
 
 namespace arb {
     static
diff --git a/example/generators/CMakeLists.txt b/example/generators/CMakeLists.txt
index 1e743f11c286f926f294fd0fa68bc48cca67e91e..7235b1047a6b13f8a43e1cef1f772f9b273f2ef2 100644
--- a/example/generators/CMakeLists.txt
+++ b/example/generators/CMakeLists.txt
@@ -1,15 +1,6 @@
-add_executable(event_gen.exe event_gen.cpp)
+add_executable(event-gen event_gen.cpp)
 
-target_link_libraries(event_gen.exe LINK_PUBLIC ${ARB_LIBRARIES})
-target_link_libraries(event_gen.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})
+target_link_libraries(event-gen PRIVATE arbor arbor-aux ext-json)
 
-if(ARB_WITH_MPI)
-    target_link_libraries(event_gen.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
-    set_property(TARGET event_gen.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-endif()
-
-set_target_properties(
-    event_gen.exe
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example"
-)
+# TODO: resolve public headers
+target_link_libraries(event-gen PRIVATE arbor-private-headers)
diff --git a/example/generators/event_gen.cpp b/example/generators/event_gen.cpp
index 6f6aeb4420cb526a5c6bc785a31f2ff35abdb178..3deb5ba096415bce724816274203a3e99010ef73 100644
--- a/example/generators/event_gen.cpp
+++ b/example/generators/event_gen.cpp
@@ -10,17 +10,18 @@
 #include <iomanip>
 #include <iostream>
 
-#include <json/json.hpp>
-
-#include <cell.hpp>
-#include <common_types.hpp>
-#include <communication/distributed_context.hpp>
-#include <event_generator.hpp>
-#include <hardware/node_info.hpp>
-#include <load_balance.hpp>
-#include <simulation.hpp>
-#include <recipe.hpp>
-#include <simple_sampler.hpp>
+#include <nlohmann/json.hpp>
+
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
+
+#include "cell.hpp"
+#include "event_generator.hpp"
+#include "hardware/node_info.hpp"
+#include "load_balance.hpp"
+#include "simulation.hpp"
+#include "recipe.hpp"
+#include "simple_sampler.hpp"
 
 using arb::cell_gid_type;
 using arb::cell_lid_type;
@@ -61,19 +62,19 @@ public:
     }
 
     cell_kind get_cell_kind(cell_gid_type gid) const override {
-        EXPECTS(gid==0); // There is only one cell in the model
+        arb_assert(gid==0); // There is only one cell in the model
         return cell_kind::cable1d_neuron;
     }
 
     // The cell has one target synapse, which receives both inhibitory and exchitatory inputs.
     cell_size_type num_targets(cell_gid_type gid) const override {
-        EXPECTS(gid==0); // There is only one cell in the model
+        arb_assert(gid==0); // There is only one cell in the model
         return 1;
     }
 
     // Return two generators attached to the one cell.
     std::vector<arb::event_generator> event_generators(cell_gid_type gid) const override {
-        EXPECTS(gid==0); // There is only one cell in the model
+        arb_assert(gid==0); // There is only one cell in the model
 
         using RNG = std::mt19937_64;
         using pgen = arb::poisson_generator<RNG>;
@@ -107,13 +108,13 @@ public:
 
     // There is one probe (for measuring voltage at the soma) on the cell
     cell_size_type num_probes(cell_gid_type gid)  const override {
-        EXPECTS(gid==0); // There is only one cell in the model
+        arb_assert(gid==0); // There is only one cell in the model
         return 1;
     }
 
     arb::probe_info get_probe(cell_member_type id) const override {
-        EXPECTS(id.gid==0);     // There is one cell,
-        EXPECTS(id.index==0);   // with one probe.
+        arb_assert(id.gid==0);     // There is one cell,
+        arb_assert(id.index==0);   // with one probe.
 
         // Get the appropriate kind for measuring voltage
         cell_probe_address::probe_kind kind = cell_probe_address::membrane_voltage;
diff --git a/example/miniapp/CMakeLists.txt b/example/miniapp/CMakeLists.txt
index b309737a8b253fe8927189a88da1ba979edbd23c..f2d8e9bd6aa1bfd9d5ae489aa7e788a04f06be97 100644
--- a/example/miniapp/CMakeLists.txt
+++ b/example/miniapp/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(MINIAPP_SOURCES
+add_executable(miniapp
     miniapp.cpp
     io.cpp
     miniapp_recipes.cpp
@@ -6,19 +6,7 @@ set(MINIAPP_SOURCES
     trace.cpp
 )
 
-add_executable(miniapp.exe ${MINIAPP_SOURCES})
-
-target_link_libraries(miniapp.exe LINK_PUBLIC ${ARB_LIBRARIES})
-target_link_libraries(miniapp.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})
-
-if(ARB_WITH_MPI)
-    target_link_libraries(miniapp.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
-    set_property(TARGET miniapp.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-endif()
-
-set_target_properties(
-    miniapp.exe
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/example"
-)
+target_link_libraries(miniapp PRIVATE arbor arbor-aux ext-tclap ext-json)
 
+# TODO: resolve public headers
+target_link_libraries(miniapp PRIVATE arbor-private-headers)
diff --git a/example/miniapp/io.cpp b/example/miniapp/io.cpp
index 95ec7690af9c584cb6b0aeba0b376f3b931fbe35..e17cfdfc5c5f49a3050cfb7118d161fd3ef541fb 100644
--- a/example/miniapp/io.cpp
+++ b/example/miniapp/io.cpp
@@ -9,11 +9,12 @@
 #include <type_traits>
 
 #include <tclap/CmdLine.h>
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
-#include <util/meta.hpp>
-#include <util/optional.hpp>
-#include <util/strprintf.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "util/meta.hpp"
+#include "util/strprintf.hpp"
 
 #include "io.hpp"
 
diff --git a/example/miniapp/io.hpp b/example/miniapp/io.hpp
index 6283ac4d34b860fd4772ffeee7c4dd31ad8178b8..f9e89f22ca377752828c242baae210c7fb2573db 100644
--- a/example/miniapp/io.hpp
+++ b/example/miniapp/io.hpp
@@ -7,10 +7,12 @@
 #include <utility>
 #include <vector>
 
-#include <common_types.hpp>
-#include <util/optional.hpp>
-#include <util/path.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/util/optional.hpp>
 
+#include "util/path.hpp"
+
+// TODO: this shouldn't be in arb namespace
 namespace arb {
 namespace io {
 
diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp
index 943ad04de59bb6d307e8253fead694cec064f436..9d90872df92e38ddae637037f04bfaff9f6d73fc 100644
--- a/example/miniapp/miniapp.cpp
+++ b/example/miniapp/miniapp.cpp
@@ -5,26 +5,29 @@
 #include <memory>
 #include <vector>
 
-#include <json/json.hpp>
-
-#include <common_types.hpp>
-#include <communication/distributed_context.hpp>
-#include <cell.hpp>
-#include <hardware/gpu.hpp>
-#include <hardware/node_info.hpp>
-#include <io/exporter_spike_file.hpp>
-#include <load_balance.hpp>
-#include <simulation.hpp>
-#include <profiling/profiler.hpp>
-#include <profiling/meter_manager.hpp>
-#include <sampling.hpp>
-#include <schedule.hpp>
-#include <threading/threading.hpp>
-#include <util/any.hpp>
-#include <util/config.hpp>
-#include <util/debug.hpp>
-#include <util/ioutil.hpp>
-#include <util/nop.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
+#include <arbor/profile/meter_manager.hpp>
+#include <arbor/profile/profiler.hpp>
+#include <arbor/threadinfo.hpp>
+#include <arbor/version.hpp>
+
+#include "communication/communicator.hpp"
+#include "cell.hpp"
+#include "hardware/gpu.hpp"
+#include "hardware/node_info.hpp"
+#include "io/exporter_spike_file.hpp"
+#include "load_balance.hpp"
+#include "simulation.hpp"
+#include "sampling.hpp"
+#include "schedule.hpp"
+#include "util/any.hpp"
+#include "util/ioutil.hpp"
+
+#include "json_meter.hpp"
+#ifdef ARB_MPI_ENABLED
+#include "with_mpi.hpp"
+#endif
 
 #include "io.hpp"
 #include "miniapp_recipes.hpp"
@@ -46,12 +49,12 @@ int main(int argc, char** argv) {
     distributed_context context;
 
     try {
-        #ifdef ARB_HAVE_MPI
-        mpi::scoped_guard guard(&argc, &argv);
+#ifdef ARB_MPI_ENABLED
+        with_mpi guard(argc, argv, false);
         context = mpi_context(MPI_COMM_WORLD);
-        #endif
+#endif
 
-        util::meter_manager meters(&context);
+        profile::meter_manager meters(&context);
         meters.start();
 
         std::cout << util::mask_stream(context.id()==0);
@@ -63,7 +66,7 @@ int main(int argc, char** argv) {
         // Use a node description that uses the number of threads used by the
         // threading back end, and 1 gpu if available.
         hw::node_info nd;
-        nd.num_cpu_cores = threading::num_threads();
+        nd.num_cpu_cores = arb::num_threads();
         nd.num_gpus = hw::num_gpus()>0? 1: 0;
         banner(nd, &context);
 
@@ -146,7 +149,7 @@ int main(int argc, char** argv) {
         meters.checkpoint("model-simulate");
 
         // output profile and diagnostic feedback
-        auto profile = util::profiler_summary();
+        auto profile = profile::profiler_summary();
         std::cout << profile << "\n";
         std::cout << "\nthere were " << sim.num_spikes() << " spikes\n";
 
@@ -156,13 +159,13 @@ int main(int argc, char** argv) {
             write_trace(trace, options.trace_prefix);
         }
 
-        auto report = util::make_meter_report(meters);
+        auto report = profile::make_meter_report(meters);
         std::cout << report;
         if (context.id()==0) {
             std::ofstream fid;
             fid.exceptions(std::ios_base::badbit | std::ios_base::failbit);
             fid.open("meters.json");
-            fid << std::setw(1) << util::to_json(report) << "\n";
+            fid << std::setw(1) << aux::to_json(report) << "\n";
         }
     }
     catch (io::usage_error& e) {
@@ -184,7 +187,7 @@ void banner(hw::node_info nd, const distributed_context* ctx) {
     std::cout << "  - distributed : " << ctx->size()
               << " (" << ctx->name() << ")\n";
     std::cout << "  - threads     : " << nd.num_cpu_cores
-              << " (" << threading::description() << ")\n";
+              << " (" << arb::thread_implementation() << ")\n";
     std::cout << "  - gpus        : " << nd.num_gpus << "\n";
     std::cout << "==========================================\n";
 }
diff --git a/example/miniapp/miniapp_recipes.cpp b/example/miniapp/miniapp_recipes.cpp
index 17e852cbb8ec2fd1fb8c9c433f31f06f630930a3..b8ab4e149407c52774200ee643f3d78d68c2d42b 100644
--- a/example/miniapp/miniapp_recipes.cpp
+++ b/example/miniapp/miniapp_recipes.cpp
@@ -3,12 +3,13 @@
 #include <vector>
 #include <utility>
 
-#include <cell.hpp>
-#include <event_generator.hpp>
-#include <morphology.hpp>
-#include <spike_source_cell.hpp>
-#include <time_sequence.hpp>
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+
+#include "cell.hpp"
+#include "event_generator.hpp"
+#include "morphology.hpp"
+#include "spike_source_cell.hpp"
+#include "time_sequence.hpp"
 
 #include "io.hpp"
 #include "miniapp_recipes.hpp"
@@ -60,7 +61,7 @@ cell make_basic_cell(
         }
     }
 
-    EXPECTS(!terminals.empty());
+    arb_assert(!terminals.empty());
 
     arb::mechanism_desc syn_default(syn_type);
     for (unsigned i=0; i<num_synapses; ++i) {
@@ -76,7 +77,7 @@ public:
     basic_cell_recipe(cell_gid_type ncell, basic_recipe_param param, probe_distribution pdist):
         ncell_(ncell), param_(std::move(param)), pdist_(std::move(pdist))
     {
-        EXPECTS(param_.morphologies.size()>0);
+        arb_assert(param_.morphologies.size()>0);
         delay_distribution_param_ = exp_param{param_.mean_connection_delay_ms
                             - param_.min_connection_delay_ms};
     }
@@ -99,9 +100,9 @@ public:
         auto cell = make_basic_cell(morph, param_.num_compartments, param_.num_synapses,
                         param_.synapse_type, gen);
 
-        EXPECTS(cell.num_segments()==cell_segments);
-        EXPECTS(cell.synapses().size()==num_targets(i));
-        EXPECTS(cell.detectors().size()==num_sources(i));
+        arb_assert(cell.num_segments()==cell_segments);
+        arb_assert(cell.synapses().size()==num_targets(i));
+        arb_assert(cell.detectors().size()==num_sources(i));
 
         return util::unique_any(std::move(cell));
     }
@@ -124,7 +125,7 @@ public:
                 cell_probe_address::membrane_voltage: cell_probe_address::membrane_current;
         }
         else {
-            EXPECTS(stride==2);
+            arb_assert(stride==2);
             // Both kinds available.
             kind = (probe_id.index%stride==0)?
                 cell_probe_address::membrane_voltage: cell_probe_address::membrane_current;
@@ -319,7 +320,7 @@ public:
 
         for (unsigned t=0; t<param_.num_synapses; ++t) {
             cell_gid_type source = t>=i? t+1: t;
-            EXPECTS(source<ncell_);
+            arb_assert(source<ncell_);
 
             cell_connection cc = draw_connection_params(conn_param_gen);
             cc.source = {source, 0};
diff --git a/example/miniapp/miniapp_recipes.hpp b/example/miniapp/miniapp_recipes.hpp
index 0ed62f6b2942b8b3de0ae200e1608dcf1a830221..684b58b8b24c475a55385ace9350fbfd057e9eea 100644
--- a/example/miniapp/miniapp_recipes.hpp
+++ b/example/miniapp/miniapp_recipes.hpp
@@ -4,8 +4,9 @@
 #include <memory>
 #include <stdexcept>
 
-#include <recipe.hpp>
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
+
+#include "recipe.hpp"
 
 #include "morphology_pool.hpp"
 
diff --git a/example/miniapp/trace.cpp b/example/miniapp/trace.cpp
index b834ed40ff4ed7938dc6a6c188d6dcadd3e232fd..5fc878982bcb919b0f7e6c3b13f0bbeaa06ea3a8 100644
--- a/example/miniapp/trace.cpp
+++ b/example/miniapp/trace.cpp
@@ -1,10 +1,11 @@
 #include <fstream>
 #include <string>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
-#include <common_types.hpp>
-#include <util/strprintf.hpp>
+#include <arbor/common_types.hpp>
+
+#include "util/strprintf.hpp"
 
 #include "trace.hpp"
 
diff --git a/example/miniapp/trace.hpp b/example/miniapp/trace.hpp
index af42528de69d84aa4e7137df1b14c9f95d468262..23db694ac02adacf88da29bc06e7d3b784ca9a7d 100644
--- a/example/miniapp/trace.hpp
+++ b/example/miniapp/trace.hpp
@@ -7,8 +7,9 @@
 #include <string>
 #include <vector>
 
-#include <common_types.hpp>
-#include <simple_sampler.hpp>
+#include <arbor/common_types.hpp>
+
+#include "simple_sampler.hpp"
 
 struct sample_trace {
     arb::cell_member_type probe_id;
diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..05f93fa1108f60b92111588b4fbfead8cf5606af
--- /dev/null
+++ b/ext/CMakeLists.txt
@@ -0,0 +1,91 @@
+# Niels Lohmann's json library (single-header version).
+
+add_library(ext-json INTERFACE)
+target_include_directories(ext-json INTERFACE json/single_include)
+
+# TCLAP command line parsing library (header-only).
+
+add_library(ext-tclap INTERFACE)
+target_include_directories(ext-tclap INTERFACE tclap/include)
+
+# Intel TBB:
+# Alias system TBB or build locally and export that, according
+# to ARB_PRIVATE_TBBLIB setting.
+
+find_package(TBB)
+if(ARB_PRIVATE_TBBLIB OR NOT TBB_FOUND)
+    check_git_submodule(tbb tbb)
+    add_target_if(tbb_avail check-tbb-submodule "Checking TBB submodule" "TBB git submodule required")
+
+    # Turn off proxy malloc library and test compilation.
+    option(TBB_BUILD_TBBMALLOC_PROXY "" OFF)
+    option(TBB_BUILD_TESTS           "" OFF)
+    # Only make static libraries.
+    option(TBB_BUILD_SHARED          "" OFF)
+    option(TBB_BUILD_STATIC          "" ON)
+
+    add_subdirectory(tbb EXCLUDE_FROM_ALL)
+
+    add_library(ext-tbb INTERFACE)
+    add_dependencies(ext-tbb check-tbb-submodule)
+    target_link_libraries(ext-tbb INTERFACE tbb_static tbbmalloc_static)
+    target_include_directories(ext-tbb SYSTEM INTERFACE tbb/include)
+
+    # Can't use install(TARGETS) because 1) tbb targets are defined in
+    # a subdirectory and 2) having been excluded-from-all, the behaviour
+    # might have been undefined anyway. Seriously.
+
+    install(FILES "$<TARGET_FILE:tbb_static>" "$<TARGET_FILE:tbbmalloc_static>" DESTINATION ${CMAKE_INSTALL_LIBDIR} OPTIONAL)
+else()
+    add_library(ext-tbb INTERFACE)
+    target_link_libraries(ext-tbb INTERFACE TBB::tbb)
+endif()
+
+
+# Google benchmark for microbenchmarks:
+
+check_git_submodule(gbench google-benchmark)
+# Source and install locations are set unconditionally: the ext-benchmark
+# interface target below refers to the install directory in both branches.
+set(gbench_src_dir "${CMAKE_CURRENT_SOURCE_DIR}/google-benchmark")
+set(gbench_install_dir "${CMAKE_CURRENT_BINARY_DIR}/google-benchmark")
+
+if(gbench_avail)
+    # Set up google benchmark as an external project.
+    include(ExternalProject)
+    set(gbench_cmake_args
+        "-DCMAKE_BUILD_TYPE=release"
+        "-DCMAKE_INSTALL_PREFIX=${gbench_install_dir}"
+        "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
+        "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
+    ExternalProject_Add(gbench-build
+        # Add dummy DOWNLOAD_COMMAND to stop ExternalProject_Add terminating CMake if the
+        # git submodule had not been updated.
+        DOWNLOAD_COMMAND "${CMAKE_COMMAND}" -E echo "Warning: ${gbench_src_dir} empty or missing."
+        SOURCE_DIR "${gbench_src_dir}"
+        CMAKE_ARGS "${gbench_cmake_args}"
+        INSTALL_DIR "${gbench_install_dir}"
+    )
+    set_target_properties(gbench-build PROPERTIES EXCLUDE_FROM_ALL TRUE)
+else()
+    add_error_target(gbench-build
+        "Building Google benchmark library"
+        "The git submodule for google benchmark is not available")
+endif()
+
+add_library(ext-benchmark INTERFACE)
+add_dependencies(ext-benchmark gbench-build)
+target_include_directories(ext-benchmark INTERFACE "${gbench_install_dir}/include")
+target_link_libraries(ext-benchmark INTERFACE "${gbench_install_dir}/lib/libbenchmark.a")
+
+# Sphinx RTD theme
+
+check_git_submodule(rtdtheme sphinx_rtd_theme)
+add_target_if(rtdtheme_avail
+    check-sphinx_rtd_theme
+    "Checking for Read the Docs Sphinx theme"
+    "The git submodule for read the docs is not available")
+
+add_library(ext-sphinx_rtd_theme INTERFACE)
+add_dependencies(ext-sphinx_rtd_theme check-sphinx_rtd_theme)
+target_include_directories(ext-sphinx_rtd_theme INTERFACE sphinx_rtd_theme)
diff --git a/tests/ubench/google-benchmark b/ext/google-benchmark
similarity index 100%
rename from tests/ubench/google-benchmark
rename to ext/google-benchmark
diff --git a/json/LICENSE.MIT b/ext/json/LICENSE.MIT
similarity index 100%
rename from json/LICENSE.MIT
rename to ext/json/LICENSE.MIT
diff --git a/json/README.md b/ext/json/README.md
similarity index 100%
rename from json/README.md
rename to ext/json/README.md
diff --git a/json/json.hpp b/ext/json/single_include/nlohmann/json.hpp
similarity index 100%
rename from json/json.hpp
rename to ext/json/single_include/nlohmann/json.hpp
diff --git a/doc/rtd_theme b/ext/sphinx_rtd_theme
similarity index 100%
rename from doc/rtd_theme
rename to ext/sphinx_rtd_theme
diff --git a/tbb b/ext/tbb
similarity index 100%
rename from tbb
rename to ext/tbb
diff --git a/tclap/AUTHORS b/ext/tclap/AUTHORS
similarity index 100%
rename from tclap/AUTHORS
rename to ext/tclap/AUTHORS
diff --git a/tclap/COPYING b/ext/tclap/COPYING
similarity index 100%
rename from tclap/COPYING
rename to ext/tclap/COPYING
diff --git a/tclap/README b/ext/tclap/README
similarity index 100%
rename from tclap/README
rename to ext/tclap/README
diff --git a/tclap/Arg.h b/ext/tclap/include/tclap/Arg.h
similarity index 100%
rename from tclap/Arg.h
rename to ext/tclap/include/tclap/Arg.h
diff --git a/tclap/ArgException.h b/ext/tclap/include/tclap/ArgException.h
similarity index 100%
rename from tclap/ArgException.h
rename to ext/tclap/include/tclap/ArgException.h
diff --git a/tclap/ArgTraits.h b/ext/tclap/include/tclap/ArgTraits.h
similarity index 100%
rename from tclap/ArgTraits.h
rename to ext/tclap/include/tclap/ArgTraits.h
diff --git a/tclap/CmdLine.h b/ext/tclap/include/tclap/CmdLine.h
similarity index 100%
rename from tclap/CmdLine.h
rename to ext/tclap/include/tclap/CmdLine.h
diff --git a/tclap/CmdLineInterface.h b/ext/tclap/include/tclap/CmdLineInterface.h
similarity index 100%
rename from tclap/CmdLineInterface.h
rename to ext/tclap/include/tclap/CmdLineInterface.h
diff --git a/tclap/CmdLineOutput.h b/ext/tclap/include/tclap/CmdLineOutput.h
similarity index 100%
rename from tclap/CmdLineOutput.h
rename to ext/tclap/include/tclap/CmdLineOutput.h
diff --git a/tclap/Constraint.h b/ext/tclap/include/tclap/Constraint.h
similarity index 100%
rename from tclap/Constraint.h
rename to ext/tclap/include/tclap/Constraint.h
diff --git a/tclap/DocBookOutput.h b/ext/tclap/include/tclap/DocBookOutput.h
similarity index 100%
rename from tclap/DocBookOutput.h
rename to ext/tclap/include/tclap/DocBookOutput.h
diff --git a/tclap/HelpVisitor.h b/ext/tclap/include/tclap/HelpVisitor.h
similarity index 100%
rename from tclap/HelpVisitor.h
rename to ext/tclap/include/tclap/HelpVisitor.h
diff --git a/tclap/IgnoreRestVisitor.h b/ext/tclap/include/tclap/IgnoreRestVisitor.h
similarity index 100%
rename from tclap/IgnoreRestVisitor.h
rename to ext/tclap/include/tclap/IgnoreRestVisitor.h
diff --git a/tclap/MultiArg.h b/ext/tclap/include/tclap/MultiArg.h
similarity index 100%
rename from tclap/MultiArg.h
rename to ext/tclap/include/tclap/MultiArg.h
diff --git a/tclap/MultiSwitchArg.h b/ext/tclap/include/tclap/MultiSwitchArg.h
similarity index 100%
rename from tclap/MultiSwitchArg.h
rename to ext/tclap/include/tclap/MultiSwitchArg.h
diff --git a/tclap/OptionalUnlabeledTracker.h b/ext/tclap/include/tclap/OptionalUnlabeledTracker.h
similarity index 100%
rename from tclap/OptionalUnlabeledTracker.h
rename to ext/tclap/include/tclap/OptionalUnlabeledTracker.h
diff --git a/tclap/StandardTraits.h b/ext/tclap/include/tclap/StandardTraits.h
similarity index 100%
rename from tclap/StandardTraits.h
rename to ext/tclap/include/tclap/StandardTraits.h
diff --git a/tclap/StdOutput.h b/ext/tclap/include/tclap/StdOutput.h
similarity index 100%
rename from tclap/StdOutput.h
rename to ext/tclap/include/tclap/StdOutput.h
diff --git a/tclap/SwitchArg.h b/ext/tclap/include/tclap/SwitchArg.h
similarity index 100%
rename from tclap/SwitchArg.h
rename to ext/tclap/include/tclap/SwitchArg.h
diff --git a/tclap/UnlabeledMultiArg.h b/ext/tclap/include/tclap/UnlabeledMultiArg.h
similarity index 100%
rename from tclap/UnlabeledMultiArg.h
rename to ext/tclap/include/tclap/UnlabeledMultiArg.h
diff --git a/tclap/UnlabeledValueArg.h b/ext/tclap/include/tclap/UnlabeledValueArg.h
similarity index 100%
rename from tclap/UnlabeledValueArg.h
rename to ext/tclap/include/tclap/UnlabeledValueArg.h
diff --git a/tclap/ValueArg.h b/ext/tclap/include/tclap/ValueArg.h
similarity index 100%
rename from tclap/ValueArg.h
rename to ext/tclap/include/tclap/ValueArg.h
diff --git a/tclap/ValuesConstraint.h b/ext/tclap/include/tclap/ValuesConstraint.h
similarity index 100%
rename from tclap/ValuesConstraint.h
rename to ext/tclap/include/tclap/ValuesConstraint.h
diff --git a/tclap/VersionVisitor.h b/ext/tclap/include/tclap/VersionVisitor.h
similarity index 100%
rename from tclap/VersionVisitor.h
rename to ext/tclap/include/tclap/VersionVisitor.h
diff --git a/tclap/Visitor.h b/ext/tclap/include/tclap/Visitor.h
similarity index 100%
rename from tclap/Visitor.h
rename to ext/tclap/include/tclap/Visitor.h
diff --git a/tclap/XorHandler.h b/ext/tclap/include/tclap/XorHandler.h
similarity index 100%
rename from tclap/XorHandler.h
rename to ext/tclap/include/tclap/XorHandler.h
diff --git a/tclap/ZshCompletionOutput.h b/ext/tclap/include/tclap/ZshCompletionOutput.h
similarity index 100%
rename from tclap/ZshCompletionOutput.h
rename to ext/tclap/include/tclap/ZshCompletionOutput.h
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9476c74c2822553e9f5e8c62719a108f12450706
--- /dev/null
+++ b/include/CMakeLists.txt
@@ -0,0 +1,65 @@
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/arbor)
+if(ARB_WITH_ASSERTIONS)
+    configure_file(arbor/assert_macro.hpp.enabled arbor/assert_macro.hpp COPYONLY)
+else()
+    configure_file(arbor/assert_macro.hpp.disabled arbor/assert_macro.hpp COPYONLY)
+endif()
+
+add_library(arbor-public-headers INTERFACE)
+
+# At build time, public headers are found in this directory and in the
+# binary directory (viz. assert_macro.hpp, version.hpp). For consumers
+# of an installed arbor package, the include files will be found
+# in the 'include' subdirectory of the installation path.
+
+target_include_directories(arbor-public-headers INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
+    $<INSTALL_INTERFACE:include>
+)
+
+# Always make version.hpp-test, but only update version.hpp if it differs.
+
+add_custom_command(
+    OUTPUT _always_rebuild
+    COMMAND true
+)
+
+set(arb_features)
+if(ARB_WITH_ASSERTIONS)
+    list(APPEND arb_features ASSERT)
+endif()
+if(ARB_WITH_MPI)
+    list(APPEND arb_features MPI)
+endif()
+if(ARB_WITH_CUDA)
+    list(APPEND arb_features GPU)
+endif()
+if(ARB_WITH_PROFILING)
+    list(APPEND arb_features PROFILE)
+endif()
+if(ARB_WITH_TBB)
+    list(APPEND arb_features TBB)
+elseif(ARB_WITH_CTHREAD)
+    list(APPEND arb_features CTHREAD)
+endif()
+
+add_custom_command(
+    OUTPUT version.hpp-test
+    DEPENDS _always_rebuild
+    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/git-source-id ${PROJECT_VERSION} ${arb_features} > version.hpp-test
+)
+
+set(version_hpp_path arbor/version.hpp)
+add_custom_command(
+    OUTPUT ${version_hpp_path}
+    DEPENDS version.hpp-test
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different version.hpp-test ${version_hpp_path}
+)
+
+add_custom_target(generate_version_hpp DEPENDS ${version_hpp_path})
+add_dependencies(arbor-public-headers generate_version_hpp)
+
+install(DIRECTORY arbor ${CMAKE_CURRENT_BINARY_DIR}/arbor
+    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+    FILES_MATCHING PATTERN "*.hpp")
diff --git a/include/arbor/assert.hpp b/include/arbor/assert.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..ac52566964b3c64306ce098bea29d926acebab3d
--- /dev/null
+++ b/include/arbor/assert.hpp
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <arbor/assert_macro.hpp>
+
+namespace arb {
+
+using failed_assertion_handler_t =
+    void (*)(const char* assertion, const char* file, int line, const char* func);
+
+void abort_on_failed_assertion(const char* assertion, const char* file, int line, const char* func);
+void ignore_failed_assertion(const char* assertion, const char* file, int line, const char* func);
+
+// defaults to abort_on_failed_assertion;
+extern failed_assertion_handler_t global_failed_assertion_handler;
+
+} // namespace arb
diff --git a/include/arbor/assert_macro.hpp.disabled b/include/arbor/assert_macro.hpp.disabled
new file mode 100644
index 0000000000000000000000000000000000000000..e23199b416fd05a2cf7584b3339d55481d0e596c
--- /dev/null
+++ b/include/arbor/assert_macro.hpp.disabled
@@ -0,0 +1,4 @@
+#pragma once
+// Assertions disabled: condition is still compiled, but `false &&` short-circuits so it is never evaluated.
+#define arb_assert(condition) \
+(void)(false && (condition))
diff --git a/include/arbor/assert_macro.hpp.enabled b/include/arbor/assert_macro.hpp.enabled
new file mode 100644
index 0000000000000000000000000000000000000000..7701f9145fdebeea84941a2bf7d2d167df183269
--- /dev/null
+++ b/include/arbor/assert_macro.hpp.enabled
@@ -0,0 +1,11 @@
+#pragma once
+// Function name reported on failure: __PRETTY_FUNCTION__ when __GNUC__ is defined, otherwise __func__.
+#ifdef __GNUC__
+    #define ARB_DEBUG_FUNCTION_NAME_ __PRETTY_FUNCTION__
+#else
+    #define ARB_DEBUG_FUNCTION_NAME_ __func__
+#endif
+// Assertions enabled: a false condition invokes arb::global_failed_assertion_handler with its text and location.
+#define arb_assert(condition) \
+(void)((condition) || \
+(arb::global_failed_assertion_handler(#condition, __FILE__, __LINE__, ARB_DEBUG_FUNCTION_NAME_), true))
diff --git a/src/common_types.hpp b/include/arbor/common_types.hpp
similarity index 95%
rename from src/common_types.hpp
rename to include/arbor/common_types.hpp
index c365883c0ef15a096e9a7cd675ed9559bab3a00a..aed91121afc1117609dc029d10cab95e228d8854 100644
--- a/src/common_types.hpp
+++ b/include/arbor/common_types.hpp
@@ -11,7 +11,7 @@
 #include <iosfwd>
 #include <type_traits>
 
-#include <util/lexcmp_def.hpp>
+#include <arbor/util/lexcmp_def.hpp>
 
 namespace arb {
 
@@ -49,7 +49,7 @@ struct cell_member_type {
     cell_lid_type index;
 };
 
-DEFINE_LEXICOGRAPHIC_ORDERING(cell_member_type,(a.gid,a.index),(b.gid,b.index))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING(cell_member_type,(a.gid,a.index),(b.gid,b.index))
 
 // For storing time values [ms]
 
diff --git a/src/communication/gathered_vector.hpp b/include/arbor/communication/gathered_vector.hpp
similarity index 82%
rename from src/communication/gathered_vector.hpp
rename to include/arbor/communication/gathered_vector.hpp
index 3f5b550528e2aa892968bd75baf697ec6b4d3f48..6ab7ecd21a926cfc967ac04f40b90de99df99f18 100644
--- a/src/communication/gathered_vector.hpp
+++ b/include/arbor/communication/gathered_vector.hpp
@@ -1,10 +1,10 @@
 #pragma once
 
-#include <cstdint>
-#include <numeric>
+#include <cstddef>
+#include <algorithm>
 #include <vector>
 
-#include <util/rangeutil.hpp>
+#include <arbor/assert.hpp>
 
 namespace arb {
 
@@ -18,8 +18,8 @@ public:
         values_(std::move(v)),
         partition_(std::move(p))
     {
-        EXPECTS(util::is_sorted(partition_));
-        EXPECTS(partition_.back() == values_.size());
+        arb_assert(std::is_sorted(partition_.begin(), partition_.end()));
+        arb_assert(partition_.back() == values_.size());
     }
 
     /// the partition of distribution
diff --git a/include/arbor/communication/mpi_error.hpp b/include/arbor/communication/mpi_error.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fcbfc50b5070dc258fc9f40386df3c4708213f70
--- /dev/null
+++ b/include/arbor/communication/mpi_error.hpp
@@ -0,0 +1,112 @@
+#pragma once
+
+#include <string>
+#include <system_error>
+
+#include <mpi.h>
+
+namespace arb {
+
+enum class mpi_errc {
+    success = MPI_SUCCESS,
+    invalid_buffer = MPI_ERR_BUFFER,
+    invalid_count = MPI_ERR_COUNT,
+    invalid_datatype = MPI_ERR_TYPE,
+    invalid_tag = MPI_ERR_TAG,
+    invalid_communicator = MPI_ERR_COMM,
+    invalid_rank = MPI_ERR_RANK,
+    invalid_request =MPI_ERR_REQUEST,
+    invalid_root = MPI_ERR_ROOT,
+    invalid_group = MPI_ERR_GROUP,
+    invalid_operation = MPI_ERR_OP,
+    invalid_topology = MPI_ERR_TOPOLOGY,
+    invalid_dimension = MPI_ERR_DIMS,
+    invalid_argument = MPI_ERR_ARG,
+    unknown_error = MPI_ERR_UNKNOWN,
+    message_truncated = MPI_ERR_TRUNCATE,
+    other_error = MPI_ERR_OTHER,
+    internal_error = MPI_ERR_INTERN,
+    error_in_status = MPI_ERR_IN_STATUS,
+    pending = MPI_ERR_PENDING,
+    invalid_keyval = MPI_ERR_KEYVAL,
+    not_enough_memory = MPI_ERR_NO_MEM,
+    invalid_base = MPI_ERR_BASE,
+    key_too_long = MPI_ERR_INFO_KEY,
+    value_too_long = MPI_ERR_INFO_VALUE,
+    invalid_key = MPI_ERR_INFO_NOKEY,
+    spawn_error = MPI_ERR_SPAWN,
+    invalid_port = MPI_ERR_PORT,
+    invalid_service = MPI_ERR_SERVICE,
+    invalid_name = MPI_ERR_NAME,
+    invalid_win = MPI_ERR_WIN,
+    invalid_size = MPI_ERR_SIZE,
+    invalid_disp = MPI_ERR_DISP,
+    invalid_info = MPI_ERR_INFO,
+    invalid_locktype = MPI_ERR_LOCKTYPE,
+    invalid_assert = MPI_ERR_ASSERT,
+    rma_access_conflict = MPI_ERR_RMA_CONFLICT,
+    rma_synchronization_error = MPI_ERR_RMA_SYNC,
+#if MPI_VERSION >= 3
+    rma_range_error = MPI_ERR_RMA_RANGE,
+    rma_attach_failure = MPI_ERR_RMA_ATTACH,
+    rma_share_failure = MPI_ERR_RMA_SHARED,
+    rma_wrong_flavor = MPI_ERR_RMA_FLAVOR,
+#endif
+    invalid_file_handle = MPI_ERR_FILE,
+    not_same = MPI_ERR_NOT_SAME,
+    amode_error = MPI_ERR_AMODE,
+    unsupported_datarep = MPI_ERR_UNSUPPORTED_DATAREP,
+    unsupported_operation = MPI_ERR_UNSUPPORTED_OPERATION,
+    no_such_file = MPI_ERR_NO_SUCH_FILE,
+    file_exists = MPI_ERR_FILE_EXISTS,
+    invalid_filename = MPI_ERR_BAD_FILE,
+    permission_denied = MPI_ERR_ACCESS,
+    no_space = MPI_ERR_NO_SPACE,
+    quota_exceeded = MPI_ERR_QUOTA,
+    read_only = MPI_ERR_READ_ONLY,
+    file_in_use = MPI_ERR_FILE_IN_USE,
+    duplicate_datarep = MPI_ERR_DUP_DATAREP,
+    conversion_error = MPI_ERR_CONVERSION,
+    other_io_error = MPI_ERR_IO,
+};
+
+} // namespace arb
+
+namespace std {
+template <> struct is_error_condition_enum<arb::mpi_errc>: true_type {};
+}
+
+namespace arb {
+
+class mpi_error_category_impl;
+const mpi_error_category_impl& mpi_error_category();
+
+class mpi_error_category_impl: public std::error_category {
+    const char* name() const noexcept override { return "MPI"; }
+    std::string message(int ev) const override {
+        char err[MPI_MAX_ERROR_STRING] = {}; // zero-filled: a failed lookup yields "" rather than garbage
+        int r = 0;
+        MPI_Error_string(ev, err, &r);
+        return err;
+    }
+    std::error_condition default_error_condition(int ev) const noexcept override {
+        int eclass = MPI_ERR_UNKNOWN; // retained if MPI_Error_class fails to set it
+        MPI_Error_class(ev, &eclass);
+        return std::error_condition(eclass, mpi_error_category());
+    }
+};
+
+inline std::error_condition make_error_condition(mpi_errc ec) {
+    return std::error_condition(static_cast<int>(ec), mpi_error_category());
+}
+
+struct mpi_error: std::system_error {
+    explicit mpi_error(int mpi_err):
+        std::system_error(mpi_err, mpi_error_category()) {}
+
+    mpi_error(int mpi_err, const std::string& what_arg):
+        std::system_error(mpi_err, mpi_error_category(), what_arg) {}
+};
+
+} // namespace arb
+
diff --git a/src/communication/distributed_context.hpp b/include/arbor/distributed_context.hpp
similarity index 69%
rename from src/communication/distributed_context.hpp
rename to include/arbor/distributed_context.hpp
index ac599391070c6eac309553d73c048d0e6094fc8d..a59846b560a687fbb9902c773437e514561e5612 100644
--- a/src/communication/distributed_context.hpp
+++ b/include/arbor/distributed_context.hpp
@@ -1,16 +1,11 @@
 #pragma once
 
+#include <memory>
 #include <string>
 
-#include <spike.hpp>
-#include <communication/gathered_vector.hpp>
-#include <util/pp_util.hpp>
-
-#if defined(ARB_HAVE_MPI)
-#   include "mpi_context.hpp"
-#endif
-#include "local_context.hpp"
-
+#include <arbor/spike.hpp>
+#include <arbor/communication/gathered_vector.hpp>
+#include <arbor/util/pp_util.hpp>
 
 namespace arb {
 
@@ -34,8 +29,6 @@ namespace arb {
 
 #define ARB_COLLECTIVE_TYPES_ float, double, int, std::uint32_t, std::uint64_t
 
-// distributed_context
-//
 // Defines the concept/interface for a distributed communication context.
 //
 // Uses value-semantic type erasure to define the interface, so that
@@ -49,8 +42,8 @@ class distributed_context {
 public:
     using spike_vector = std::vector<arb::spike>;
 
-    // default constructor uses a local context
-    distributed_context(): distributed_context(local_context()) {}
+    // default constructor uses a local context: see below.
+    distributed_context();
 
     template <typename Impl>
     distributed_context(Impl&& impl):
@@ -107,19 +100,19 @@ private:
         explicit wrap(Impl&& impl): wrapped(std::move(impl)) {}
 
         gathered_vector<arb::spike>
-        gather_spikes(const spike_vector& local_spikes) const {
+        gather_spikes(const spike_vector& local_spikes) const override {
             return wrapped.gather_spikes(local_spikes);
         }
-        int id() const {
+        int id() const override {
             return wrapped.id();
         }
-        int size() const {
+        int size() const override {
             return wrapped.size();
         }
-        void barrier() const {
+        void barrier() const override {
             wrapped.barrier();
         }
-        std::string name() const {
+        std::string name() const override {
             return wrapped.name();
         }
 
@@ -135,5 +128,47 @@ private:
     std::unique_ptr<interface> impl_;
 };
 
+struct local_context { // context with a single participant: id 0, size 1, name "local"
+    gathered_vector<arb::spike>
+    gather_spikes(const std::vector<arb::spike>& local_spikes) const {
+        using count_type = typename gathered_vector<arb::spike>::count_type;
+        return gathered_vector<arb::spike>(
+            std::vector<arb::spike>(local_spikes), // copy of the caller's spikes
+            {0u, static_cast<count_type>(local_spikes.size())} // {0, spike count}; presumably the partition bounds -- confirm against gathered_vector
+        );
+    }
+
+    int id() const { return 0; }
+
+    int size() const { return 1; }
+
+    template <typename T>
+    T min(T value) const { return value; } // returns the argument unchanged
+
+    template <typename T>
+    T max(T value) const { return value; } // returns the argument unchanged
+
+    template <typename T>
+    T sum(T value) const { return value; } // returns the argument unchanged
+
+    template <typename T>
+    std::vector<T> gather(T value, int) const { return {std::move(value)}; } // one-element vector; the int argument is ignored
+
+    void barrier() const {} // no-op
+
+    std::string name() const { return "local"; }
+};
+
+inline distributed_context::distributed_context():
+    distributed_context(local_context())
+{}
+
+// MPI context creation functions only provided if built with MPI support.
+
+distributed_context mpi_context();
+
+template <typename MPICommType>
+distributed_context mpi_context(MPICommType);
+
 } // namespace arb
 
diff --git a/include/arbor/fvm_types.hpp b/include/arbor/fvm_types.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9a5bf56dffd875c8bbfa9604540d5f7487b3f059
--- /dev/null
+++ b/include/arbor/fvm_types.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <arbor/common_types.hpp>
+
+// Basic types shared across FVM implementations/backends.
+
+namespace arb {
+
+using fvm_value_type = double;
+using fvm_size_type = cell_local_size_type;
+using fvm_index_type = int;
+
+} // namespace arb
diff --git a/src/ion.hpp b/include/arbor/ion.hpp
similarity index 100%
rename from src/ion.hpp
rename to include/arbor/ion.hpp
diff --git a/src/mechanism.hpp b/include/arbor/mechanism.hpp
similarity index 90%
rename from src/mechanism.hpp
rename to include/arbor/mechanism.hpp
index 8b4f2fd5b935443dcf8a18f15298e35f913f12f0..ff28b43df1f2716f620bde858adeb0df571635cc 100644
--- a/src/mechanism.hpp
+++ b/include/arbor/mechanism.hpp
@@ -4,9 +4,9 @@
 #include <string>
 #include <vector>
 
-#include <backends/fvm_types.hpp>
-#include <ion.hpp>
-#include <mechinfo.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/ion.hpp>
+#include <arbor/mechinfo.hpp>
 
 namespace arb {
 
@@ -70,12 +70,12 @@ public:
     virtual ~mechanism() = default;
 
     // Per-cell group identifier for an instantiated mechanism.
-    fvm_size_type mechanism_id() const { return mechanism_id_; }
+    unsigned  mechanism_id() const { return mechanism_id_; }
 
 protected:
     // Per-cell group identifier for an instantiation of a mechanism; set by
     // concrete_mechanism<B>::instantiate()
-    fvm_size_type mechanism_id_ = -1;
+    unsigned  mechanism_id_ = -1;
 };
 
 // Backend-specific implementations provide mechanisms that are derived from `concrete_mechanism<Backend>`,
@@ -87,7 +87,7 @@ public:
     using backend = Backend;
 
     // Instantiation: allocate per-instance state; set views/pointers to shared data.
-    virtual void instantiate(fvm_size_type id, typename backend::shared_state&, const layout&) = 0;
+    virtual void instantiate(unsigned  id, typename backend::shared_state&, const layout&) = 0;
 };
 
 
diff --git a/src/mechcat.hpp b/include/arbor/mechcat.hpp
similarity index 98%
rename from src/mechcat.hpp
rename to include/arbor/mechcat.hpp
index 9e30eb98e84fd743964f76faf7a636b97e0c7d8f..981112f443496e433fc87dceff4b3d544eae930c 100644
--- a/src/mechcat.hpp
+++ b/include/arbor/mechcat.hpp
@@ -6,8 +6,8 @@
 #include <typeindex>
 #include <vector>
 
-#include <mechinfo.hpp>
-#include <mechanism.hpp>
+#include <arbor/mechinfo.hpp>
+#include <arbor/mechanism.hpp>
 
 // Mechanism catalogue maintains:
 //
diff --git a/src/mechinfo.hpp b/include/arbor/mechinfo.hpp
similarity index 97%
rename from src/mechinfo.hpp
rename to include/arbor/mechinfo.hpp
index 6e64bd88c42d09c358ba31ff27dc6a8f83030eb3..2ee9faa928b0ccaf6685a6c57bb587caa6eadc47 100644
--- a/src/mechinfo.hpp
+++ b/include/arbor/mechinfo.hpp
@@ -10,8 +10,8 @@
 #include <utility>
 #include <vector>
 
-#include <ion.hpp>
-#include <util/enumhash.hpp>
+#include <arbor/ion.hpp>
+#include <arbor/util/enumhash.hpp>
 
 namespace arb {
 
diff --git a/include/arbor/profile/clock.hpp b/include/arbor/profile/clock.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..c4e9c5c9ca8a3e32dc27ac7303f74a65557db3ef
--- /dev/null
+++ b/include/arbor/profile/clock.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+typedef unsigned long long tick_type;
+
+// Assuming POSIX monotonic clock is available; add
+// feature test if we need to fall back to generic or
+// other implementation.
+
+namespace arb {
+namespace profile {
+
+tick_type posix_clock_gettime_monotonic_ns();
+
+struct posix_clock_monotonic {
+    // Current reading of the POSIX monotonic clock, as a nanosecond tick count.
+    static tick_type now() { return posix_clock_gettime_monotonic_ns(); }
+    // Conversion factor from ticks (nanoseconds) to seconds.
+    static constexpr double seconds_per_tick() { return 1.e-9; }
+};
+
+using default_clock = posix_clock_monotonic;
+
+} // namespace profile
+} // namespace arb
diff --git a/src/profiling/meter.hpp b/include/arbor/profile/meter.hpp
similarity index 96%
rename from src/profiling/meter.hpp
rename to include/arbor/profile/meter.hpp
index f5ead8f161ae15e7b0d0120fd865d2c1909cda34..054c7d3ee25a8fd50882097c2c4b3f63de8237f1 100644
--- a/src/profiling/meter.hpp
+++ b/include/arbor/profile/meter.hpp
@@ -5,7 +5,7 @@
 #include <vector>
 
 namespace arb {
-namespace util {
+namespace profile {
 
 // A meter can be used to take a measurement of resource consumption, for
 // example wall time, memory or energy consumption.
@@ -39,5 +39,5 @@ public:
 
 using meter_ptr = std::unique_ptr<meter>;
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/meter_manager.hpp b/include/arbor/profile/meter_manager.hpp
similarity index 83%
rename from src/profiling/meter_manager.hpp
rename to include/arbor/profile/meter_manager.hpp
index f5d3eef2f1bb399cb99a98772a757f68fa0467f8..63e90f279ee7529bcd71b2cca1639f48aa165b4e 100644
--- a/src/profiling/meter_manager.hpp
+++ b/include/arbor/profile/meter_manager.hpp
@@ -1,16 +1,15 @@
 #pragma once
 
 #include <memory>
+#include <string>
 #include <vector>
 
-#include <communication/distributed_context.hpp>
-#include <json/json.hpp>
-
-#include "meter.hpp"
-#include "profiler.hpp"
+#include <arbor/distributed_context.hpp>
+#include <arbor/profile/meter.hpp>
+#include <arbor/profile/timer.hpp>
 
 namespace arb {
-namespace util {
+namespace profile {
 
 // A measurement has the following:
 //  * name
@@ -31,12 +30,9 @@ struct measurement {
 
 class meter_manager {
 private:
-    using timer_type = arb::threading::timer;
-    using time_point = timer_type::time_point;
-
     bool started_ = false;
 
-    time_point start_time_;
+    tick_type start_time_;
     std::vector<double> times_;
 
     std::vector<std::unique_ptr<meter>> meters_;
@@ -64,9 +60,8 @@ struct meter_report {
     std::vector<std::string> hosts;
 };
 
-nlohmann::json to_json(const meter_report&);
 meter_report make_meter_report(const meter_manager& manager);
 std::ostream& operator<<(std::ostream& o, const meter_report& report);
 
-} // namespace util
+} // namespace profile
 } // namespace arb
diff --git a/src/profiling/profiler.hpp b/include/arbor/profile/profiler.hpp
similarity index 67%
rename from src/profiling/profiler.hpp
rename to include/arbor/profile/profiler.hpp
index 220438114af531287fe2cc80ccde7948e37e9878..6d83189202b3b639afed0888effeb2a74724d991 100644
--- a/src/profiling/profiler.hpp
+++ b/include/arbor/profile/profiler.hpp
@@ -5,10 +5,10 @@
 #include <unordered_map>
 #include <vector>
 
-#include <threading/threading.hpp>
+#include <arbor/profile/timer.hpp>
 
 namespace arb {
-namespace util {
+namespace profile {
 
 // type used for region identifiers
 using region_id_type = std::size_t;
@@ -40,25 +40,6 @@ std::size_t profiler_region_id(const char* name);
 
 std::ostream& operator<<(std::ostream&, const profile&);
 
-#ifdef ARB_HAVE_PROFILING
-
-    // enter a profiling region
-    #define PE(name) \
-        { \
-            static std::size_t region_id__ = arb::util::profiler_region_id(#name); \
-            arb::util::profiler_enter(region_id__); \
-        }
-
-    // leave a profling region
-    #define PL arb::util::profiler_leave
-
-#else
-
-    #define PE(name)
-    #define PL()
-
-#endif
-
-} // namespace util
+} // namespace profile
 } // namespace arb
 
diff --git a/include/arbor/profile/timer.hpp b/include/arbor/profile/timer.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..25ff7dd6dad20fc5113ffca32698107d317ac667
--- /dev/null
+++ b/include/arbor/profile/timer.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <arbor/profile/clock.hpp>
+
+namespace arb {
+namespace profile {
+
+// Stateless stopwatch over Clock: tic() samples the clock; toc(t) is the time in seconds since sample t.
+template <typename Clock = default_clock>
+struct timer {
+    static tick_type tic() { return Clock::now(); }
+
+    static double toc(tick_type t) {
+        const auto elapsed_ticks = Clock::now()-t;
+        return elapsed_ticks*Clock::seconds_per_tick();
+    }
+};
+
+} // namespace profile
+} // namespace arb
diff --git a/src/spike.hpp b/include/arbor/spike.hpp
similarity index 95%
rename from src/spike.hpp
rename to include/arbor/spike.hpp
index 422e5aad62c3ae86cc82094fbcf88ae8d5268494..4ae56ee043946839f510bdc886bc374827ca14df 100644
--- a/src/spike.hpp
+++ b/include/arbor/spike.hpp
@@ -3,7 +3,7 @@
 #include <ostream>
 #include <type_traits>
 
-#include <common_types.hpp>
+#include <arbor/common_types.hpp>
 
 namespace arb {
 
diff --git a/include/arbor/threadinfo.hpp b/include/arbor/threadinfo.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..95de0c14f611bbe490482998c14b3a7e84f14ea5
--- /dev/null
+++ b/include/arbor/threadinfo.hpp
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <string>
+
+// Query underlying threading implementation for information.
+// (Stop-gap until we virtualize threading interface.)
+
+namespace arb {
+
+int num_threads();
+std::string thread_implementation();
+
+} // namespace arb
diff --git a/src/util/compat.hpp b/include/arbor/util/compat.hpp
similarity index 100%
rename from src/util/compat.hpp
rename to include/arbor/util/compat.hpp
diff --git a/src/util/enumhash.hpp b/include/arbor/util/enumhash.hpp
similarity index 100%
rename from src/util/enumhash.hpp
rename to include/arbor/util/enumhash.hpp
diff --git a/include/arbor/util/lexcmp_def.hpp b/include/arbor/util/lexcmp_def.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6f0760c0d26b65279dbc79b4c8838688fdd11e1
--- /dev/null
+++ b/include/arbor/util/lexcmp_def.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+/*
+ * Macro definitions for defining comparison operators for
+ * record-like types.
+ *
+ * Use:
+ *
+ * To define comparison operations for a record type xyzzy
+ * with fields foo, bar and baz:
+ *
+ * ARB_DEFINE_LEXICOGRAPHIC_ORDERING(xyzzy,(a.foo,a.bar,a.baz),(b.foo,b.bar,b.baz))
+ *
+ * The explicit use of 'a' and 'b' in the second and third parameters
+ * is needed only to save a heroic amount of preprocessor macro
+ * deep magic.
+ *
+ */
+
+#include <tuple>
+
+#define ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(proxy,op,type,a_fields,b_fields) \
+inline bool operator op(const type& a,const type& b) { return proxy a_fields op proxy b_fields; }
+
+#define ARB_DEFINE_LEXICOGRAPHIC_ORDERING(type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,<,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,>,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,<=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,>=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,!=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,==,type,a_fields,b_fields)
+
+#define ARB_DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,<,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,>,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,<=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,>=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,!=,type,a_fields,b_fields) \
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,==,type,a_fields,b_fields)
+
diff --git a/src/util/optional.hpp b/include/arbor/util/optional.hpp
similarity index 97%
rename from src/util/optional.hpp
rename to include/arbor/util/optional.hpp
index 6166d8e49c6b5a64dad5ff89ff1131040773b913..0822ef0beb910c8f3cdaa1f4e7480a2565f78064 100644
--- a/src/util/optional.hpp
+++ b/include/arbor/util/optional.hpp
@@ -40,8 +40,7 @@
 #include <stdexcept>
 #include <utility>
 
-#include "util/meta.hpp"
-#include "util/uninitialized.hpp"
+#include <arbor/util/uninitialized.hpp>
 
 namespace arb {
 namespace util {
@@ -78,7 +77,7 @@ namespace detail {
     struct optional_tag {};
 
     template <typename X>
-    using is_optional = std::is_base_of<optional_tag, decay_t<X>>;
+    using is_optional = std::is_base_of<optional_tag, typename std::decay<X>::type>;
 
     template <typename D, typename X>
     struct wrapped_type_impl {
@@ -92,7 +91,7 @@ namespace detail {
 
     template <typename X>
     struct wrapped_type {
-       using type = typename wrapped_type_impl<decay_t<X>, X>::type;
+        using type = typename wrapped_type_impl<typename std::decay<X>::type, X>::type;
     };
 
     template <typename X>
@@ -170,7 +169,7 @@ namespace detail {
 
     // type utilities
     template <typename T>
-    using enable_unless_optional_t = enable_if_t<!is_optional<T>::value>;
+    using enable_unless_optional_t = typename std::enable_if<!is_optional<T>::value>::type;
 
     // avoid nonnull address warnings when using operator| with e.g. char array constants
     template <typename T>
diff --git a/src/util/pp_util.hpp b/include/arbor/util/pp_util.hpp
similarity index 100%
rename from src/util/pp_util.hpp
rename to include/arbor/util/pp_util.hpp
diff --git a/src/util/uninitialized.hpp b/include/arbor/util/uninitialized.hpp
similarity index 87%
rename from src/util/uninitialized.hpp
rename to include/arbor/util/uninitialized.hpp
index a1c42d55da9cac9970105480780e6d916b4ff071..a94b3c62ec487764fc56686146b97f339e54f736 100644
--- a/src/util/uninitialized.hpp
+++ b/include/arbor/util/uninitialized.hpp
@@ -12,12 +12,19 @@
 #include <type_traits>
 #include <utility>
 
-#include "util/compat.hpp"
-#include "util/meta.hpp"
+#include <arbor/util/compat.hpp>
 
 namespace arb {
 namespace util {
 
+template <typename T>
+using enable_if_copy_constructible_t =
+    typename std::enable_if<std::is_copy_constructible<T>::value>::type;
+
+template <typename... T>
+using enable_if_constructible_t =
+    typename std::enable_if<std::is_constructible<T...>::value>::type;
+
 /*
  * Maintains storage for a value of type X, with explicit
  * construction and destruction.
@@ -73,11 +80,11 @@ public:
 
     // Apply the one-parameter functor F to the value by reference.
     template <typename F>
-    result_of_t<F(reference)> apply(F&& f) { return f(ref()); }
+    typename std::result_of<F(reference)>::type apply(F&& f) { return f(ref()); }
 
     // Apply the one-parameter functor F to the value by const reference.
     template <typename F>
-    result_of_t<F(const_reference)> apply(F&& f) const { return f(cref()); }
+    typename std::result_of<F(const_reference)>::type apply(F&& f) const { return f(cref()); }
 };
 
 /*
diff --git a/include/git-source-id b/include/git-source-id
new file mode 100755
index 0000000000000000000000000000000000000000..dcfe9b710d899c81683736deb99bbf5173792305
--- /dev/null
+++ b/include/git-source-id
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+# arguments: version-string [feature...]
+
+version="$1"
+shift
+
+if gitlog=$(git log -1 --pretty=format:'%ci %H' 2>/dev/null); then
+    git diff --quiet HEAD 2>/dev/null || gitlog="${gitlog} modified"
+else
+    gitlog='unknown commit'
+fi
+
+cat << __end__
+#pragma once
+
+#define ARB_VERSION "${version}"
+#define ARB_SOURCE_ID "${gitlog}"
+
+namespace arb {
+extern const char version[];
+extern const char source_id[];
+}
+__end__
+
+for feature in "$@"; do
+    echo "#define ARB_${feature}_ENABLED"
+done
+
diff --git a/lmorpho/CMakeLists.txt b/lmorpho/CMakeLists.txt
index 67271afaf156482e1d7685efbaf61c898a416ede..0f338b9ad2e90694de7bbdcb638fec20de9f864b 100644
--- a/lmorpho/CMakeLists.txt
+++ b/lmorpho/CMakeLists.txt
@@ -1,5 +1,8 @@
 add_executable(lmorpho lmorpho.cpp lsystem.cpp lsys_models.cpp morphio.cpp)
 
-target_link_libraries(lmorpho LINK_PUBLIC arbor)
-target_link_libraries(lmorpho LINK_PUBLIC ${EXTERNAL_LIBRARIES})
+target_link_libraries(lmorpho PRIVATE arbor arbor-aux)
 
+# TODO: resolve public headers
+target_link_libraries(lmorpho PRIVATE arbor-private-headers)
+
+install(TARGETS lmorpho RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
diff --git a/lmorpho/lmorpho.cpp b/lmorpho/lmorpho.cpp
index efe7615175257ac68fdb509fdf63b5002a43ac8e..960981a1b72435fe2d546b7bf96729c10efc8106 100644
--- a/lmorpho/lmorpho.cpp
+++ b/lmorpho/lmorpho.cpp
@@ -6,14 +6,13 @@
 #include <vector>
 
 #include <tinyopt.hpp>
-#include <util/optional.hpp>
+#include <arbor/util/optional.hpp>
 
 #include "morphology.hpp"
 #include "morphio.hpp"
 #include "lsystem.hpp"
 #include "lsys_models.hpp"
 
-namespace to = arb::to;
 using arb::util::optional;
 using arb::util::nullopt;
 using arb::util::just;
diff --git a/mechanisms/BuildModules.cmake b/mechanisms/BuildModules.cmake
index d047e8128da1bcaf113bdeb2e7bed575b75fdf9e..a10850f66eed3fbe72afc797942b52d53d459add 100644
--- a/mechanisms/BuildModules.cmake
+++ b/mechanisms/BuildModules.cmake
@@ -1,10 +1,11 @@
 include(CMakeParseArguments)
 
-# Uses CMake variables modcc and use_external_modcc as set in top level CMakeLists.txt
+# If a MODCC executable is explicitly provided, don't make the in-tree modcc a dependency.
 
 function(build_modules)
-    cmake_parse_arguments(build_modules "" "TARGET;SOURCE_DIR;DEST_DIR;MECH_SUFFIX" "MODCC_FLAGS;GENERATES" ${ARGN})
+    cmake_parse_arguments(build_modules "" "MODCC;TARGET;SOURCE_DIR;DEST_DIR;MECH_SUFFIX" "MODCC_FLAGS;GENERATES" ${ARGN})
 
+    set(all_generated)
     foreach(mech ${build_modules_UNPARSED_ARGUMENTS})
         set(mod "${build_modules_SOURCE_DIR}/${mech}.mod")
         set(out "${build_modules_DEST_DIR}/${mech}")
@@ -14,8 +15,11 @@ function(build_modules)
         endforeach()
 
         set(depends "${mod}")
-        if(NOT use_external_modcc)
+        if(build_modules_MODCC)
+            set(modcc_bin ${build_modules_MODCC})
+        else()
             list(APPEND depends modcc)
+            set(modcc_bin $<TARGET_FILE:modcc>)
         endif()
 
         set(flags ${build_modules_MODCC_FLAGS} -o "${out}")
@@ -27,17 +31,17 @@ function(build_modules)
             OUTPUT ${generated}
             DEPENDS ${depends}
             WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-            COMMAND ${modcc} ${flags} ${mod}
+            COMMAND ${modcc_bin} ${flags} ${mod}
             COMMENT "modcc generating: ${generated}"
         )
-        set_source_files_properties(${generated}  PROPERTIES GENERATED TRUE)
-        list(APPEND all_mod_hpps ${generated})
+        set_source_files_properties(${generated} PROPERTIES GENERATED TRUE)
+        list(APPEND all_generated ${generated})
     endforeach()
 
     # Fake target to always trigger .mod -> .hpp/.cu dependencies because CMake
     if (build_modules_TARGET)
-        set(depends ${all_mod_hpps})
-        if(NOT use_external_modcc)
+        set(depends ${all_generated})
+        if(NOT build_modules_MODCC)
             list(APPEND depends modcc)
         endif()
         add_custom_target(${build_modules_TARGET} DEPENDS ${depends})
diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index b08342c633e9d8e9c642c27a6f63f7505742c190..3932382e4efd1336b3767affeef08338f681d613 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -1,32 +1,36 @@
 include(BuildModules.cmake)
 
-# the list of built-in mechanisms to be provided by default
+# The list of library-provided mechanisms used to populate the default catalogue:
 set(mechanisms pas hh expsyn exp2syn test_kin1 test_kinlva test_ca nax kdrmt kamt)
 
 set(mod_srcdir "${CMAKE_CURRENT_SOURCE_DIR}/mod")
 
-# Generate mechanism implementations for host/cpu environment
+# Generate mechanism implementations.
 
 set(mech_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
 file(MAKE_DIRECTORY "${mech_dir}")
 
+set(modcc_flags)
 if(ARB_VECTORIZE)
-    set(modcc_simd "-s")
+    list(APPEND modcc_flags "--simd")
+endif()
+if(ARB_WITH_PROFILING)
+    list(APPEND modcc_flags "--profile")
 endif()
 
 build_modules(
     ${mechanisms}
     SOURCE_DIR "${mod_srcdir}"
     DEST_DIR "${mech_dir}"
-    MODCC_FLAGS -t cpu -t gpu ${modcc_simd}
+    MODCC_FLAGS -t cpu -t gpu ${modcc_flags}
     GENERATES .hpp _cpu.cpp _gpu.cpp _gpu.cu
     TARGET build_all_mods
 )
 
-# Generate source for default mechanism catalogue
+# Generate source for default mechanism catalogue.
 
 set(catsrc ${CMAKE_CURRENT_BINARY_DIR}/default_catalogue.cpp)
-set(default_catalogue_options -I ${mech_dir} -o ${catsrc} -B multicore)
+set(default_catalogue_options -A arbor -I ${mech_dir} -o ${catsrc} -B multicore)
 if(ARB_WITH_CUDA)
     list(APPEND default_catalogue_options -B gpu)
 endif()
@@ -34,42 +38,21 @@ endif()
 add_custom_command(
     OUTPUT ${catsrc}
     COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/generate_default_catalogue ${default_catalogue_options} ${mechanisms}
-    DEPENDS build_all_mods generate_default_catalogue
+    DEPENDS generate_default_catalogue
 )
 
+add_custom_target(default_catalogue_cpp_target DEPENDS ${catsrc})
+add_dependencies(build_all_mods default_catalogue_cpp_target)
+
 # Make libraries with the implementations of the mechanism kernels.
 
+set(mech_sources ${catsrc})
 foreach(mech ${mechanisms})
-    list(APPEND cpu_mech_sources ${mech_dir}/${mech}_cpu.cpp)
-endforeach()
-
-add_library(arbormech ${cpu_mech_sources} ${catsrc})
-target_compile_options(arbormech PRIVATE ${CXXOPT_ARCH})
-set(mech_libs arbormech)
-
-if (ARB_AUTO_RUN_MODCC_ON_CHANGES)
-    add_dependencies(arbormech build_all_mods)
-endif()
-
-if(ARB_WITH_CUDA)
-    foreach(mech ${mechanisms})
-        list(APPEND cuda_mech_sources "${mech_dir}/${mech}_gpu.cpp" "${mech_dir}/${mech}_gpu.cu")
-    endforeach()
-
-    cuda_add_library(arbormechcu ${cuda_mech_sources})
-    set(mech_libs arbormech arbormechcu)
-
-    # force recompilation on changes to modcc or the underlying .mod files
-    if (ARB_AUTO_RUN_MODCC_ON_CHANGES)
-        add_dependencies(arbormechcu build_all_mods)
+    list(APPEND mech_sources ${mech_dir}/${mech}_cpu.cpp)
+    if(ARB_WITH_CUDA)
+        list(APPEND mech_sources ${mech_dir}/${mech}_gpu.cpp)
+        list(APPEND mech_sources ${mech_dir}/${mech}_gpu.cu)
     endif()
+endforeach()
 
-endif()
-
-# Until we merge our myriad static libraries, we prepend mech libs and also append mech libs
-# to capture generated default catalogue code interdependencies with arbor lib.
-
-list(INSERT ARB_LIBRARIES 0 ${mech_libs})
-list(APPEND ARB_LIBRARIES ${mech_libs})
-
-set(ARB_LIBRARIES "${ARB_LIBRARIES}" PARENT_SCOPE)
+set(arbor_mechanism_sources ${mech_sources} PARENT_SCOPE)
diff --git a/mechanisms/generate_default_catalogue b/mechanisms/generate_default_catalogue
index 1f7cba510a3bd23eb939aae7e7541dfe1e1fc7cc..31bc974d0792574b74fd467839361b575b0b375c 100755
--- a/mechanisms/generate_default_catalogue
+++ b/mechanisms/generate_default_catalogue
@@ -106,13 +106,17 @@ const mechanism_catalogue& global_default_catalogue() {
     def indent(n, lines):
         return '{{:<{0!s}}}'.format(n+1).format('\n').join(lines)
 
+    # TODO: use the commented include list below when private/public
+    # headers are resolved.
+
     return src.safe_substitute(dict(
         cmdline = " ".join(sys.argv),
         arbpfx = arbpfx,
         backend_includes = indent(0,
-            ['#include <{}backends/{}/fvm.hpp>'.format(arbpfx, b) for b in backends]),
+            # ['#include <{}backends/{}/fvm.hpp>'.format(arbpfx, b) for b in backends]),
+            ['#include "backends/{}/fvm.hpp"'.format(b) for b in backends]),
         module_includes = indent(0,
-            ['#include <{}{}.hpp>'.format(modpfx, m) for m in modules]),
+            ['#include "{}{}.hpp"'.format(modpfx, m) for m in modules]),
         add_modules = indent(4,
             ['cat.add("{0}", mechanism_{0}_info());'.format(m) for m in modules]),
         register_modules = indent(4,
diff --git a/modcc/CMakeLists.txt b/modcc/CMakeLists.txt
index 360c66cdc55df523c7b6db0bb3d92fd670aa6179..85b55cf9a114d67053176b491fce18aa8c37997a 100644
--- a/modcc/CMakeLists.txt
+++ b/modcc/CMakeLists.txt
@@ -1,4 +1,8 @@
-set(MODCC_SOURCES
+# The modcc compiler is split into a static library and driver;
+# unit tests for the driver also use this library.
+
+set(libmodcc_sources
+
     astmanip.cpp
     errorvisitor.cpp
     expression.cpp
@@ -12,9 +16,7 @@ set(MODCC_SOURCES
     symdiff.cpp
     symge.cpp
     token.cpp
-
     io/prefixbuf.cpp
-
     printer/cexpr_emit.cpp
     printer/cprinter.cpp
     printer/cudaprinter.cpp
@@ -22,15 +24,16 @@ set(MODCC_SOURCES
     printer/printerutil.cpp
 )
 
-include_directories("${PROJECT_SOURCE_DIR}/modcc")
+set(modcc_sources modcc.cpp)
 
-add_library(compiler ${MODCC_SOURCES})
+add_library(libmodcc STATIC ${libmodcc_sources})
+target_include_directories(libmodcc PUBLIC .)
+set_target_properties(libmodcc PROPERTIES OUTPUT_NAME modcc)
 
-add_executable(modcc modcc.cpp)
+add_executable(modcc ${modcc_sources})  # the modcc command-line driver
+target_link_libraries(modcc PRIVATE libmodcc ext-tclap)  # keyword signature; an executable exports no link interface
 
-target_link_libraries(modcc LINK_PUBLIC compiler)
+set_target_properties(modcc libmodcc PROPERTIES EXCLUDE_FROM_ALL "${ARB_WITH_EXTERNAL_MODCC}")  # quoted so an empty value cannot drop the property argument
+
+install(TARGETS modcc RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-set_target_properties(modcc
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/modcc"
-)
diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp
index 16d19d8583ccb5ffe742e33f37ddd4183cdff495..b3789bd6f2a8bf93e1266206eeafd1129348cdf0 100644
--- a/modcc/modcc.cpp
+++ b/modcc/modcc.cpp
@@ -9,6 +9,7 @@
 #include "printer/cprinter.hpp"
 #include "printer/cudaprinter.hpp"
 #include "printer/infoprinter.hpp"
+#include "printer/printeropt.hpp"
 #include "printer/simd.hpp"
 
 #include "modccutil.hpp"
@@ -67,7 +68,6 @@ struct Options {
     std::string modulename;
     bool verbose = true;
     bool analysis = false;
-    simd_spec simd = simd_spec::none;
     std::unordered_set<targetKind, enum_hash> targets;
 };
 
@@ -89,7 +89,6 @@ std::ostream& operator<<(std::ostream& out, simd_spec simd) {
 std::ostream& operator<<(std::ostream& out, const Options& opt) {
     static const char* noyes[2] = {"no", "yes"};
     static const std::string line_end = cyan(" |") + "\n";
-    static const std::string tableline = cyan("."+std::string(60, '-')+".")+"\n";
 
     std::string targets;
     for (targetKind t: opt.targets) {
@@ -97,14 +96,21 @@ std::ostream& operator<<(std::ostream& out, const Options& opt) {
     }
 
     return out <<
-        tableline <<
         table_prefix{"file"} << opt.modfile << line_end <<
         table_prefix{"output"} << (opt.outprefix.empty()? "-": opt.outprefix) << line_end <<
         table_prefix{"verbose"} << noyes[opt.verbose] << line_end <<
         table_prefix{"targets"} << targets << line_end <<
-        table_prefix{"simd"} << opt.simd << line_end <<
-        table_prefix{"analysis"} << noyes[opt.analysis] << line_end <<
-        tableline;
+        table_prefix{"analysis"} << noyes[opt.analysis] << line_end;
+}
+
+std::ostream& operator<<(std::ostream& out, const printer_options& popt) {
+    static const char* noyes[2] = {"no", "yes"};
+    static const std::string line_end = cyan(" |") + "\n";
+
+    return out <<
+        table_prefix{"namespace"} << popt.cpp_namespace << line_end <<
+        table_prefix{"profile"} << noyes[popt.profile] << line_end <<
+        table_prefix{"simd"} << popt.simd << line_end;
 }
 
 // Constraints for TCLAP arguments that are names for enumertion values.
@@ -155,6 +161,7 @@ struct SimdAbiConstraint: public TCLAP::Constraint<std::string> {
 
 int main(int argc, char **argv) {
     Options opt;
+    printer_options popt;
 
     try {
         TCLAP::CmdLine cmd("modcc code generator for arbor", ' ', "0.1");
@@ -165,6 +172,9 @@ int main(int argc, char **argv) {
         TCLAP::ValueArg<std::string>
             fout_arg("o", "output", "prefix for output file names", false, "", "filename", cmd);
 
+        TCLAP::ValueArg<std::string>
+            namespace_arg("N", "namespace", "namespace for generated code", false, "arb", "name", cmd);
+
         MapConstraint targets_arg_constraint(targetKindMap);
         TCLAP::MultiArg<std::string>
             target_arg("t", "target", "build module for cpu or gpu back-end", false, &targets_arg_constraint, cmd);
@@ -176,6 +186,8 @@ int main(int argc, char **argv) {
         TCLAP::ValueArg<std::string>
             simd_abi_arg("S", "simd-abi", "override SIMD ABI in generated code. Use /n suffix to force SIMD width to be size n. Examples: 'avx2', 'native/4', ...", false, "", &simd_abi_constraint, cmd);
 
+        TCLAP::SwitchArg profile_arg("P","profile","build with profiled kernels", cmd, false);
+
         TCLAP::SwitchArg verbose_arg("V","verbose","toggle verbose mode", cmd, false);
 
         TCLAP::SwitchArg analysis_arg("A","analyse","toggle analysis mode", cmd, false);
@@ -191,10 +203,13 @@ int main(int argc, char **argv) {
         opt.verbose = verbose_arg.getValue();
         opt.analysis = analysis_arg.getValue();
 
+        popt.cpp_namespace = namespace_arg.getValue();
+        popt.profile = profile_arg.getValue();
+
         if (simd_arg.getValue()) {
-            opt.simd = simd_spec(simd_spec::native);
+            popt.simd = simd_spec(simd_spec::native);
             if (!simd_abi_arg.getValue().empty()) {
-                opt.simd = parse_simd_spec(simd_abi_arg.getValue());
+                popt.simd = parse_simd_spec(simd_abi_arg.getValue());
             }
         }
 
@@ -214,7 +229,11 @@ int main(int argc, char **argv) {
         };
 
         if (opt.verbose) {
+            static const std::string tableline = cyan("."+std::string(60, '-')+".")+"\n";
+            cout << tableline;
             cout << opt;
+            cout << popt;
+            cout << tableline;
         }
 
         // Load module file and initialize Module object.
@@ -254,17 +273,16 @@ int main(int argc, char **argv) {
         // If no output prefix given, use the module name.
         std::string prefix = opt.outprefix.empty()? m.module_name(): opt.outprefix;
 
-        io::write_all(build_info_header(m, "arb"), prefix+".hpp");
-
+        io::write_all(build_info_header(m, popt), prefix+".hpp");
         for (targetKind target: opt.targets) {
             std::string outfile = prefix;
             switch (target) {
             case targetKind::gpu:
-                io::write_all(emit_cuda_cpp_source(m, "arb"), outfile+"_gpu.cpp");
-                io::write_all(emit_cuda_cu_source(m, "arb"), outfile+"_gpu.cu");
+                io::write_all(emit_cuda_cpp_source(m, popt), outfile+"_gpu.cpp");
+                io::write_all(emit_cuda_cu_source(m, popt), outfile+"_gpu.cu");
                 break;
             case targetKind::cpu:
-                io::write_all(emit_cpp_source(m, "arb", opt.simd), outfile+"_cpu.cpp");
+                io::write_all(emit_cpp_source(m, popt), outfile+"_cpu.cpp");
                 break;
             }
         }
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index e7b478b6dba5cbc18848551bba97047b9d48d8b7..d9ba01ac643ccb58c52166dd0d523bf479128268 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -1,5 +1,6 @@
 #include <cmath>
 #include <iostream>
+#include <regex>
 #include <string>
 #include <unordered_set>
 
@@ -8,6 +9,7 @@
 #include "io/prefixbuf.hpp"
 #include "printer/cexpr_emit.hpp"
 #include "printer/cprinter.hpp"
+#include "printer/printeropt.hpp"
 #include "printer/printerutil.hpp"
 
 using io::indent;
@@ -81,10 +83,10 @@ static std::string ion_state_index(std::string ion_name) {
     return "ion_"+ion_name+"_index_";
 }
 
-std::string emit_cpp_source(const Module& module_, const std::string& ns, simd_spec simd) {
+std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     std::string name = module_.module_name();
     std::string class_name = "mechanism_cpu_"+name;
-    auto ns_components = namespace_components(ns);
+    auto ns_components = namespace_components(opt.cpp_namespace);
 
     NetReceiveExpression* net_receive = find_net_receive(module_);
     APIMethod* init_api = find_api_method(module_, "nrn_init");
@@ -92,7 +94,7 @@ std::string emit_cpp_source(const Module& module_, const std::string& ns, simd_s
     APIMethod* current_api = find_api_method(module_, "nrn_current");
     APIMethod* write_ions_api = find_api_method(module_, "write_ions");
 
-    bool with_simd = simd.abi!=simd_spec::none;
+    bool with_simd = opt.simd.abi!=simd_spec::none;
 
     // init_api, state_api, current_api methods are mandatory:
 
@@ -104,20 +106,42 @@ std::string emit_cpp_source(const Module& module_, const std::string& ns, simd_s
     auto ion_deps = module_.ion_deps();
     std::string fingerprint = "<placeholder>";
 
+    auto profiler_enter = [name, opt](const char* region_prefix) -> std::string {
+        static std::regex invalid_profile_chars("[^a-zA-Z0-9]");
+
+        if (opt.profile) {
+            std::string region_name = region_prefix;
+            region_name += '_';
+            region_name += std::regex_replace(name, invalid_profile_chars, "");
+
+            return
+                "{\n"
+                "    static auto id = ::arb::profile::profiler_region_id(\""
+                + region_name + "\");\n"
+                "    ::arb::profile::profiler_enter(id);\n"
+                "}\n";
+        }
+        else return "";
+    };
+
+    auto profiler_leave = [opt]() -> std::string {
+        return opt.profile? "::arb::profile::profiler_leave();\n": "";
+    };
+
     io::pfxstringstream out;
 
     out <<
         "#include <cmath>\n"
         "#include <cstddef>\n"
         "#include <memory>\n"
-        "#include <" << arb_header_prefix() << "backends/multicore/mechanism.hpp>\n"
-        "#include <" << arb_header_prefix() << "math.hpp>\n";
+        "#include <" << arb_private_header_prefix() << "backends/multicore/mechanism.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "math.hpp>\n";
 
-    with_profiling() &&
-        out << "#include <" << arb_header_prefix() << "profiling/profiler.hpp>\n";
+    opt.profile &&
+        out << "#include <" << arb_header_prefix() << "profile/profiler.hpp>\n";
 
     if (with_simd) {
-        out << "#include <" << arb_header_prefix() << "simd/simd.hpp>\n";
+        out << "#include <" << arb_private_header_prefix() << "simd/simd.hpp>\n";
     }
 
     out <<
@@ -145,15 +169,15 @@ std::string emit_cpp_source(const Module& module_, const std::string& ns, simd_s
             "using S::index_constraint;\n"
             "static constexpr unsigned simd_width_ = ";
 
-        if (!simd.width) {
+        if (!opt.simd.width) {
             out << "S::simd_abi::native_width<fvm_value_type>::value;\n";
         }
         else {
-            out << simd.width << ";\n";
+            out << opt.simd.width << ";\n";
         }
 
         std::string abi = "S::simd_abi::";
-        switch (simd.abi) {
+        switch (opt.simd.abi) {
         case simd_spec::avx:    abi += "avx";    break;
         case simd_spec::avx2:   abi += "avx2";   break;
         case simd_spec::avx512: abi += "avx512"; break;
@@ -316,15 +340,15 @@ std::string emit_cpp_source(const Module& module_, const std::string& ns, simd_s
     out << popindent << "}\n\n";
 
     out << "void " << class_name << "::nrn_state() {\n" << indent;
-    with_profiling() && out << "PE(advance_integrate_state_" << name << ");\n";
+    out << profiler_enter("advance_integrate_state");
     emit_body(state_api);
-    with_profiling() && out <<  "PL();\n";
+    out << profiler_leave();
     out << popindent << "}\n\n";
 
     out << "void " << class_name << "::nrn_current() {\n" << indent;
-    with_profiling() && out << "PE(advance_integrate_current_" << name << ");\n";
+    out << profiler_enter("advance_integrate_current");
     emit_body(current_api);
-    with_profiling() && out <<  "PL();\n";
+    out << profiler_leave();
     out << popindent << "}\n\n";
 
     out << "void " << class_name << "::write_ions() {\n" << indent;
diff --git a/modcc/printer/cprinter.hpp b/modcc/printer/cprinter.hpp
index e7e96ab81196624192fc8e3f4cf574dac3156d7f..e7712b87c9644551e95a9242aacde41f410edfe5 100644
--- a/modcc/printer/cprinter.hpp
+++ b/modcc/printer/cprinter.hpp
@@ -7,9 +7,9 @@
 #include "visitor.hpp"
 
 #include "printer/cexpr_emit.hpp"
-#include "printer/simd.hpp"
+#include "printer/printeropt.hpp"
 
-std::string emit_cpp_source(const Module& m, const std::string& ns, simd_spec simd);
+std::string emit_cpp_source(const Module& m, const printer_options& opt);
 
 // CPrinter and SimdPrinter visitors exposed in header for testing purposes only.
 
diff --git a/modcc/printer/cudaprinter.cpp b/modcc/printer/cudaprinter.cpp
index 583a8501692b4d6a79d3dc657704e7581c706504..86849396a4bc524919e226e2f2f518bf0f0692bb 100644
--- a/modcc/printer/cudaprinter.cpp
+++ b/modcc/printer/cudaprinter.cpp
@@ -48,11 +48,11 @@ static std::string ion_state_index(std::string ion_name) {
     return "ion_"+ion_name+"_index_";
 }
 
-std::string emit_cuda_cpp_source(const Module& module_, const std::string& ns) {
+std::string emit_cuda_cpp_source(const Module& module_, const printer_options& opt) {
     std::string name = module_.module_name();
     std::string class_name = make_class_name(name);
     std::string ppack_name = make_ppack_name(name);
-    auto ns_components = namespace_components(ns);
+    auto ns_components = namespace_components(opt.cpp_namespace);
 
     NetReceiveExpression* net_receive = find_net_receive(module_);
 
@@ -64,12 +64,12 @@ std::string emit_cuda_cpp_source(const Module& module_, const std::string& ns) {
     io::pfxstringstream out;
 
     net_receive && out <<
-        "#include <" << arb_header_prefix() << "backends/event.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/multi_event_stream_state.hpp>\n";
+        "#include <" << arb_private_header_prefix() << "backends/event.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/multi_event_stream_state.hpp>\n";
 
     out <<
-        "#include <" << arb_header_prefix() << "backends/gpu/mechanism.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/gpu/mechanism_ppack_base.hpp>\n";
+        "#include <" << arb_private_header_prefix() << "backends/gpu/mechanism.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/gpu/mechanism_ppack_base.hpp>\n";
 
     out << "\n" << namespace_declaration_open(ns_components) << "\n";
 
@@ -194,11 +194,11 @@ std::string emit_cuda_cpp_source(const Module& module_, const std::string& ns) {
     return out.str();
 }
 
-std::string emit_cuda_cu_source(const Module& module_, const std::string& ns) {
+std::string emit_cuda_cu_source(const Module& module_, const printer_options& opt) {
     std::string name = module_.module_name();
     std::string class_name = make_class_name(name);
     std::string ppack_name = make_ppack_name(name);
-    auto ns_components = namespace_components(ns);
+    auto ns_components = namespace_components(opt.cpp_namespace);
     const bool is_point_proc = module_.kind() == moduleKind::point;
 
     NetReceiveExpression* net_receive = find_net_receive(module_);
@@ -215,14 +215,14 @@ std::string emit_cuda_cu_source(const Module& module_, const std::string& ns) {
 
     out <<
         "#include <iostream>\n"
-        "#include <" << arb_header_prefix() << "backends/event.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/multi_event_stream_state.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/gpu/cuda_common.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/gpu/math.hpp>\n"
-        "#include <" << arb_header_prefix() << "backends/gpu/mechanism_ppack_base.hpp>\n";
+        "#include <" << arb_private_header_prefix() << "backends/event.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/multi_event_stream_state.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/gpu/cuda_common.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/gpu/math_cu.hpp>\n"
+        "#include <" << arb_private_header_prefix() << "backends/gpu/mechanism_ppack_base.hpp>\n";
 
     is_point_proc && out <<
-        "#include <" << arb_header_prefix() << "backends/gpu/reduce_by_key.hpp>\n";
+        "#include <" << arb_private_header_prefix() << "backends/gpu/reduce_by_key.hpp>\n";
 
     out << "\n" << namespace_declaration_open(ns_components) << "\n";
 
diff --git a/modcc/printer/cudaprinter.hpp b/modcc/printer/cudaprinter.hpp
index a4ec7df462875d65fdc71050a18b89f64e8bebe4..2b41f38fa6c1d9851f89a89d6e1090278f8aa390 100644
--- a/modcc/printer/cudaprinter.hpp
+++ b/modcc/printer/cudaprinter.hpp
@@ -6,8 +6,8 @@
 #include "module.hpp"
 #include "cexpr_emit.hpp"
 
-std::string emit_cuda_cpp_source(const Module& m, const std::string& ns);
-std::string emit_cuda_cu_source(const Module& m, const std::string& ns);
+std::string emit_cuda_cpp_source(const Module& m, const printer_options& opt);
+std::string emit_cuda_cu_source(const Module& m, const printer_options& opt);
 
 class CudaPrinter: public CPrinter {
 public:
diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp
index afeee15953f690424de1c53784c2b62e0cfa9b54..cd9f413c4c3044a4d8d5ede2b89873b5f49b428e 100644
--- a/modcc/printer/infoprinter.cpp
+++ b/modcc/printer/infoprinter.cpp
@@ -49,16 +49,13 @@ std::ostream& operator<<(std::ostream& out, const ion_dep_info& wrap) {
         << boolalpha[ion.writes_concentration_ext()] << "}}";
 }
 
-std::string build_info_header(const Module& m, const std::string& qual_namespace) {
+std::string build_info_header(const Module& m, const printer_options& opt) {
     using io::indent;
     using io::popindent;
 
-    // TODO: When arbor headers are moved into a named hierarchy, change this prefix.
-    const char* arb_header_prefix = "";
-
     std::string name = m.module_name();
     auto ids = public_variable_ids(m);
-    auto ns_components = namespace_components(qual_namespace);
+    auto ns_components = namespace_components(opt.cpp_namespace);
 
     io::pfxstringstream out;
 
@@ -66,8 +63,8 @@ std::string build_info_header(const Module& m, const std::string& qual_namespace
         "#pragma once\n"
         "#include <memory>\n"
         "\n"
-        "#include <" << arb_header_prefix << "mechanism.hpp>\n"
-        "#include <" << arb_header_prefix << "mechinfo.hpp>\n"
+        "#include <" << arb_header_prefix() << "mechanism.hpp>\n"
+        "#include <" << arb_header_prefix() << "mechinfo.hpp>\n"
         "\n"
         << namespace_declaration_open(ns_components) <<
         "\n"
diff --git a/modcc/printer/infoprinter.hpp b/modcc/printer/infoprinter.hpp
index 24ee11fcf544c5a97492def5b91f32d0af53a2c1..8f6e4e48eb1257da0c9686ceb5988caea6e1284c 100644
--- a/modcc/printer/infoprinter.hpp
+++ b/modcc/printer/infoprinter.hpp
@@ -3,9 +3,10 @@
 #include <string>
 
 #include "module.hpp"
+#include "printer/printeropt.hpp"
 
 // Build header file comprising mechanism metadata
 // and declarations of backend-specific mechanism implementations.
 
-std::string build_info_header(const Module& m, const std::string& qual_namespace);
+std::string build_info_header(const Module& m, const printer_options& opt);
 
diff --git a/modcc/printer/printeropt.hpp b/modcc/printer/printeropt.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c4b9a32b30c54f288ece59d5bd07f750af5f38c
--- /dev/null
+++ b/modcc/printer/printeropt.hpp
@@ -0,0 +1,20 @@
+#pragma once
+
+// Flags controlling printer behaviour and code generation.
+// (Not all flags need be recognized by all printers.)
+
+#include <string>
+
+#include "simd.hpp"
+struct printer_options {
+    // C++ namespace for generated code.
+    std::string cpp_namespace;
+
+    // Explicit vectorization (C printer only)? Default is none.
+    simd_spec simd;
+
+    // Instrument kernels? True => use ::arb::profile regions.
+    // Currently only supported for C printer.
+
+    bool profile = false;
+};
diff --git a/modcc/printer/printerutil.hpp b/modcc/printer/printerutil.hpp
index ccc0c593924b6d72917bf653f9cdb283d9ca214c..6881d2cd4a2da21764caa0065dbcfb29333e0ea7 100644
--- a/modcc/printer/printerutil.hpp
+++ b/modcc/printer/printerutil.hpp
@@ -14,6 +14,14 @@
 std::vector<std::string> namespace_components(const std::string& qualified_namespace);
 
 inline const char* arb_header_prefix() {
+    static const char* prefix = "arbor/";
+    return prefix;
+}
+
+// TODO: this function will be obsoleted once arbor private/public headers are
+// properly split.
+
+inline const char* arb_private_header_prefix() {
     static const char* prefix = "";
     return prefix;
 }
diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh
index 9175044c3bb89c0e7ca16063c7ef88a665ab2cb9..3a6e3dbcdb84ad481dc84d53a3043a7c52e6b4b9 100755
--- a/scripts/travis/build.sh
+++ b/scripts/travis/build.sh
@@ -28,9 +28,11 @@ if [[ "${WITH_DISTRIBUTED}" = "mpi" ]]; then
     CC="mpicc"
     CXX="mpicxx"
     launch="mpiexec -n 4"
+    WITH_MPI="ON"
 else
     echo "mpi        : off"
     launch=""
+    WITH_MPI="OFF"
 fi
 
 #
@@ -44,22 +46,28 @@ cd $build_path
 #
 progress "Configuring with cmake"
 
-cmake_flags="-DARB_WITH_ASSERTIONS=on -DARB_THREADING_MODEL=${WITH_THREAD} -DARB_DISTRIBUTED_MODEL=${WITH_DISTRIBUTED} ${CXX_FLAGS}"
+cmake_flags="-DARB_WITH_ASSERTIONS=on -DARB_THREADING_MODEL=${WITH_THREAD} -DARB_WITH_MPI=${WITH_MPI} ${CXX_FLAGS}"
 echo "cmake flags: ${cmake_flags}"
 cmake .. ${cmake_flags} || error "unable to configure cmake"
 
 export NMC_NUM_THREADS=2
 
 progress "Unit tests"
-make test.exe -j4  || error "building unit tests"
-./tests/test.exe --gtest_color=no || error "running unit tests"
+make unit -j4                || error "building unit tests"
+./bin/unit --gtest_color=no  || error "running unit tests"
 
-progress "Global communication tests"
-make global_communication.exe -j4          || error "building global communication tests"
-${launch} ./tests/global_communication.exe || error "running global communication tests"
+progress "Distributed unit tests (local)"
+make unit-local -j4          || error "building local distributed unit tests"
+./bin/unit-local             || error "running local distributed unit tests"
+
+if [[ "${WITH_DISTRIBUTED}" = "mpi" ]]; then
+    progress "Distributed unit tests (MPI)"
+    make unit-mpi -j4        || error "building MPI distributed unit tests"
+    ${launch} ./bin/unit-mpi || error "running MPI distributed unit tests"
+fi
 
 progress "Miniapp spike comparison test"
-make miniapp.exe -j4                         || error "building miniapp"
-${launch} ./example/miniapp.exe -n 20 -t 100 || error "running miniapp"
+make miniapp -j4                     || error "building miniapp"
+${launch} ./bin/miniapp -n 20 -t 100 || error "running miniapp"
 
 cd $base_path
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index aad5e5ed2ae02b6315c33fd7b9fd045b41312869..0000000000000000000000000000000000000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,87 +0,0 @@
-set(arbor_cxx_sources
-    backends/multicore/mechanism.cpp
-    backends/multicore/shared_state.cpp
-    backends/multicore/stimulus.cpp
-    benchmark_cell_group.cpp
-    builtin_mechanisms.cpp
-    cell_group_factory.cpp
-    common_types_io.cpp
-    cell.cpp
-    event_binner.cpp
-    fvm_layout.cpp
-    fvm_lowered_cell_impl.cpp
-    hardware/affinity.cpp
-    hardware/gpu.cpp
-    hardware/memory.cpp
-    hardware/node_info.cpp
-    hardware/power.cpp
-    lif_cell_group.cpp
-    mc_cell_group.cpp
-    mechcat.cpp
-    merge_events.cpp
-    simulation.cpp
-    morphology.cpp
-    partition_load_balance.cpp
-    profiling/memory_meter.cpp
-    profiling/meter_manager.cpp
-    profiling/power_meter.cpp
-    profiling/profiler.cpp
-    schedule.cpp
-    spike_source_cell_group.cpp
-    swcio.cpp
-    threading/threading.cpp
-    util/debug.cpp
-    util/hostname.cpp
-    util/path.cpp
-    util/unwind.cpp
-)
-
-if(ARB_WITH_CUDA)
-    list(APPEND arbor_cxx_sources
-        backends/gpu/mechanism.cpp
-        backends/gpu/shared_state.cpp
-        backends/gpu/stimulus.cpp
-        backends/gpu/stimulus.cu
-        backends/gpu/threshold_watcher.cu
-    )
-endif()
-
-if(ARB_WITH_MPI)
-    list(APPEND arbor_cxx_sources
-        communication/mpi.cpp)
-endif()
-
-if(ARB_WITH_CTHREAD)
-    list(APPEND arbor_cxx_sources
-        threading/cthread.cpp)
-endif()
-
-set(arbor_cuda_sources
-    memory/fill.cu
-    backends/gpu/matrix_assemble.cu
-    backends/gpu/matrix_interleave.cu
-    backends/gpu/matrix_solve.cu
-    backends/gpu/multi_event_stream.cu
-    backends/gpu/shared_state.cu
-    backends/gpu/stimulus.cu
-    backends/gpu/threshold_watcher.cu
-)
-
-add_library(arbor ${arbor_cxx_sources})
-target_compile_options(arbor PRIVATE ${CXXOPT_ARCH})
-list(APPEND ARB_LIBRARIES arbor)
-
-if(ARB_WITH_CUDA)
-    cuda_add_library(arborcu ${arbor_cuda_sources})
-    list(APPEND ARB_LIBRARIES arborcu)
-endif()
-
-if (ARB_AUTO_RUN_MODCC_ON_CHANGES)
-    add_dependencies(arbor build_all_mods)
-    if (ARB_WITH_CUDA)
-        add_dependencies(arborcu build_all_mods)
-    endif()
-endif()
-
-# Make changes to the ARB_LIBRARIES visible to the parent scope.
-set(ARB_LIBRARIES "${ARB_LIBRARIES}" PARENT_SCOPE)
diff --git a/src/communication/local_context.hpp b/src/communication/local_context.hpp
deleted file mode 100644
index 8bae0840b2ecb6df71795e6274a4d699d191243a..0000000000000000000000000000000000000000
--- a/src/communication/local_context.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <communication/gathered_vector.hpp>
-#include <spike.hpp>
-
-namespace arb {
-
-struct local_context {
-    gathered_vector<arb::spike>
-    gather_spikes(const std::vector<arb::spike>& local_spikes) const {
-        using count_type = typename gathered_vector<arb::spike>::count_type;
-        return gathered_vector<arb::spike>(
-            std::vector<arb::spike>(local_spikes),
-            {0u, static_cast<count_type>(local_spikes.size())}
-        );
-    }
-
-    int id() const {
-        return 0;
-    }
-
-    int size() const {
-        return 1;
-    }
-
-    template <typename T>
-    T min(T value) const {
-        return value;
-    }
-
-    template <typename T>
-    T max(T value) const {
-        return value;
-    }
-
-    template <typename T>
-    T sum(T value) const {
-        return value;
-    }
-
-    template <typename T>
-    std::vector<T> gather(T value, int) const {
-        return {std::move(value)};
-    }
-
-    void barrier() const {}
-
-    std::string name() const {
-        return "serial";
-    }
-};
-
-} // namespace arb
diff --git a/src/communication/mpi.cpp b/src/communication/mpi.cpp
deleted file mode 100644
index 18bc425c337e3e63c3029804ba1da5a4bb367281..0000000000000000000000000000000000000000
--- a/src/communication/mpi.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <mpi.h>
-
-#include <communication/mpi.hpp>
-
-namespace arb {
-namespace mpi {
-
-// global guard for initializing mpi.
-
-scoped_guard::scoped_guard(int *argc, char ***argv) {
-    init(argc, argv);
-}
-
-scoped_guard::~scoped_guard() {
-    finalize();
-}
-
-// MPI exception class.
-
-mpi_error::mpi_error(const char* msg, int code):
-    error_code_(code)
-{
-    thread_local char buffer[MPI_MAX_ERROR_STRING];
-    int n;
-    MPI_Error_string(error_code_, buffer, &n);
-    message_ = "MPI error (";
-    message_ += buffer;
-    message_ += "): ";
-    message_ += msg;
-}
-
-void handle_mpi_error(const char* msg, int code) {
-    if (code!=MPI_SUCCESS) {
-        throw mpi_error(msg, code);
-    }
-}
-
-const char* mpi_error::what() const throw() {
-    return message_.c_str();
-}
-
-int mpi_error::error_code() const {
-    return error_code_;
-}
-
-void init(int* argc, char*** argv) {
-    int provided;
-
-    // initialize with thread serialized level of thread safety
-    MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, &provided);
-
-    if(provided<MPI_THREAD_SERIALIZED) {
-        throw mpi_error("Unable to initialize MPI with MPI_THREAD_SERIALIZED", MPI_ERR_OTHER);
-    }
-}
-
-void finalize() {
-    MPI_Finalize();
-}
-
-int rank(MPI_Comm comm) {
-    int r;
-    handle_mpi_error("MPI_Rank", MPI_Comm_rank(comm, &r));
-    return r;
-}
-
-int size(MPI_Comm comm) {
-    int s;
-    handle_mpi_error("MPI_Size", MPI_Comm_size(comm, &s));
-    return s;
-}
-
-void barrier(MPI_Comm comm) {
-    handle_mpi_error("MPI_Barrier", MPI_Barrier(comm));
-}
-
-} // namespace mpi
-} // namespace arb
diff --git a/src/communication/mpi_context.hpp b/src/communication/mpi_context.hpp
deleted file mode 100644
index c745990e141a0fe3ee4abdf0444818bef5e733db..0000000000000000000000000000000000000000
--- a/src/communication/mpi_context.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <communication/gathered_vector.hpp>
-#include <communication/mpi.hpp>
-#include <spike.hpp>
-
-namespace arb {
-
-struct mpi_context {
-    int size_;
-    int rank_;
-    MPI_Comm  comm_;
-
-    // throws std::runtime_error if MPI calls fail
-    mpi_context(MPI_Comm comm=MPI_COMM_WORLD): comm_(comm) {
-        size_ = arb::mpi::size(comm_);
-        rank_ = arb::mpi::rank(comm_);
-    }
-
-    gathered_vector<arb::spike>
-    gather_spikes(const std::vector<arb::spike>& local_spikes) const {
-        return mpi::gather_all_with_partition(local_spikes, comm_);
-    }
-
-    int id() const {
-        return rank_;
-    }
-
-    int size() const {
-        return size_;
-    }
-
-    template <typename T>
-    T min(T value) const {
-        return arb::mpi::reduce(value, MPI_MIN, comm_);
-    }
-
-    template <typename T>
-    T max(T value) const {
-        return arb::mpi::reduce(value, MPI_MAX, comm_);
-    }
-
-    template <typename T>
-    T sum(T value) const {
-        return arb::mpi::reduce(value, MPI_SUM, comm_);
-    }
-
-    template <typename T>
-    std::vector<T> gather(T value, int root) const {
-        return mpi::gather(value, root, comm_);
-    }
-
-    void barrier() const {
-        mpi::barrier(comm_);
-    }
-
-    std::string name() const {
-        return "MPI";
-    }
-};
-
-} // namespace arb
-
diff --git a/src/memory/gpu.hpp b/src/memory/gpu.hpp
deleted file mode 100644
index 6ba8699b96918985975db120c84844c3bcf5ac6b..0000000000000000000000000000000000000000
--- a/src/memory/gpu.hpp
+++ /dev/null
@@ -1,66 +0,0 @@
-#pragma once
-
-#ifdef ARB_HAVE_GPU
-
-#include <string>
-#include <cstdint>
-
-#include "util.hpp"
-
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-namespace arb {
-namespace memory {
-namespace gpu {
-
-//
-// helpers for memory where at least one of the target or source is on the gpu
-//
-template <typename T>
-void memcpy_d2h(const T* from, T* to, std::size_t size) {
-    auto bytes = sizeof(T)*size;
-    if (size==0) return;
-    auto status = cudaMemcpy(
-        reinterpret_cast<void*>(to), reinterpret_cast<const void*>(from),
-        bytes, cudaMemcpyDeviceToHost
-    );
-    if(status != cudaSuccess) {
-        LOG_ERROR("cudaMemcpy(d2h, " + std::to_string(bytes) + ") " + cudaGetErrorString(status));
-        abort();
-    }
-}
-
-template <typename T>
-void memcpy_h2d(const T* from, T* to, std::size_t size) {
-    auto bytes = sizeof(T)*size;
-    if (size==0) return;
-    auto status = cudaMemcpy(
-        reinterpret_cast<void*>(to), reinterpret_cast<const void*>(from),
-        bytes, cudaMemcpyHostToDevice
-    );
-    if(status != cudaSuccess) {
-        LOG_ERROR("cudaMemcpy(h2d, " + std::to_string(bytes) + ") " + cudaGetErrorString(status));
-        abort();
-    }
-}
-
-template <typename T>
-void memcpy_d2d(const T* from, T* to, std::size_t size) {
-    auto bytes = sizeof(T)*size;
-    if (size==0) return;
-    auto status = cudaMemcpy(
-        reinterpret_cast<void*>(to), reinterpret_cast<const void*>(from),
-        bytes, cudaMemcpyDeviceToDevice
-    );
-    if(status != cudaSuccess) {
-        LOG_ERROR("cudaMemcpy(d2d, " + std::to_string(bytes) + ") " + cudaGetErrorString(status));
-        abort();
-    }
-}
-
-} // namespace gpu
-} // namespace memory
-} // namespace arb
-
-#endif
diff --git a/src/thread_private_spike_store.hpp b/src/thread_private_spike_store.hpp
deleted file mode 100644
index 336ac9f4e048d517ed6502494431397360dbb905..0000000000000000000000000000000000000000
--- a/src/thread_private_spike_store.hpp
+++ /dev/null
@@ -1,75 +0,0 @@
-#pragma once
-
-#include <vector>
-
-#include <common_types.hpp>
-#include <spike.hpp>
-#include <threading/threading.hpp>
-
-namespace arb {
-
-/// Handles the complexity of managing thread private buffers of spikes.
-/// Internally stores one thread private buffer of spikes for each hardware thread.
-/// This can be accessed directly using the get() method, which returns a reference to
-/// The thread private buffer of the calling thread.
-/// The insert() and gather() methods add a vector of spikes to the buffer,
-/// and collate all of the buffers into a single vector respectively.
-class thread_private_spike_store {
-public :
-    /// Collate all of the individual buffers into a single vector of spikes.
-    /// Does not modify the buffer contents.
-    std::vector<spike> gather() const {
-        std::vector<spike> spikes;
-        unsigned num_spikes = 0u;
-        for (auto& b : buffers_) {
-            num_spikes += b.size();
-        }
-        spikes.reserve(num_spikes);
-
-        for (auto& b : buffers_) {
-            spikes.insert(spikes.begin(), b.begin(), b.end());
-        }
-
-        return spikes;
-    }
-
-    /// Return a reference to the thread private buffer of the calling thread
-    std::vector<spike>& get() {
-        return buffers_.local();
-    }
-
-    /// Clear all of the thread private buffers
-    void clear() {
-        for (auto& b : buffers_) {
-            b.clear();
-        }
-    }
-
-    /// Append the passed spikes to the end of the thread private buffer of the
-    /// calling thread
-    void insert(const std::vector<spike>& spikes) {
-        auto& buff = get();
-        buff.insert(buff.end(), spikes.begin(), spikes.end());
-    }
-
-private :
-    /// thread private storage for accumulating spikes
-    using local_spike_store_type =
-        threading::enumerable_thread_specific<std::vector<spike>>;
-
-    local_spike_store_type buffers_;
-
-public :
-    using iterator = typename local_spike_store_type::iterator;
-    using const_iterator = typename local_spike_store_type::const_iterator;
-
-    // make the container iterable
-    // we iterate of threads, not individual containers
-
-    iterator begin() { return buffers_.begin(); }
-    iterator end() { return buffers_.begin(); }
-    const_iterator begin() const { return buffers_.begin(); }
-    const_iterator end() const { return buffers_.begin(); }
-};
-
-} // namespace arb
diff --git a/src/threading/tbb.hpp b/src/threading/tbb.hpp
deleted file mode 100644
index 585ac42e7ee65ce73e9864b641d2d44feb32ef09..0000000000000000000000000000000000000000
--- a/src/threading/tbb.hpp
+++ /dev/null
@@ -1,87 +0,0 @@
-#pragma once
-
-#if !defined(ARB_HAVE_TBB)
-    #error this header can only be loaded if ARB_HAVE_TBB is set
-#endif
-
-#include <atomic>
-#include <string>
-
-#include <tbb/tbb.h>
-#include <tbb/tbb_stddef.h>
-#include <tbb/compat/thread>
-#include <tbb/enumerable_thread_specific.h>
-
-namespace arb {
-namespace threading {
-
-template <typename T>
-using enumerable_thread_specific = tbb::enumerable_thread_specific<T>;
-
-struct parallel_for {
-    template <typename F>
-    static void apply(int left, int right, F f) {
-        tbb::parallel_for(left, right, f);
-    }
-};
-
-inline std::string description() {
-    return "TBBv" + std::to_string(tbb::TBB_runtime_interface_version());
-}
-
-struct timer {
-    using time_point = tbb::tick_count;
-
-    static inline time_point tic() {
-        return tbb::tick_count::now();
-    }
-
-    static inline double toc(time_point t) {
-        return (tic() - t).seconds();
-    }
-
-    static inline double difference(time_point b, time_point e) {
-        return (e-b).seconds();
-    }
-};
-
-constexpr bool multithreaded() { return true; }
-
-template <typename T>
-using parallel_vector = tbb::concurrent_vector<T>;
-
-using task_group = tbb::task_group;
-
-inline
-std::size_t thread_id() {
-    static std::atomic<std::size_t> num_threads(0);
-    thread_local std::size_t thread_id = num_threads++;
-    return thread_id;
-}
-
-template <typename RandomIt>
-void sort(RandomIt begin, RandomIt end) {
-    tbb::parallel_sort(begin, end);
-}
-
-template <typename RandomIt, typename Compare>
-void sort(RandomIt begin, RandomIt end, Compare comp) {
-    tbb::parallel_sort(begin, end, comp);
-}
-
-template <typename Container>
-void sort(Container& c) {
-    tbb::parallel_sort(c.begin(), c.end());
-}
-
-} // namespace threading
-} // namespace arb
-
-namespace tbb {
-    /// comparison operator for tbb::tick_count type
-    /// returns true iff time stamp l occurred before timestamp r
-    inline bool operator< (tbb::tick_count l, tbb::tick_count r) {
-        return (l-r).seconds() < 0.;
-    }
-}
-
diff --git a/src/threading/timer.hpp b/src/threading/timer.hpp
deleted file mode 100644
index 027da9f698f37a90e8272e944a931352d989e571..0000000000000000000000000000000000000000
--- a/src/threading/timer.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#pragma once
-
-#include <chrono>
-
-namespace arb {
-namespace threading {
-namespace impl{
-
-struct timer {
-    using time_point = std::chrono::time_point<std::chrono::system_clock>;
-
-    static inline time_point tic() {
-        return std::chrono::system_clock::now();
-    }
-
-    static inline double toc(time_point t) {
-        return std::chrono::duration<double>{tic() - t}.count();
-    }
-
-    static inline double difference(time_point b, time_point e) {
-        return std::chrono::duration<double>{e-b}.count();
-    }
-};
-
-}
-}
-}
diff --git a/src/util/debug.cpp b/src/util/debug.cpp
deleted file mode 100644
index cecaa2a49e860213f950a0cb039510cabaa25dd3..0000000000000000000000000000000000000000
--- a/src/util/debug.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-#include <chrono>
-#include <cstdlib>
-#include <cstring>
-#include <iomanip>
-#include <iostream>
-#include <mutex>
-
-#include <util/debug.hpp>
-#include <util/ioutil.hpp>
-#include <util/unwind.hpp>
-
-namespace arb {
-namespace util {
-
-std::mutex global_debug_cerr_mutex;
-
-bool abort_on_failed_assertion(
-    const char* assertion,
-    const char* file,
-    int line,
-    const char* func)
-{
-    // If libunwind is being used, make a file with a backtrace and print information
-    // to stdcerr.
-    backtrace().print();
-
-    // Explicit flush, as we can't assume default buffering semantics on stderr/cerr,
-    // and abort() might not flush streams.
-    std::cerr << file << ':' << line << " " << func
-              << ": Assertion `" << assertion << "' failed." << std::endl;
-    std::abort();
-    return false;
-}
-
-failed_assertion_handler_t global_failed_assertion_handler = abort_on_failed_assertion;
-
-std::ostream& debug_emit_trace_leader(
-    std::ostream& out,
-    const char* file,
-    int line,
-    const char* varlist)
-{
-    iosfmt_guard guard(out);
-
-    const char* leaf = std::strrchr(file, '/');
-    out << (leaf?leaf+1:file) << ':' << line << " ";
-
-    using namespace std::chrono;
-    auto tstamp = system_clock::now().time_since_epoch();
-    auto tstamp_usec = duration_cast<microseconds>(tstamp).count();
-
-    out << std::right << '[';
-    out << std::setw(11) << std::setfill('0') << (tstamp_usec/1000000) << '.';
-    out << std::setw(6)  << std::setfill('0') << (tstamp_usec%1000000) << ']';
-
-    if (varlist && *varlist) {
-        out << ' ' << varlist << ": ";
-    }
-    return out;
-}
-
-} // namespace util
-} // namespace arb
diff --git a/src/util/debug.hpp b/src/util/debug.hpp
deleted file mode 100644
index a2d53b513053b277f2a30a8c260587cbdbe7377d..0000000000000000000000000000000000000000
--- a/src/util/debug.hpp
+++ /dev/null
@@ -1,195 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <mutex>
-#include <utility>
-
-extern "C" {
-// The system header endian.h is not in /sys/include/ on Mac OS X.
-// Include sys/types.h, which pulls in endian.h on all systems.
-#include <sys/types.h>
-}
-
-#include <threading/threading.hpp>
-#include "unwind.hpp"
-
-namespace arb {
-namespace util {
-
-constexpr inline bool is_debug_mode() {
-#ifndef NDEBUG
-    return true;
-#else
-    return false;
-#endif
-}
-using failed_assertion_handler_t =
-    bool (*)(const char* assertion, const char* file, int line, const char* func);
-
-bool abort_on_failed_assertion(const char* assertion, const char* file, int line, const char* func);
-inline bool ignore_failed_assertion(const char*, const char*, int, const char*) {
-    return false;
-}
-
-// defaults to abort_on_failed_assertion;
-extern failed_assertion_handler_t global_failed_assertion_handler;
-
-std::ostream& debug_emit_trace_leader(std::ostream& out, const char* file, int line, const char* varlist);
-
-inline void debug_emit(std::ostream& out) {
-    out << "\n";
-}
-
-template <typename Head, typename... Tail>
-void debug_emit(std::ostream& out, const Head& head, const Tail&... tail) {
-    out << head;
-    if (sizeof...(tail)) {
-        out << ", ";
-    }
-    debug_emit(out, tail...);
-}
-
-extern std::mutex global_debug_cerr_mutex;
-
-template <typename... Args>
-void debug_emit_trace(const char* file, int line, const char* varlist, const Args&... args) {
-    if (arb::threading::multithreaded()) {
-        std::stringstream buffer;
-        buffer.precision(17);
-
-        debug_emit_trace_leader(buffer, file, line, varlist);
-        debug_emit(buffer, args...);
-
-        std::lock_guard<std::mutex> guard(global_debug_cerr_mutex);
-        std::cerr << buffer.rdbuf();
-        std::cerr.flush();
-    }
-    else {
-        debug_emit_trace_leader(std::cerr, file, line, varlist);
-        debug_emit(std::cerr, args...);
-        std::cerr.flush();
-    }
-}
-
-namespace impl {
-    template <typename Seq, typename Separator>
-    struct sepval {
-        const Seq& seq;
-        Separator sep;
-
-        sepval(const Seq& seq, Separator sep): seq(seq), sep(std::move(sep)) {}
-
-        friend std::ostream& operator<<(std::ostream& out, const sepval& sv) {
-            bool emitsep = false;
-            for (const auto& v: sv.seq) {
-                if (emitsep) out << sv.sep;
-                emitsep = true;
-                out << v;
-            }
-            return out;
-        }
-    };
-
-    enum class endian {
-        little = __ORDER_LITTLE_ENDIAN__,
-        big = __ORDER_BIG_ENDIAN__,
-        native = __BYTE_ORDER__
-    };
-
-    // Wrapper for emitting values on an ostream as a sequence of hex digits.
-    struct hexdump_inline_wrap {
-        const unsigned char* from;
-        std::size_t size;
-        unsigned width;
-
-        friend std::ostream& operator<<(std::ostream& out, const hexdump_inline_wrap& h) {
-            using std::ptrdiff_t;
-
-            constexpr bool little = endian::native==endian::little;
-            ptrdiff_t width = h.width;
-            const unsigned char* from = h.from;
-            const unsigned char* end = h.from+h.size;
-            std::string buf;
-
-            auto emit = [&buf](unsigned char c) {
-                const char* digit = "0123456789abcdef";
-                buf += digit[(c>>4)&0xf];
-                buf += digit[c&0xf];
-            };
-
-            constexpr unsigned bufsz = 512;
-            unsigned bufmargin = 4*width+1;
-
-            buf.reserve(bufsz);
-            while (end-from>width) {
-                if (buf.size()+bufmargin>=bufsz) {
-                    out << buf;
-                    buf.clear();
-                }
-                for (ptrdiff_t i = 0; i<width; ++i) {
-                    emit(little? from[width-i-1]: from[i]);
-                }
-                from += width;
-                buf += ' ';
-            }
-            for (ptrdiff_t i = 0; i<end-from; ++i) {
-                emit(little? from[width-i-1]: from[i]);
-            }
-
-            out << buf;
-            return out;
-        }
-    };
-}
-
-// Wrap a sequence or container of values so that they can be printed
-// to an `std::ostream` with the elements separated by the supplied 
-// separator.
-
-template <typename Seq, typename Separator>
-impl::sepval<Seq, Separator> sepval(const Seq& seq, Separator sep) {
-    return impl::sepval<Seq, Separator>(seq, std::move(sep));
-}
-
-template <typename Seq>
-impl::sepval<Seq, const char*> csv(const Seq& seq) {
-    return sepval(seq, ", ");
-}
-
-// Dump something in hex (inline representation).
-
-template <typename T>
-impl::hexdump_inline_wrap hexdump(const T& obj, unsigned width = 4) {
-    return impl::hexdump_inline_wrap{reinterpret_cast<const unsigned char*>(&obj), sizeof obj, width};
-}
-
-template <typename T>
-impl::hexdump_inline_wrap hexdump_n(const T* ptr, std::size_t n, unsigned width = 4) {
-    return impl::hexdump_inline_wrap{reinterpret_cast<const unsigned char*>(ptr), n, width};
-}
-
-} // namespace util
-} // namespace arb
-
-#ifdef ARB_HAVE_TRACE
-    #define TRACE(vars...) arb::util::debug_emit_trace(__FILE__, __LINE__, #vars, ##vars)
-#else
-    #define TRACE(...)
-#endif
-
-#ifdef ARB_HAVE_ASSERTIONS
-    #ifdef __GNUC__
-        #define DEBUG_FUNCTION_NAME __PRETTY_FUNCTION__
-    #else
-        #define DEBUG_FUNCTION_NAME __func__
-    #endif
-
-    #define EXPECTS(condition) \
-       (void)((condition) || \
-       arb::util::global_failed_assertion_handler(#condition, __FILE__, __LINE__, DEBUG_FUNCTION_NAME))
-#else
-    #define EXPECTS(condition) \
-       (void)(false && (condition))
-#endif // def ARB_HAVE_ASSERTIONS
diff --git a/src/util/lexcmp_def.hpp b/src/util/lexcmp_def.hpp
deleted file mode 100644
index cfb297b1d16d3dbd7023bb7f300271eda0af1e41..0000000000000000000000000000000000000000
--- a/src/util/lexcmp_def.hpp
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-
-/*
- * Macro definitions for defining comparison operators for
- * record-like types.
- *
- * Use:
- *
- * To define comparison operations for a record type xyzzy
- * with fields foo, bar and baz:
- *
- * DEFINE_LEXICOGRAPHIC_ORDERING(xyzzy,(a.foo,a.bar,a.baz),(b.foo,b.bar,b.baz))
- *
- * The explicit use of 'a' and 'b' in the second and third parameters
- * is needed only to save a heroic amount of preprocessor macro
- * deep magic.
- *
- */
-
-#include <tuple>
-
-#define DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(proxy,op,type,a_fields,b_fields) \
-inline bool operator op(const type& a,const type& b) { return proxy a_fields op proxy b_fields; }
-
-#define DEFINE_LEXICOGRAPHIC_ORDERING(type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,<,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,>,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,<=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,>=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,!=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::tie,==,type,a_fields,b_fields)
-
-#define DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,<,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,>,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,<=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,>=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,!=,type,a_fields,b_fields) \
-DEFINE_LEXICOGRAPHIC_ORDERING_IMPL_(std::make_tuple,==,type,a_fields,b_fields)
-
diff --git a/tclap/Makefile.am b/tclap/Makefile.am
deleted file mode 100644
index 0e247bf5bf317de4a0e1a40270598092c56d95fc..0000000000000000000000000000000000000000
--- a/tclap/Makefile.am
+++ /dev/null
@@ -1,28 +0,0 @@
-
-libtclapincludedir = $(includedir)/tclap
-
-libtclapinclude_HEADERS = \
-			 CmdLineInterface.h \
-			 ArgException.h \
-			 CmdLine.h \
-			 XorHandler.h \
-			 MultiArg.h \
-			 UnlabeledMultiArg.h \
-			 ValueArg.h \
-			 UnlabeledValueArg.h \
-			 Visitor.h Arg.h \
-			 HelpVisitor.h \
-			 SwitchArg.h \
-			 MultiSwitchArg.h \
-			 VersionVisitor.h \
-			 IgnoreRestVisitor.h \
-			 CmdLineOutput.h \
-			 StdOutput.h \
-			 DocBookOutput.h \
-			 ZshCompletionOutput.h \
-			 OptionalUnlabeledTracker.h \
-			 Constraint.h \
-			 ValuesConstraint.h \
-			 ArgTraits.h \
-			 StandardTraits.h
-
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85efd242ec6885db00d7c715ddc69612e7baeb84
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,21 @@
+find_package(Threads REQUIRED)
+find_threads_cuda_fix()
+
+add_library(gtest STATIC gtest-all.cpp)
+target_include_directories(gtest PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_link_libraries(gtest PUBLIC Threads::Threads)
+
+# Unit tests.
+add_subdirectory(unit)
+
+# Test validating models, possibly needing other software installed.
+add_subdirectory(validation)
+
+# Test MPI wrappers and distribution operations.
+add_subdirectory(unit-distributed)
+
+# Test modcc internals.
+add_subdirectory(unit-modcc)
+
+# Microbenchmarks.
+add_subdirectory(ubench)
diff --git a/tests/common_cells.hpp b/test/common_cells.hpp
similarity index 98%
rename from tests/common_cells.hpp
rename to test/common_cells.hpp
index 4f5b6b39a6a848da4caef430d6ed51590dd5ecb1..06ac14f3908bb52ca7c48c7ca4411f51d1a86664 100644
--- a/tests/common_cells.hpp
+++ b/test/common_cells.hpp
@@ -1,10 +1,11 @@
 #include <cmath>
 
-#include <cell.hpp>
-#include <recipe.hpp>
-#include <segment.hpp>
-#include <math.hpp>
-#include <mechinfo.hpp>
+#include <arbor/mechinfo.hpp>
+
+#include "cell.hpp"
+#include "recipe.hpp"
+#include "segment.hpp"
+#include "math.hpp"
 
 namespace arb {
 
diff --git a/tests/gtest-all.cpp b/test/gtest-all.cpp
similarity index 100%
rename from tests/gtest-all.cpp
rename to test/gtest-all.cpp
diff --git a/tests/gtest.h b/test/gtest.h
similarity index 100%
rename from tests/gtest.h
rename to test/gtest.h
diff --git a/tests/simple_recipes.hpp b/test/simple_recipes.hpp
similarity index 100%
rename from tests/simple_recipes.hpp
rename to test/simple_recipes.hpp
diff --git a/test/ubench/CMakeLists.txt b/test/ubench/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7757e386ae6a439fac065d82d6ec6a3aae95c888
--- /dev/null
+++ b/test/ubench/CMakeLists.txt
@@ -0,0 +1,29 @@
+include(ExternalProject)
+
+# List of micro benchmarks to build.
+
+set(bench_sources
+    accumulate_functor_values.cpp
+    default_construct.cpp
+    event_setup.cpp
+    event_binning.cpp
+    mech_vec.cpp
+)
+
+if(ARB_WITH_CUDA)
+list(APPEND bench_sources
+    cuda_compare_and_reduce.cu
+    cuda_reduce_by_key.cu
+)
+endif()
+
+# Build benches.
+
+foreach(bench_src ${bench_sources})  # one EXCLUDE_FROM_ALL executable per benchmark source
+    string(REGEX REPLACE "\\.[^.]*$" "" bench_exe "${bench_src}")  # strip the final extension
+    add_executable(${bench_exe} EXCLUDE_FROM_ALL "${bench_src}")
+    target_link_libraries(${bench_exe} PRIVATE arbor arbor-private-headers ext-benchmark)
+    list(APPEND bench_exe_list ${bench_exe})  # collected for the aggregate `ubenches` target
+endforeach()
+
+add_custom_target(ubenches DEPENDS ${bench_exe_list})
diff --git a/tests/ubench/README.md b/test/ubench/README.md
similarity index 100%
rename from tests/ubench/README.md
rename to test/ubench/README.md
diff --git a/tests/ubench/accumulate_functor_values.cpp b/test/ubench/accumulate_functor_values.cpp
similarity index 100%
rename from tests/ubench/accumulate_functor_values.cpp
rename to test/ubench/accumulate_functor_values.cpp
diff --git a/tests/ubench/cuda_compare_and_reduce.cu b/test/ubench/cuda_compare_and_reduce.cu
similarity index 100%
rename from tests/ubench/cuda_compare_and_reduce.cu
rename to test/ubench/cuda_compare_and_reduce.cu
diff --git a/tests/ubench/cuda_reduce_by_key.cu b/test/ubench/cuda_reduce_by_key.cu
similarity index 100%
rename from tests/ubench/cuda_reduce_by_key.cu
rename to test/ubench/cuda_reduce_by_key.cu
diff --git a/tests/ubench/default_construct.cpp b/test/ubench/default_construct.cpp
similarity index 100%
rename from tests/ubench/default_construct.cpp
rename to test/ubench/default_construct.cpp
diff --git a/tests/ubench/event_binning.cpp b/test/ubench/event_binning.cpp
similarity index 100%
rename from tests/ubench/event_binning.cpp
rename to test/ubench/event_binning.cpp
diff --git a/tests/ubench/event_setup.cpp b/test/ubench/event_setup.cpp
similarity index 100%
rename from tests/ubench/event_setup.cpp
rename to test/ubench/event_setup.cpp
diff --git a/tests/ubench/mech_vec.cpp b/test/ubench/mech_vec.cpp
similarity index 100%
rename from tests/ubench/mech_vec.cpp
rename to test/ubench/mech_vec.cpp
diff --git a/test/unit-distributed/CMakeLists.txt b/test/unit-distributed/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fdd9b77f8d4b3c6e78082a507f4bb4df123185c3
--- /dev/null
+++ b/test/unit-distributed/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(unit-distributed_sources
+    distributed_listener.cpp
+    test_domain_decomposition.cpp
+    test_exporter_spike_file.cpp
+    test_communicator.cpp
+    test_mpi.cpp
+
+    # unit test driver
+    test.cpp
+)
+
+add_executable(unit-local ${unit-distributed_sources})
+target_compile_options(unit-local PRIVATE ${CXXOPT_ARCH})
+target_compile_definitions(unit-local PRIVATE TEST_LOCAL)
+target_link_libraries(unit-local PRIVATE gtest arbor arbor-aux arbor-private-headers)
+
+if(ARB_WITH_MPI)
+    add_executable(unit-mpi ${unit-distributed_sources})
+    target_compile_options(unit-mpi PRIVATE ${CXXOPT_ARCH})
+    target_compile_definitions(unit-mpi PRIVATE TEST_MPI)
+    target_link_libraries(unit-mpi PRIVATE gtest arbor arbor-aux arbor-private-headers)
+endif()
+
diff --git a/test/unit-distributed/distributed_listener.cpp b/test/unit-distributed/distributed_listener.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..16c3f8ce193a125ea6181de5c8159a8053d36a06
--- /dev/null
+++ b/test/unit-distributed/distributed_listener.cpp
@@ -0,0 +1,104 @@
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+#include <arbor/distributed_context.hpp>
+
+#include "../gtest.h"
+
+#include "distributed_listener.hpp"
+
+distributed_listener::printer::printer(std::string base_name, int rank) {
+    base_name += "_"+std::to_string(rank)+".out";
+
+    fid_.open(base_name);
+    if (!fid_) {
+        throw std::runtime_error("could not open file " + base_name + " for test output");
+    }
+
+    cout_ = rank==0;
+}
+
+template <typename T>
+distributed_listener::printer& operator<<(distributed_listener::printer& p, const T& item) {
+    if (p.fid_) p.fid_ << item;
+    if (p.cout_) std::cout << item;
+    return p;
+}
+
+distributed_listener::distributed_listener(std::string f_base, const arb::distributed_context* ctx):
+    context_(ctx),
+    rank_(context_->id()),
+    size_(context_->size()),
+    emit_(std::move(f_base), rank_)
+{}
+
+void distributed_listener::OnTestProgramStart(const UnitTest&) {
+    emit_ << "*** test output for rank " << rank_ << " of " << size_ << "\n\n";
+}
+
+void distributed_listener::OnTestProgramEnd(const UnitTest&) {
+    emit_ << "*** end test output for rank " << rank_ << " of " << size_ << "\n\n";
+}
+
+void distributed_listener::OnTestCaseStart(const TestCase& test_case) {
+    test_case_failures_ = 0;
+    test_case_tests_ = 0;
+}
+
+void distributed_listener::OnTestCaseEnd(const TestCase& test_case) {
+    emit_
+        << "    PASSED " << test_case_tests_-test_case_failures_
+        << " of " << test_case_tests_ << " tests"
+        << " in " << test_case.name() << "\n";
+
+    if (test_case_failures_>0) {
+        emit_
+            << "    FAILED " << test_case_failures_
+            << " of " << test_case_tests_ << " tests"
+            << " in " << test_case.name() << "\n";
+    }
+
+    emit_ << "\n";
+}
+
+void distributed_listener::OnTestStart(const TestInfo& test_info) {
+    emit_
+        << "TEST: " << test_info.test_case_name()
+        << "::" << test_info.name() << "\n";
+
+    test_failures_ = 0;
+}
+
+void distributed_listener::OnTestPartResult(const TestPartResult& test_part_result) {
+    // indent all lines in the summary by 4 spaces
+    std::string summary = "    " + std::string(test_part_result.summary());
+    auto pos = summary.find("\n");
+    while (pos!=summary.size() && pos!=std::string::npos) {
+        summary.replace(pos, 1, "\n    ");
+        pos = summary.find("\n", pos+1);
+    }
+
+    emit_
+        << " LOCAL_" << (test_part_result.failed()? "FAIL": "SUCCESS") << "\n"
+        << test_part_result.file_name() << ':' << test_part_result.line_number() << "\n"
+        << summary << "\n";
+
+    // note that there was a failure in this test case
+    if (test_part_result.failed()) {
+        ++test_failures_;
+    }
+}
+
+void distributed_listener::OnTestEnd(const TestInfo& test_info) {
+    ++test_case_tests_; // every finished test counts towards the test-case total
+
+    // count the number of ranks that had errors: each rank contributes 1 if any part failed
+    int global_errors = context_->sum(test_failures_>0 ? 1 : 0);
+    if (global_errors>0) {
+        ++test_case_failures_;
+        emit_ << "  GLOBAL_FAIL on " << global_errors << " ranks\n";
+    }
+}
+
diff --git a/test/unit-distributed/distributed_listener.hpp b/test/unit-distributed/distributed_listener.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c61f8b46ca4516b267a92a7488ab08a46d500d7
--- /dev/null
+++ b/test/unit-distributed/distributed_listener.hpp
@@ -0,0 +1,71 @@
+#pragma once
+
+#include <fstream>
+#include <string>
+#include <utility>
+
+#include <arbor/distributed_context.hpp>
+
+#include "../gtest.h"
+
+/// A specialized listener designed for printing test results with MPI
+/// or in other distributed contexts.
+///
+/// When tests are run with e.g. MPI, one instance of each test is run on
+/// each rank. The default behavior of Google Test is for each test
+/// instance to print to stdout. With more than one rank, this creates
+/// the usual mess of output.
+///
+/// This specialization has the first rank (rank 0) print to stdout, and all
+/// ranks print their output to separate text files.
+/// For each test a message is printed showing:
+///     - detailed messages about errors on rank 0,
+///     - a head count of errors that occurred on other ranks.
+
+class distributed_listener: public testing::EmptyTestEventListener {
+    using UnitTest = testing::UnitTest;
+    using TestCase = testing::TestCase;
+    using TestInfo = testing::TestInfo;
+    using TestPartResult = testing::TestPartResult;
+
+public:
+    distributed_listener(std::string f_base, const arb::distributed_context* ctx);
+
+    /// Messages that are printed at the start and end of the test program.
+    /// i.e. once only.
+    virtual void OnTestProgramStart(const UnitTest&) override;
+    virtual void OnTestProgramEnd(const UnitTest&) override;
+
+    /// Messages that are printed at the start and end of each test case.
+    /// On startup a counter that counts the number of tests that fail in
+    /// this test case is initialized to zero, and will be incremented for each
+    /// test that fails.
+    virtual void OnTestCaseStart(const TestCase& test_case) override;
+    virtual void OnTestCaseEnd(const TestCase& test_case) override;
+
+    // Called before a test starts.
+    virtual void OnTestStart(const TestInfo& test_info) override;
+
+    // Called after a failed assertion or a SUCCEED() invocation.
+    virtual void OnTestPartResult(const TestPartResult& test_part_result) override;
+
+    // Called after a test ends.
+    virtual void OnTestEnd(const TestInfo& test_info) override;
+
+private:
+    struct printer {
+        std::ofstream fid_; // per-rank output file
+        bool cout_;         // true on rank 0 only: also echo output to stdout
+        printer(std::string base_name, int rank);
+    };
+    template <typename T> friend printer& operator<<(printer&, const T&); // defined in the .cpp; needs access to printer
+
+    const arb::distributed_context* context_;
+    int rank_;
+    int size_;
+    printer emit_;
+    int test_case_failures_ = 0; // tests in the current test case that failed on at least one rank
+    int test_case_tests_ = 0;    // tests finished so far in the current test case
+    int test_failures_ = 0;      // failed parts recorded on this rank during the current test
+};
+
diff --git a/tests/global_communication/test.cpp b/test/unit-distributed/test.cpp
similarity index 79%
rename from tests/global_communication/test.cpp
rename to test/unit-distributed/test.cpp
index a119d347b6769427edc35b554a2a4d57155b44e8..152afa347fc8278667473e3f1a3adefbbded1ded 100644
--- a/tests/global_communication/test.cpp
+++ b/test/unit-distributed/test.cpp
@@ -5,13 +5,17 @@
 
 #include "../gtest.h"
 
-#include "mpi_listener.hpp"
+#include <arbor/distributed_context.hpp>
 
 #include <tinyopt.hpp>
 #include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
 #include <util/ioutil.hpp>
 
+#include "distributed_listener.hpp"
+
+#ifdef TEST_MPI
+#include "with_mpi.hpp"
+#endif
 
 using namespace arb;
 
@@ -24,23 +28,26 @@ const char* usage_str =
 "  -h, --help          Display usage information and exit\n";
 
 int main(int argc, char **argv) {
-
     // We need to set the communicator policy at the top level
     // this allows us to build multiple communicators in the tests
-    #ifdef ARB_HAVE_MPI
-    mpi::scoped_guard guard(&argc, &argv);
+
+#ifdef TEST_MPI
+    with_mpi guard(argc, argv, false);
     g_context = mpi_context(MPI_COMM_WORLD);
-    #endif
+#elif defined(TEST_LOCAL)
+    g_context = local_context();
+#else
+#error "define TEST_MPI or TEST_LOCAL for distributed test"
+#endif
 
     // initialize google test environment
     testing::InitGoogleTest(&argc, argv);
 
     // set up a custom listener that prints messages in an MPI-friendly way
     auto& listeners = testing::UnitTest::GetInstance()->listeners();
-    // first delete the original printer
+    // replace original printer with our custom printer
     delete listeners.Release(listeners.default_result_printer());
-    // now add our custom printer
-    listeners.Append(new mpi_listener("results_global_communication", &g_context));
+    listeners.Append(new distributed_listener("run_"+g_context.name(), &g_context));
 
     int return_value = 0;
     try {
@@ -70,13 +77,13 @@ int main(int argc, char **argv) {
         //      1 : failure
         return_value = RUN_ALL_TESTS();
     }
-
     catch (to::parse_opt_error& e) {
         to::usage(argv[0], usage_str, e.what());
         return_value = 1;
     }
     catch (std::exception& e) {
-        std::cerr << "caught exception: " << e.what() << "\n";
+        //std::cerr << "caught exception: " << e.what() << "\n";
+        std::cout << "caught exception: " << e.what() << std::endl;
         return_value = 1;
     }
 
diff --git a/tests/global_communication/test.hpp b/test/unit-distributed/test.hpp
similarity index 76%
rename from tests/global_communication/test.hpp
rename to test/unit-distributed/test.hpp
index 12b98c565e38bda2612d8a4f1c8e5e61b432c3c6..b7d4679a310882344b17e43630f4a8217c2ee2ee 100644
--- a/tests/global_communication/test.hpp
+++ b/test/unit-distributed/test.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <communication/distributed_context.hpp>
+#include <arbor/distributed_context.hpp>
 
 // Global context is a global variable, set in the main() funtion of the main
 // test driver test.cpp.
diff --git a/tests/global_communication/test_communicator.cpp b/test/unit-distributed/test_communicator.cpp
similarity index 99%
rename from tests/global_communication/test_communicator.cpp
rename to test/unit-distributed/test_communicator.cpp
index 55f48bae094092c4be7db329d2ba343882628dc2..151df0f31c2b499aa106f7a81e1262bca092aae2 100644
--- a/tests/global_communication/test_communicator.cpp
+++ b/test/unit-distributed/test_communicator.cpp
@@ -4,8 +4,9 @@
 #include <stdexcept>
 #include <vector>
 
+#include <arbor/distributed_context.hpp>
+
 #include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
 #include <hardware/node_info.hpp>
 #include <load_balance.hpp>
 #include <util/filter.hpp>
diff --git a/tests/global_communication/test_domain_decomposition.cpp b/test/unit-distributed/test_domain_decomposition.cpp
similarity index 99%
rename from tests/global_communication/test_domain_decomposition.cpp
rename to test/unit-distributed/test_domain_decomposition.cpp
index 02304dc2958f4b36e2f2048126d083488bfe8d6e..685cb75e981f2f9a03c58ce505372b2ed2d46b8e 100644
--- a/tests/global_communication/test_domain_decomposition.cpp
+++ b/test/unit-distributed/test_domain_decomposition.cpp
@@ -7,8 +7,9 @@
 #include <string>
 #include <vector>
 
+#include <arbor/distributed_context.hpp>
+
 #include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
 #include <hardware/node_info.hpp>
 #include <load_balance.hpp>
 
diff --git a/tests/global_communication/test_exporter_spike_file.cpp b/test/unit-distributed/test_exporter_spike_file.cpp
similarity index 97%
rename from tests/global_communication/test_exporter_spike_file.cpp
rename to test/unit-distributed/test_exporter_spike_file.cpp
index 8f421db35b4bae29c8f3b18e6d66bcfdc01ecf42..00d26e5788e0aba5b98a875cf0592e3e5b4edcac 100644
--- a/tests/global_communication/test_exporter_spike_file.cpp
+++ b/test/unit-distributed/test_exporter_spike_file.cpp
@@ -7,10 +7,11 @@
 #include <string>
 #include <vector>
 
+#include <arbor/distributed_context.hpp>
+#include <arbor/spike.hpp>
+
 #include <communication/communicator.hpp>
-#include <communication/distributed_context.hpp>
 #include <io/exporter_spike_file.hpp>
-#include <spike.hpp>
 
 class exporter_spike_file_fixture : public ::testing::Test {
 protected:
diff --git a/tests/global_communication/test_mpi.cpp b/test/unit-distributed/test_mpi.cpp
similarity index 98%
rename from tests/global_communication/test_mpi.cpp
rename to test/unit-distributed/test_mpi.cpp
index 18f98c0a708ff47b8a57114a15d3444d469e3960..935242c27b43ca269202dbc563395e0a1cc1a7da 100644
--- a/tests/global_communication/test_mpi.cpp
+++ b/test/unit-distributed/test_mpi.cpp
@@ -1,4 +1,4 @@
-#ifdef ARB_HAVE_MPI
+#ifdef TEST_MPI
 
 #include "../gtest.h"
 #include "test.hpp"
@@ -140,4 +140,4 @@ TEST(mpi, gather) {
     }
 }
 
-#endif // ARB_HAVE_MPI
+#endif // TEST_MPI
diff --git a/test/unit-modcc/CMakeLists.txt b/test/unit-modcc/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..813f4a911605e4c75dcd2b027f8db21b27c0b88c
--- /dev/null
+++ b/test/unit-modcc/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(unit-modcc_sources
+    # unit tests
+    test_lexer.cpp
+    test_kinetic_rewriter.cpp
+    test_module.cpp
+    test_msparse.cpp
+    test_parser.cpp
+    test_prefixbuf.cpp
+    test_printers.cpp
+    test_removelocals.cpp
+    test_simd_backend.cpp
+    test_symdiff.cpp
+    test_symge.cpp
+    test_visitors.cpp
+
+    # unit test driver
+    driver.cpp
+
+    # utility
+    common.cpp
+    expr_expand.cpp
+)
+
+add_executable(unit-modcc ${unit-modcc_sources})
+target_link_libraries(unit-modcc PRIVATE libmodcc gtest)
+target_compile_definitions(unit-modcc PRIVATE "DATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}\"")  # lets tests locate test.mod
+set_target_properties(unit-modcc PROPERTIES EXCLUDE_FROM_ALL "${ARB_WITH_EXTERNAL_MODCC}")  # quoted: an empty value must still form a property/value pair
diff --git a/tests/modcc/alg_collect.hpp b/test/unit-modcc/alg_collect.hpp
similarity index 100%
rename from tests/modcc/alg_collect.hpp
rename to test/unit-modcc/alg_collect.hpp
diff --git a/tests/modcc/test.cpp b/test/unit-modcc/common.cpp
similarity index 97%
rename from tests/modcc/test.cpp
rename to test/unit-modcc/common.cpp
index cc337703929a31d97739f79c01082419cf34048e..33bb1e93f4dc78ed4d4b63840005372da0062fdf 100644
--- a/tests/modcc/test.cpp
+++ b/test/unit-modcc/common.cpp
@@ -1,7 +1,7 @@
 #include <regex>
 #include <string>
 
-#include "test.hpp"
+#include "common.hpp"
 
 bool g_verbose_flag = false;
 
diff --git a/tests/modcc/test.hpp b/test/unit-modcc/common.hpp
similarity index 100%
rename from tests/modcc/test.hpp
rename to test/unit-modcc/common.hpp
diff --git a/tests/modcc/driver.cpp b/test/unit-modcc/driver.cpp
similarity index 94%
rename from tests/modcc/driver.cpp
rename to test/unit-modcc/driver.cpp
index dcd36906c77fc5485fa3d80b31e3fd7483156436..792c2cb6f897e6eea8e550497d9f8a85f401cb62 100644
--- a/tests/modcc/driver.cpp
+++ b/test/unit-modcc/driver.cpp
@@ -4,7 +4,7 @@
 
 #include <cstring>
 
-#include "test.hpp"
+#include "common.hpp"
 
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
diff --git a/tests/modcc/expr_expand.cpp b/test/unit-modcc/expr_expand.cpp
similarity index 100%
rename from tests/modcc/expr_expand.cpp
rename to test/unit-modcc/expr_expand.cpp
diff --git a/tests/modcc/expr_expand.hpp b/test/unit-modcc/expr_expand.hpp
similarity index 100%
rename from tests/modcc/expr_expand.hpp
rename to test/unit-modcc/expr_expand.hpp
diff --git a/data/test.mod b/test/unit-modcc/test.mod
similarity index 100%
rename from data/test.mod
rename to test/unit-modcc/test.mod
diff --git a/tests/modcc/test_kinetic_rewriter.cpp b/test/unit-modcc/test_kinetic_rewriter.cpp
similarity index 99%
rename from tests/modcc/test_kinetic_rewriter.cpp
rename to test/unit-modcc/test_kinetic_rewriter.cpp
index 62c2feb2e663eb1536579766cc367a96379ac6db..924d63d5f43ad2567313e7584a5941997b3a6580 100644
--- a/tests/modcc/test_kinetic_rewriter.cpp
+++ b/test/unit-modcc/test_kinetic_rewriter.cpp
@@ -6,8 +6,8 @@
 #include "parser.hpp"
 
 #include "alg_collect.hpp"
+#include "common.hpp"
 #include "expr_expand.hpp"
-#include "test.hpp"
 
 expr_list_type& statements(Expression *e) {
     if (e) {
diff --git a/tests/modcc/test_lexer.cpp b/test/unit-modcc/test_lexer.cpp
similarity index 99%
rename from tests/modcc/test_lexer.cpp
rename to test/unit-modcc/test_lexer.cpp
index 91ee7df7d1ddac0f48e6da079e54ed115279f75a..4641ae3d377ca09318a1006cdedee46e46fb48bf 100644
--- a/tests/modcc/test_lexer.cpp
+++ b/test/unit-modcc/test_lexer.cpp
@@ -4,7 +4,7 @@
 #include <iterator>
 #include <utility>
 
-#include "test.hpp"
+#include "common.hpp"
 #include "lexer.hpp"
 
 class VerboseLexer: public Lexer {
diff --git a/tests/modcc/test_module.cpp b/test/unit-modcc/test_module.cpp
similarity index 95%
rename from tests/modcc/test_module.cpp
rename to test/unit-modcc/test_module.cpp
index f1517653f372e37ee0fafa2ef57a8737975007d1..e82f0058350397ea9fe7ee8cc3bd3be9e368b8ce 100644
--- a/tests/modcc/test_module.cpp
+++ b/test/unit-modcc/test_module.cpp
@@ -1,6 +1,6 @@
-#include "test.hpp"
-#include "module.hpp"
+#include "common.hpp"
 #include "io/bulkio.hpp"
+#include "module.hpp"
 
 TEST(Module, open) {
     Module m(io::read_all(DATADIR "/test.mod"), "test.mod");
diff --git a/tests/modcc/test_msparse.cpp b/test/unit-modcc/test_msparse.cpp
similarity index 99%
rename from tests/modcc/test_msparse.cpp
rename to test/unit-modcc/test_msparse.cpp
index 6229e725a3912046cdd70c0fa152d3a5580f5ad7..9b97187f5143e603553a605ce26e75ff9f5515bd 100644
--- a/tests/modcc/test_msparse.cpp
+++ b/test/unit-modcc/test_msparse.cpp
@@ -1,7 +1,7 @@
 #include <utility>
 
+#include "common.hpp"
 #include "msparse.hpp"
-#include "test.hpp"
 
 using drow = msparse::row<double>;
 using dmatrix = msparse::matrix<double>;
diff --git a/tests/modcc/test_parser.cpp b/test/unit-modcc/test_parser.cpp
similarity index 99%
rename from tests/modcc/test_parser.cpp
rename to test/unit-modcc/test_parser.cpp
index 7b263e7975a7ddf359375bf041b7fff9789a1b9d..75ae2a6243d29042e098ec22dc202f49e9c0e40c 100644
--- a/tests/modcc/test_parser.cpp
+++ b/test/unit-modcc/test_parser.cpp
@@ -2,7 +2,7 @@
 #include <cstring>
 #include <memory>
 
-#include "test.hpp"
+#include "common.hpp"
 #include "module.hpp"
 #include "modccutil.hpp"
 #include "parser.hpp"
diff --git a/tests/modcc/test_prefixbuf.cpp b/test/unit-modcc/test_prefixbuf.cpp
similarity index 99%
rename from tests/modcc/test_prefixbuf.cpp
rename to test/unit-modcc/test_prefixbuf.cpp
index a4a90d8c396462c3334174d418481bb724e285f6..14a0c9335518fb638e3f80bf39dc40f44400d686 100644
--- a/tests/modcc/test_prefixbuf.cpp
+++ b/test/unit-modcc/test_prefixbuf.cpp
@@ -4,7 +4,7 @@
 #include <sstream>
 
 #include "io/prefixbuf.hpp"
-#include "test.hpp"
+#include "common.hpp"
 
 using namespace io;
 
diff --git a/tests/modcc/test_printers.cpp b/test/unit-modcc/test_printers.cpp
similarity index 97%
rename from tests/modcc/test_printers.cpp
rename to test/unit-modcc/test_printers.cpp
index 410c0b184a9b99dc212ffbc59bbc184413cf96e3..8ba6a2b8646ffde0e003b7b454acd4592bf86ebe 100644
--- a/tests/modcc/test_printers.cpp
+++ b/test/unit-modcc/test_printers.cpp
@@ -2,7 +2,7 @@
 #include <string>
 #include <sstream>
 
-#include "test.hpp"
+#include "common.hpp"
 
 #include "printer/cexpr_emit.hpp"
 #include "printer/cprinter.hpp"
@@ -42,7 +42,7 @@ TEST(scalar_printer, constants) {
         {"-1./0.",     "-INFINITY"},
         {"(-1)^0.5",   "NAN"},
         {"1/(-1./0.)", "-0."},
-        {"1-1",        "0"},
+        {"1-1",        "0."},
     };
 
     for (const auto& tc: testcases) {
@@ -59,13 +59,13 @@ TEST(scalar_printer, constants) {
 TEST(scalar_printer, statement) {
     std::vector<testcase> testcases = {
         {"y=x+3",            "y=x+3"},
-        {"y=y^z",            "y=std::pow(y,z)"},
+        {"y=y^z",            "y=pow(y,z)"},
         {"y=exp((x/2) + 3)", "y=exp(x/2+3)"},
         {"z=a/b/c",          "z=a/b/c"},
         {"z=a/(b/c)",        "z=a/(b/c)"},
         {"z=(a*b)/c",        "z=a*b/c"},
         {"z=a-(b+c)",        "z=a-(b+c)"},
-        {"z=(a>0)<(b>0)",    "z=a>0<(b>0)"},
+        {"z=(a>0)<(b>0)",    "z=a>0.<(b>0.)"},
         {"z=a- -2",          "z=a- -2"},
         {"z=abs(x-z)",       "z=fabs(x-z)"},
         {"z=min(x,y)",       "z=min(x,y)"},
diff --git a/tests/modcc/test_removelocals.cpp b/test/unit-modcc/test_removelocals.cpp
similarity index 99%
rename from tests/modcc/test_removelocals.cpp
rename to test/unit-modcc/test_removelocals.cpp
index 140de2f18a969e45e1f0933a4ca2a77c8b538565..f0d931ab2e35890c93cc30bbdf8db628ddc78b1d 100644
--- a/tests/modcc/test_removelocals.cpp
+++ b/test/unit-modcc/test_removelocals.cpp
@@ -8,7 +8,7 @@
 #include "parser.hpp"
 #include "scope.hpp"
 
-#include "test.hpp"
+#include "common.hpp"
 
 symbol_ptr make_global(std::string name) {
     return make_symbol<VariableExpression>(Location(), std::move(name));
diff --git a/tests/modcc/test_simd_backend.cpp b/test/unit-modcc/test_simd_backend.cpp
similarity index 98%
rename from tests/modcc/test_simd_backend.cpp
rename to test/unit-modcc/test_simd_backend.cpp
index 8f2ac112a84affb136c191e77a8edca40466e85e..151c58e4d33633cc2276552966f5d469b6d8b600 100644
--- a/tests/modcc/test_simd_backend.cpp
+++ b/test/unit-modcc/test_simd_backend.cpp
@@ -4,7 +4,7 @@
 #include "backends/simd.hpp"
 #include "textbuffer.hpp"
 #include "token.hpp"
-#include "test.hpp"
+#include "common.hpp"
 
 
 TEST(avx512, emit_binary_op) {
diff --git a/tests/modcc/test_symdiff.cpp b/test/unit-modcc/test_symdiff.cpp
similarity index 99%
rename from tests/modcc/test_symdiff.cpp
rename to test/unit-modcc/test_symdiff.cpp
index 49571d3f22bf871be31efeda8b2a0818126d4c9b..9cc1ded5faa5537c4895736f5cb5f8f059e5cc9d 100644
--- a/tests/modcc/test_symdiff.cpp
+++ b/test/unit-modcc/test_symdiff.cpp
@@ -1,6 +1,6 @@
 #include <cmath>
 
-#include "test.hpp"
+#include "common.hpp"
 
 #include "symdiff.hpp"
 #include "parser.hpp"
diff --git a/tests/modcc/test_symge.cpp b/test/unit-modcc/test_symge.cpp
similarity index 99%
rename from tests/modcc/test_symge.cpp
rename to test/unit-modcc/test_symge.cpp
index 7ce77aebb5423e28a5b26ff38117dbc132f80096..525ec3ad09474739c4f50f1e95fe927f6b58e399 100644
--- a/tests/modcc/test_symge.cpp
+++ b/test/unit-modcc/test_symge.cpp
@@ -3,7 +3,7 @@
 #include <vector>
 
 #include "symge.hpp"
-#include "test.hpp"
+#include "common.hpp"
 
 using namespace symge;
 
diff --git a/tests/modcc/test_visitors.cpp b/test/unit-modcc/test_visitors.cpp
similarity index 99%
rename from tests/modcc/test_visitors.cpp
rename to test/unit-modcc/test_visitors.cpp
index 786d8064b9495bab2a930a1c468e6b41760d583d..8a5160015afa6f9a1afc1ba602de8c0d1e876438 100644
--- a/tests/modcc/test_visitors.cpp
+++ b/test/unit-modcc/test_visitors.cpp
@@ -1,4 +1,4 @@
-#include "test.hpp"
+#include "common.hpp"
 
 #include "perfvisitor.hpp"
 #include "parser.hpp"
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..094e03fd0a6b67480d0db708c2b3fddf6818f94e
--- /dev/null
+++ b/test/unit/CMakeLists.txt
@@ -0,0 +1,110 @@
+# TODO: test_mechanisms and the mechanism prototype comparisons must
+# be re-jigged.
+
+# Build prototype mechanisms for testing in test_mechanisms.
+#
+#include(${PROJECT_SOURCE_DIR}/mechanisms/BuildModules.cmake)
+#
+# set(proto_mechanisms pas hh expsyn exp2syn test_kin1 test_kinlva test_ca)
+# set(mech_proto_dir "${CMAKE_CURRENT_BINARY_DIR}/mech_proto")
+# file(MAKE_DIRECTORY "${mech_proto_dir}")
+# 
+# build_modules(
+#     ${proto_mechanisms}
+#     SOURCE_DIR "${PROJECT_SOURCE_DIR}/mechanisms/mod"
+#     DEST_DIR "${mech_proto_dir}"
+#     MECH_SUFFIX _proto
+#     MODCC_FLAGS -t cpu
+#     GENERATES _cpu.hpp
+#     TARGET build_test_mods
+# )
+
+# Unit test sources: every file listed here is compiled into the `unit` executable.
+
+set(unit_sources
+    test_algorithms.cpp
+    test_any.cpp
+    test_backend.cpp
+    test_double_buffer.cpp
+    test_cell.cpp
+    test_compartments.cpp
+    test_counter.cpp
+    test_cycle.cpp
+    test_domain_decomposition.cpp
+    test_either.cpp
+    test_event_binner.cpp
+    test_event_generators.cpp
+    test_event_queue.cpp
+    test_filter.cpp
+    test_fvm_layout.cpp
+    test_fvm_lowered.cpp
+    test_mc_cell_group.cpp
+    test_lexcmp.cpp
+    test_lif_cell_group.cpp
+    test_maputil.cpp
+    test_mask_stream.cpp
+    test_math.cpp
+    test_matrix.cpp
+    test_mechanisms.cpp
+    test_mechcat.cpp
+    test_merge_events.cpp
+    test_multi_event_stream.cpp
+    test_nop.cpp
+    test_optional.cpp
+    test_mechinfo.cpp
+    test_padded.cpp
+    test_partition.cpp
+    test_partition_by_constraint.cpp
+    test_path.cpp
+    test_point.cpp
+    test_probe.cpp
+    test_range.cpp
+    test_segment.cpp
+    test_schedule.cpp
+    test_spike_source.cpp
+    test_local_context.cpp
+    test_simd.cpp
+    test_span.cpp
+    test_spikes.cpp
+    test_spike_store.cpp
+    test_stats.cpp
+    test_strprintf.cpp
+    test_swcio.cpp
+    test_synapses.cpp
+    test_time_seq.cpp
+    test_tree.cpp
+    test_transform.cpp
+    test_uninitialized.cpp
+    test_unique_any.cpp
+    test_vector.cpp
+    test_version.cpp
+
+    # unit test driver
+    test.cpp
+
+    # common routines
+    stats.cpp
+)
+
+if(ARB_WITH_CUDA)
+    list(APPEND unit_sources
+        # Sources added to the unit test build only when ARB_WITH_CUDA is true.
+        test_intrin.cu
+        test_gpu_stack.cu
+        test_matrix.cu
+        test_matrix_cpuvsgpu.cpp
+        test_reduce_by_key.cu
+        test_vector.cu
+
+        test_mc_cell_group_gpu.cpp
+        test_multi_event_stream_gpu.cpp
+        test_multi_event_stream_gpu.cu
+        test_spikes_gpu.cpp
+    )
+endif()
+
+add_executable(unit ${unit_sources})  # one test binary built from every source collected above
+target_compile_options(unit PRIVATE ${CXXOPT_ARCH})  # CXXOPT_ARCH is defined outside this file
+target_compile_definitions(unit PRIVATE "DATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"")  # quoted path of the swc/ data directory
+target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers)
+
diff --git a/tests/unit/common.hpp b/test/unit/common.hpp
similarity index 100%
rename from tests/unit/common.hpp
rename to test/unit/common.hpp
diff --git a/tests/unit/instrument_malloc.hpp b/test/unit/instrument_malloc.hpp
similarity index 100%
rename from tests/unit/instrument_malloc.hpp
rename to test/unit/instrument_malloc.hpp
diff --git a/tests/unit/stats.cpp b/test/unit/stats.cpp
similarity index 100%
rename from tests/unit/stats.cpp
rename to test/unit/stats.cpp
diff --git a/tests/unit/stats.hpp b/test/unit/stats.hpp
similarity index 100%
rename from tests/unit/stats.hpp
rename to test/unit/stats.hpp
diff --git a/data/ball_and_stick.swc b/test/unit/swc/ball_and_stick.swc
similarity index 100%
rename from data/ball_and_stick.swc
rename to test/unit/swc/ball_and_stick.swc
diff --git a/data/example.swc b/test/unit/swc/example.swc
similarity index 100%
rename from data/example.swc
rename to test/unit/swc/example.swc
diff --git a/tests/unit/test.cpp b/test/unit/test.cpp
similarity index 100%
rename from tests/unit/test.cpp
rename to test/unit/test.cpp
diff --git a/tests/unit/test_algorithms.cpp b/test/unit/test_algorithms.cpp
similarity index 98%
rename from tests/unit/test_algorithms.cpp
rename to test/unit/test_algorithms.cpp
index a8919dd7170897ea499f4597690933155ff1144c..d0fe5b01927c05462fdba2c6a588f5d0c412e7cb 100644
--- a/tests/unit/test_algorithms.cpp
+++ b/test/unit/test_algorithms.cpp
@@ -6,11 +6,21 @@
 
 #include "../gtest.h"
 
-#include <algorithms.hpp>
-#include <util/compat.hpp>
-#include <util/debug.hpp>
-#include <util/index_into.hpp>
-#include <util/meta.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "algorithms.hpp"
+#include "util/index_into.hpp"
+#include "util/meta.hpp"
+
+// (Pending abstraction of threading interface)
+#include <arbor/version.hpp>
+#if defined(ARB_TBB_ENABLED)
+    #include "threading/tbb.hpp"
+#elif defined(ARB_CTHREAD_ENABLED)
+    #include "threading/cthread.hpp"
+#else
+    #include "threading/serial.hpp"
+#endif
 
 #include "common.hpp"
 
diff --git a/tests/unit/test_any.cpp b/test/unit/test_any.cpp
similarity index 100%
rename from tests/unit/test_any.cpp
rename to test/unit/test_any.cpp
diff --git a/test/unit/test_backend.cpp b/test/unit/test_backend.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5e1c18d4741d1efaa3f24d4e46e02a0f61cdb05e
--- /dev/null
+++ b/test/unit/test_backend.cpp
@@ -0,0 +1,16 @@
+#include <arbor/version.hpp>
+
+#include "backends.hpp"
+#include "fvm_lowered_cell.hpp"
+
+#include "../gtest.h"
+
+using namespace arb;
+
+TEST(backends, gpu_test) {  // requesting the gpu backend must throw unless ARB_GPU_ENABLED is defined
+#ifndef ARB_GPU_ENABLED
+    EXPECT_ANY_THROW(make_fvm_lowered_cell(backend_kind::gpu));  // built without GPU support
+#else
+    EXPECT_NO_THROW(make_fvm_lowered_cell(backend_kind::gpu));  // built with GPU support
+#endif
+}
diff --git a/tests/unit/test_cell.cpp b/test/unit/test_cell.cpp
similarity index 100%
rename from tests/unit/test_cell.cpp
rename to test/unit/test_cell.cpp
diff --git a/tests/unit/test_compartments.cpp b/test/unit/test_compartments.cpp
similarity index 100%
rename from tests/unit/test_compartments.cpp
rename to test/unit/test_compartments.cpp
diff --git a/tests/unit/test_counter.cpp b/test/unit/test_counter.cpp
similarity index 100%
rename from tests/unit/test_counter.cpp
rename to test/unit/test_counter.cpp
diff --git a/tests/unit/test_cycle.cpp b/test/unit/test_cycle.cpp
similarity index 100%
rename from tests/unit/test_cycle.cpp
rename to test/unit/test_cycle.cpp
diff --git a/tests/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp
similarity index 97%
rename from tests/unit/test_domain_decomposition.cpp
rename to test/unit/test_domain_decomposition.cpp
index 9c3ec59a9bc4f34db45d72ca529ea98c444727f6..0dae331cb3054afcd52c18dcb4a068e96b8d20cb 100644
--- a/tests/unit/test_domain_decomposition.cpp
+++ b/test/unit/test_domain_decomposition.cpp
@@ -2,11 +2,12 @@
 
 #include <stdexcept>
 
-#include <backends.hpp>
-#include <communication/distributed_context.hpp>
-#include <domain_decomposition.hpp>
-#include <hardware/node_info.hpp>
-#include <load_balance.hpp>
+#include <arbor/distributed_context.hpp>
+
+#include "backends.hpp"
+#include "domain_decomposition.hpp"
+#include "hardware/node_info.hpp"
+#include "load_balance.hpp"
 
 #include "../simple_recipes.hpp"
 
diff --git a/tests/unit/test_double_buffer.cpp b/test/unit/test_double_buffer.cpp
similarity index 100%
rename from tests/unit/test_double_buffer.cpp
rename to test/unit/test_double_buffer.cpp
diff --git a/tests/unit/test_either.cpp b/test/unit/test_either.cpp
similarity index 100%
rename from tests/unit/test_either.cpp
rename to test/unit/test_either.cpp
diff --git a/tests/unit/test_event_binner.cpp b/test/unit/test_event_binner.cpp
similarity index 100%
rename from tests/unit/test_event_binner.cpp
rename to test/unit/test_event_binner.cpp
diff --git a/tests/unit/test_event_generators.cpp b/test/unit/test_event_generators.cpp
similarity index 100%
rename from tests/unit/test_event_generators.cpp
rename to test/unit/test_event_generators.cpp
diff --git a/tests/unit/test_event_queue.cpp b/test/unit/test_event_queue.cpp
similarity index 100%
rename from tests/unit/test_event_queue.cpp
rename to test/unit/test_event_queue.cpp
diff --git a/tests/unit/test_filter.cpp b/test/unit/test_filter.cpp
similarity index 100%
rename from tests/unit/test_filter.cpp
rename to test/unit/test_filter.cpp
diff --git a/tests/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
similarity index 98%
rename from tests/unit/test_fvm_layout.cpp
rename to test/unit/test_fvm_layout.cpp
index bd4774f6f42590b9135654c87078e3dcc913ce2b..d5a96bb53479643112e2f7b23f3ede78bd29d433 100644
--- a/tests/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -1,13 +1,14 @@
 #include <vector>
 
-#include <cell.hpp>
-#include <fvm_layout.hpp>
-#include <math.hpp>
-#include <mechcat.hpp>
-#include <util/maputil.hpp>
-#include <util/optional.hpp>
-#include <util/rangeutil.hpp>
-#include <util/span.hpp>
+#include <arbor/util/optional.hpp>
+#include <arbor/mechcat.hpp>
+
+#include "cell.hpp"
+#include "fvm_layout.hpp"
+#include "math.hpp"
+#include "util/maputil.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
 
 #include "common.hpp"
 #include "../common_cells.hpp"
diff --git a/tests/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
similarity index 95%
rename from tests/unit/test_fvm_lowered.cpp
rename to test/unit/test_fvm_lowered.cpp
index 35244ee76f3ac0b71c891b6891668e6afc919a3b..c30bf57ded7d05ceb2e84e1e136d56b4ff2e2c71 100644
--- a/tests/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -2,26 +2,27 @@
 
 #include "../gtest.h"
 
-#include <algorithms.hpp>
-#include <backends/fvm_types.hpp>
-#include <backends/multicore/fvm.hpp>
-#include <backends/multicore/mechanism.hpp>
-#include <communication/distributed_context.hpp>
-#include <cell.hpp>
-#include <common_types.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <fvm_lowered_cell_impl.hpp>
-#include <load_balance.hpp>
-#include <math.hpp>
-#include <simulation.hpp>
-#include <recipe.hpp>
-#include <sampler_map.hpp>
-#include <sampling.hpp>
-#include <schedule.hpp>
-#include <segment.hpp>
-#include <util/meta.hpp>
-#include <util/maputil.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+#include <arbor/distributed_context.hpp>
+#include <arbor/fvm_types.hpp>
+
+#include "algorithms.hpp"
+#include "backends/multicore/fvm.hpp"
+#include "backends/multicore/mechanism.hpp"
+#include "cell.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "fvm_lowered_cell_impl.hpp"
+#include "load_balance.hpp"
+#include "math.hpp"
+#include "simulation.hpp"
+#include "recipe.hpp"
+#include "sampler_map.hpp"
+#include "sampling.hpp"
+#include "schedule.hpp"
+#include "segment.hpp"
+#include "util/meta.hpp"
+#include "util/maputil.hpp"
+#include "util/rangeutil.hpp"
 
 #include "common.hpp"
 #include "../common_cells.hpp"
diff --git a/tests/unit/test_gpu_stack.cu b/test/unit/test_gpu_stack.cu
similarity index 100%
rename from tests/unit/test_gpu_stack.cu
rename to test/unit/test_gpu_stack.cu
diff --git a/tests/unit/test_intrin.cpp b/test/unit/test_intrin.cpp
similarity index 100%
rename from tests/unit/test_intrin.cpp
rename to test/unit/test_intrin.cpp
diff --git a/tests/unit/test_intrin.cu b/test/unit/test_intrin.cu
similarity index 95%
rename from tests/unit/test_intrin.cu
rename to test/unit/test_intrin.cu
index a5203bc940ce9609ac1024cd93547651096689ed..e8d7aa34a043ab5b2cb03be35524ebb212993694 100644
--- a/tests/unit/test_intrin.cu
+++ b/test/unit/test_intrin.cu
@@ -2,12 +2,12 @@
 
 #include <limits>
 
-#include <backends/gpu/cuda_atomic.hpp>
-#include <backends/gpu/math.hpp>
-#include <backends/gpu/managed_ptr.hpp>
-#include <memory/memory.hpp>
-#include <util/rangeutil.hpp>
-#include <util/span.hpp>
+#include "backends/gpu/cuda_atomic.hpp"
+#include "backends/gpu/math_cu.hpp"
+#include "backends/gpu/managed_ptr.hpp"
+#include "memory/memory.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
 
 namespace kernels {
     template <typename T>
diff --git a/tests/unit/test_lexcmp.cpp b/test/unit/test_lexcmp.cpp
similarity index 82%
rename from tests/unit/test_lexcmp.cpp
rename to test/unit/test_lexcmp.cpp
index c37016272df8156faa704d7b424f2c5a38ea03d5..36016a74ecb3aeb32752c1d9eb762d6bebd8184f 100644
--- a/tests/unit/test_lexcmp.cpp
+++ b/test/unit/test_lexcmp.cpp
@@ -1,12 +1,12 @@
 #include "../gtest.h"
 
-#include <util/lexcmp_def.hpp>
+#include <arbor/util/lexcmp_def.hpp>
 
 struct lexcmp_test_one {
     int foo;
 };
 
-DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_one, (a.foo), (b.foo))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_one, (a.foo), (b.foo))
 
 TEST(lexcmp_def,one) {
     lexcmp_test_one p{3}, q{4}, r{4};
@@ -29,7 +29,7 @@ struct lexcmp_test_three {
 };
 
 // test fields in reverse order: z, y, x
-DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_three, (a.z,a.y,a.x), (b.z,b.y,b.x))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_three, (a.z,a.y,a.x), (b.z,b.y,b.x))
 
 TEST(lexcmp_def,three) {
     lexcmp_test_three p{1,"foo",2};
@@ -69,7 +69,7 @@ private:
     int foo_;
 };
 
-DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_refmemfn, (a.foo()), (b.foo()))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING(lexcmp_test_refmemfn, (a.foo()), (b.foo()))
 
 TEST(lexcmp_def,refmemfn) {
     lexcmp_test_refmemfn p{3};
@@ -95,7 +95,7 @@ private:
     int bar_;
 };
 
-DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(lexcmp_test_valmemfn, (a.foo(),a.bar()), (b.foo(),b.bar()))
+ARB_DEFINE_LEXICOGRAPHIC_ORDERING_BY_VALUE(lexcmp_test_valmemfn, (a.foo(),a.bar()), (b.foo(),b.bar()))
 
 TEST(lexcmp_def,proxy) {
     lexcmp_test_valmemfn p{3,2}, q{3,4};
diff --git a/tests/unit/test_lif_cell_group.cpp b/test/unit/test_lif_cell_group.cpp
similarity index 98%
rename from tests/unit/test_lif_cell_group.cpp
rename to test/unit/test_lif_cell_group.cpp
index 7c54b74d16b6e1ecf9a4f08353c1fa05f293db0b..385ce7c716ec1bbc0221a1dab243d7e905659244 100644
--- a/tests/unit/test_lif_cell_group.cpp
+++ b/test/unit/test_lif_cell_group.cpp
@@ -1,6 +1,9 @@
 #include "../gtest.h"
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/threadinfo.hpp>
+
 #include <cell_group_factory.hpp>
-#include <communication/distributed_context.hpp>
 #include <fstream>
 #include <lif_cell_description.hpp>
 #include <lif_cell_group.hpp>
@@ -156,7 +159,7 @@ TEST(lif_cell_group, spikes) {
     path_recipe recipe(2, 1000, 0.1);
 
     hw::node_info nd;
-    nd.num_cpu_cores = threading::num_threads();
+    nd.num_cpu_cores = arb::num_threads();
 
     auto decomp = partition_load_balance(recipe, nd, &context);
     simulation sim(recipe, decomp, &context);
diff --git a/tests/unit/test_local_context.cpp b/test/unit/test_local_context.cpp
similarity index 95%
rename from tests/unit/test_local_context.cpp
rename to test/unit/test_local_context.cpp
index 1b02173d5375d34d58b0ba0cb0ccbc6cd46914b6..7ac207d8950ca98526a6a21b15daa97bc693124b 100644
--- a/tests/unit/test_local_context.cpp
+++ b/test/unit/test_local_context.cpp
@@ -1,8 +1,9 @@
 #include <vector>
 
 #include "../gtest.h"
-#include <communication/distributed_context.hpp>
-#include <spike.hpp>
+
+#include <arbor/distributed_context.hpp>
+#include <arbor/spike.hpp>
 
 // Test that there are no errors constructing a distributed_context from a local_context
 TEST(local_context, construct_distributed_context)
diff --git a/tests/unit/test_maputil.cpp b/test/unit/test_maputil.cpp
similarity index 100%
rename from tests/unit/test_maputil.cpp
rename to test/unit/test_maputil.cpp
diff --git a/tests/unit/test_mask_stream.cpp b/test/unit/test_mask_stream.cpp
similarity index 100%
rename from tests/unit/test_mask_stream.cpp
rename to test/unit/test_mask_stream.cpp
diff --git a/tests/unit/test_math.cpp b/test/unit/test_math.cpp
similarity index 99%
rename from tests/unit/test_math.cpp
rename to test/unit/test_math.cpp
index 88e979ed2e376361a7ec4c96bda0103db60ae6ab..c7c9cd0bbaf3794be0c40f4635971126f0c14057 100644
--- a/tests/unit/test_math.cpp
+++ b/test/unit/test_math.cpp
@@ -3,8 +3,9 @@
 
 #include "../gtest.h"
 
-#include <math.hpp>
-#include <util/compat.hpp>
+#include <arbor/util/compat.hpp>
+
+#include "math.hpp"
 
 using namespace arb::math;
 
diff --git a/tests/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
similarity index 100%
rename from tests/unit/test_matrix.cpp
rename to test/unit/test_matrix.cpp
diff --git a/tests/unit/test_matrix.cu b/test/unit/test_matrix.cu
similarity index 100%
rename from tests/unit/test_matrix.cu
rename to test/unit/test_matrix.cu
diff --git a/tests/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
similarity index 100%
rename from tests/unit/test_matrix_cpuvsgpu.cpp
rename to test/unit/test_matrix_cpuvsgpu.cpp
diff --git a/tests/unit/test_mc_cell_group.cpp b/test/unit/test_mc_cell_group.cpp
similarity index 92%
rename from tests/unit/test_mc_cell_group.cpp
rename to test/unit/test_mc_cell_group.cpp
index 4bef15599e15dd11dda1646cd5a0ba6b2a949dd3..0a0c7d1da55b933481ea4906de72668ec838b59d 100644
--- a/tests/unit/test_mc_cell_group.cpp
+++ b/test/unit/test_mc_cell_group.cpp
@@ -1,11 +1,12 @@
 #include "../gtest.h"
 
-#include <backends.hpp>
-#include <common_types.hpp>
-#include <epoch.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <mc_cell_group.hpp>
-#include <util/rangeutil.hpp>
+#include <arbor/common_types.hpp>
+
+#include "backends.hpp"
+#include "epoch.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "mc_cell_group.hpp"
+#include "util/rangeutil.hpp"
 
 #include "common.hpp"
 #include "../common_cells.hpp"
diff --git a/tests/unit/test_mc_cell_group_gpu.cpp b/test/unit/test_mc_cell_group_gpu.cpp
similarity index 79%
rename from tests/unit/test_mc_cell_group_gpu.cpp
rename to test/unit/test_mc_cell_group_gpu.cpp
index 841db8ed2e9c6e6145e5fe134ec37c6125408a45..8b35751088fd59a884fe1be61daac6e824686cf7 100644
--- a/tests/unit/test_mc_cell_group_gpu.cpp
+++ b/test/unit/test_mc_cell_group_gpu.cpp
@@ -1,10 +1,11 @@
 #include "../gtest.h"
 
-#include <backends.hpp>
-#include <common_types.hpp>
-#include <epoch.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <mc_cell_group.hpp>
+#include <arbor/common_types.hpp>
+
+#include "backends.hpp"
+#include "epoch.hpp"
+#include "fvm_lowered_cell.hpp"
+#include "mc_cell_group.hpp"
 
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
@@ -26,7 +27,7 @@ namespace {
     }
 }
 
-TEST(mc_cell_group, test)
+TEST(mc_cell_group, gpu_test)
 {
     mc_cell_group group({0}, cable1d_recipe(make_cell()), lowered_cell());
     group.advance(epoch(0, 50), 0.01, {});
diff --git a/tests/unit/test_mechanisms.cpp b/test/unit/test_mechanisms.cpp
similarity index 100%
rename from tests/unit/test_mechanisms.cpp
rename to test/unit/test_mechanisms.cpp
diff --git a/tests/unit/test_mechcat.cpp b/test/unit/test_mechcat.cpp
similarity index 98%
rename from tests/unit/test_mechcat.cpp
rename to test/unit/test_mechcat.cpp
index f7f6db969189f922c060ca788d54396a1936b8e6..c6a16a61787ff94b4e2b8fe2610b4c3cbbb664cc 100644
--- a/tests/unit/test_mechcat.cpp
+++ b/test/unit/test_mechcat.cpp
@@ -1,7 +1,7 @@
-#include <backends/fvm_types.hpp>
-#include <mechanism.hpp>
-#include <mechcat.hpp>
-#include <mechinfo.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/mechcat.hpp>
+#include <arbor/mechinfo.hpp>
 
 #include "common.hpp"
 
diff --git a/tests/unit/test_mechinfo.cpp b/test/unit/test_mechinfo.cpp
similarity index 93%
rename from tests/unit/test_mechinfo.cpp
rename to test/unit/test_mechinfo.cpp
index ab8329248fe007cf75901d06ac5ae3de2d42f85c..b5995d11a1efa7445150d5e2d79823ea0298cc90 100644
--- a/tests/unit/test_mechinfo.cpp
+++ b/test/unit/test_mechinfo.cpp
@@ -3,10 +3,8 @@
 #include <vector>
 
 #include <cell.hpp>
-//#include "mechinfo.hpp"
 
 #include "../gtest.h"
-#include "../test_util.hpp"
 
 // TODO: This test is really checking part of the recipe description
 // for cable1d cells, so move it there. Make actual tests for mechinfo
diff --git a/tests/unit/test_merge_events.cpp b/test/unit/test_merge_events.cpp
similarity index 100%
rename from tests/unit/test_merge_events.cpp
rename to test/unit/test_merge_events.cpp
diff --git a/tests/unit/test_multi_event_stream.cpp b/test/unit/test_multi_event_stream.cpp
similarity index 95%
rename from tests/unit/test_multi_event_stream.cpp
rename to test/unit/test_multi_event_stream.cpp
index 8183e57a33e8d04db7d4d0e007391a3203911e84..62d51fb94a152a887f08ea0485feb76812916bdb 100644
--- a/tests/unit/test_multi_event_stream.cpp
+++ b/test/unit/test_multi_event_stream.cpp
@@ -7,7 +7,7 @@
 
 using namespace arb;
 
-namespace common_events {
+namespace {
     // set up four targets across three streams and two mech ids.
 
     constexpr cell_local_size_type mech_1 = 10u;
@@ -28,7 +28,7 @@ namespace common_events {
     // cell_2 (handle 1 and 2) has two events at t=2 and t=5
     // cell_3 (handle 3) has one event at t=3
 
-    std::vector<deliverable_event> events = {
+    std::vector<deliverable_event> common_events = {
         deliverable_event(2.f, handle[1], 2.f),
         deliverable_event(3.f, handle[0], 1.f),
         deliverable_event(3.f, handle[3], 4.f),
@@ -45,12 +45,11 @@ namespace {
 
 TEST(multi_event_stream, init) {
     using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
-    using namespace common_events;
 
     multi_event_stream m(n_cell);
     EXPECT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
     EXPECT_FALSE(m.empty());
@@ -61,12 +60,11 @@ TEST(multi_event_stream, init) {
 
 TEST(multi_event_stream, mark) {
     using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
-    using namespace common_events;
 
     multi_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
@@ -172,12 +170,11 @@ TEST(multi_event_stream, mark) {
 
 TEST(multi_event_stream, time_if_before) {
     using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
-    using namespace common_events;
 
     multi_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
diff --git a/tests/unit/test_multi_event_stream_gpu.cpp b/test/unit/test_multi_event_stream_gpu.cpp
similarity index 94%
rename from tests/unit/test_multi_event_stream_gpu.cpp
rename to test/unit/test_multi_event_stream_gpu.cpp
index 62bffa02f358760e10e3a841a823ca191fd0f254..f57ca150961dbee0b1745c55b316321db2130834 100644
--- a/tests/unit/test_multi_event_stream_gpu.cpp
+++ b/test/unit/test_multi_event_stream_gpu.cpp
@@ -18,7 +18,7 @@ namespace {
 
 using deliverable_event_stream = gpu::multi_event_stream<deliverable_event>;
 
-namespace common_events {
+namespace {
     // set up four targets across three streams and two mech ids.
 
     constexpr cell_local_size_type mech_1 = 10u;
@@ -39,7 +39,7 @@ namespace common_events {
     // cell_2 (handle 1 and 2) has two events at t=2 and t=5
     // cell_3 (handle 3) has one event at t=3
 
-    std::vector<deliverable_event> events = {
+    std::vector<deliverable_event> common_events = {
         deliverable_event(2.f, handle[1], 2.f),
         deliverable_event(3.f, handle[0], 1.f),
         deliverable_event(3.f, handle[3], 4.f),
@@ -47,13 +47,11 @@ namespace common_events {
     };
 }
 
-TEST(multi_event_stream, init) {
-    using namespace common_events;
-
+TEST(multi_event_stream_gpu, init) {
     deliverable_event_stream m(n_cell);
     EXPECT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, evtime));
     util::stable_sort_by(events, evindex);
     m.init(events);
@@ -83,13 +81,11 @@ std::vector<deliverable_event_data> copy_marked_events(int ci, deliverable_event
     return ev;
 }
 
-TEST(multi_event_stream, mark) {
-    using namespace common_events;
-
+TEST(multi_event_stream_gpu, mark) {
     deliverable_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, evtime));
     util::stable_sort_by(events, evindex);
     m.init(events);
@@ -194,13 +190,11 @@ TEST(multi_event_stream, mark) {
     }
 }
 
-TEST(multi_event_stream, time_if_before) {
-    using namespace common_events;
-
+TEST(multi_event_stream_gpu, time_if_before) {
     deliverable_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
-    auto events = common_events::events;
+    auto events = common_events;
     ASSERT_TRUE(util::is_sorted_by(events, evtime));
     util::stable_sort_by(events, evindex);
     m.init(events);
diff --git a/tests/unit/test_multi_event_stream_gpu.cu b/test/unit/test_multi_event_stream_gpu.cu
similarity index 100%
rename from tests/unit/test_multi_event_stream_gpu.cu
rename to test/unit/test_multi_event_stream_gpu.cu
diff --git a/tests/unit/test_nop.cpp b/test/unit/test_nop.cpp
similarity index 100%
rename from tests/unit/test_nop.cpp
rename to test/unit/test_nop.cpp
diff --git a/tests/unit/test_optional.cpp b/test/unit/test_optional.cpp
similarity index 99%
rename from tests/unit/test_optional.cpp
rename to test/unit/test_optional.cpp
index d19fbbc2720fb32ef1bd0caee81100b0deb0eeba..8759ea8da23de58433cb874bb114f4f3150f0c5c 100644
--- a/tests/unit/test_optional.cpp
+++ b/test/unit/test_optional.cpp
@@ -4,7 +4,9 @@
 #include <typeinfo>
 
 #include "../gtest.h"
-#include "util/optional.hpp"
+
+#include <arbor/util/optional.hpp>
+
 #include "common.hpp"
 
 using namespace arb::util;
diff --git a/tests/unit/test_padded.cpp b/test/unit/test_padded.cpp
similarity index 100%
rename from tests/unit/test_padded.cpp
rename to test/unit/test_padded.cpp
diff --git a/tests/unit/test_partition.cpp b/test/unit/test_partition.cpp
similarity index 97%
rename from tests/unit/test_partition.cpp
rename to test/unit/test_partition.cpp
index e0ce1a673d37aac12756a078d36cf2e54a5378a7..e2f5e5d9e757170bba38938238cec79b7e4d504a 100644
--- a/tests/unit/test_partition.cpp
+++ b/test/unit/test_partition.cpp
@@ -5,7 +5,8 @@
 #include <string>
 #include <vector>
 
-#include <util/debug.hpp>
+#include <arbor/assert.hpp>
+
 #include <util/nop.hpp>
 #include <util/partition.hpp>
 
@@ -47,7 +48,7 @@ TEST(partition, short_partition_view) {
 
 TEST(partition, check_monotonicity) {
     // override any EXPECTS checks in partition
-    util::global_failed_assertion_handler = util::ignore_failed_assertion;
+    arb::global_failed_assertion_handler = arb::ignore_failed_assertion;
 
     int divs_ok[] = {1, 2, 2, 3, 3};
     EXPECT_NO_THROW(util::partition_view(divs_ok).validate());
diff --git a/tests/unit/test_partition_by_constraint.cpp b/test/unit/test_partition_by_constraint.cpp
similarity index 84%
rename from tests/unit/test_partition_by_constraint.cpp
rename to test/unit/test_partition_by_constraint.cpp
index 7b8400af9c4a6fd20fa97b17164f7d8b75a52b2c..149453278227ed2ac12345056e6cdc4b360d7573 100644
--- a/tests/unit/test_partition_by_constraint.cpp
+++ b/test/unit/test_partition_by_constraint.cpp
@@ -5,8 +5,9 @@
 #include <string>
 #include <vector>
 
+#include <arbor/common_types.hpp>
+
 #include <simd/simd.hpp>
-#include <common_types.hpp>
 #include <backends/multicore/multicore_common.hpp>
 #include <backends/multicore/partition_by_constraint.hpp>
 
@@ -30,9 +31,9 @@ TEST(partition_by_constraint, partition_contiguous) {
 
     output = multicore::make_constraint_partition(input_index, input_size_, simd_width_);
 
-    EXPECT_EQ(0, output.independent.size());
-    EXPECT_EQ(0, output.none.size());
-    EXPECT_EQ(0, output.constant.size());
+    EXPECT_EQ(0u, output.independent.size());
+    EXPECT_EQ(0u, output.none.size());
+    EXPECT_EQ(0u, output.constant.size());
     EXPECT_EQ(expected, output.contiguous);
 }
 
@@ -51,14 +52,14 @@ TEST(partition_by_constraint, partition_constant) {
 
     output = multicore::make_constraint_partition(input_index, input_size_, simd_width_);
 
-    EXPECT_EQ(0, output.independent.size());
-    EXPECT_EQ(0, output.none.size());
+    EXPECT_EQ(0u, output.independent.size());
+    EXPECT_EQ(0u, output.none.size());
     if(simd_width_ != 1) {
-        EXPECT_EQ(0, output.contiguous.size());
+        EXPECT_EQ(0u, output.contiguous.size());
         EXPECT_EQ(expected, output.constant);
     }
     else {
-        EXPECT_EQ(0, output.constant.size());
+        EXPECT_EQ(0u, output.constant.size());
         EXPECT_EQ(expected, output.contiguous);
     }
 }
@@ -76,14 +77,14 @@ TEST(partition_by_constraint, partition_independent) {
 
     output = multicore::make_constraint_partition(input_index, input_size_, simd_width_);
 
-    EXPECT_EQ(0, output.constant.size());
-    EXPECT_EQ(0, output.none.size());
+    EXPECT_EQ(0u, output.constant.size());
+    EXPECT_EQ(0u, output.none.size());
     if(simd_width_ != 1) {
-        EXPECT_EQ(0, output.contiguous.size());
+        EXPECT_EQ(0u, output.contiguous.size());
         EXPECT_EQ(expected, output.independent);
     }
     else {
-        EXPECT_EQ(0, output.independent.size());
+        EXPECT_EQ(0u, output.independent.size());
         EXPECT_EQ(expected, output.contiguous);
     }
 }
@@ -101,14 +102,14 @@ TEST(partition_by_constraint, partition_none) {
 
     output = multicore::make_constraint_partition(input_index, input_size_, simd_width_);
 
-    EXPECT_EQ(0, output.independent.size());
-    EXPECT_EQ(0, output.constant.size());
+    EXPECT_EQ(0u, output.independent.size());
+    EXPECT_EQ(0u, output.constant.size());
     if(simd_width_ != 1) {
-        EXPECT_EQ(0, output.contiguous.size());
+        EXPECT_EQ(0u, output.contiguous.size());
         EXPECT_EQ(expected, output.none);
     }
     else {
-        EXPECT_EQ(0, output.none.size());
+        EXPECT_EQ(0u, output.none.size());
         EXPECT_EQ(expected, output.contiguous);
     }
 }
diff --git a/tests/unit/test_path.cpp b/test/unit/test_path.cpp
similarity index 100%
rename from tests/unit/test_path.cpp
rename to test/unit/test_path.cpp
diff --git a/tests/unit/test_point.cpp b/test/unit/test_point.cpp
similarity index 98%
rename from tests/unit/test_point.cpp
rename to test/unit/test_point.cpp
index a5e2300fb285e5a01ea6aa583e66564649b4d7af..2edb08edf3f8d289b0dde2cc110b879c2d4d1e5f 100644
--- a/tests/unit/test_point.cpp
+++ b/test/unit/test_point.cpp
@@ -3,7 +3,7 @@
 
 #include "../gtest.h"
 
-#include "../src/point.hpp"
+#include "point.hpp"
 
 using namespace arb;
 
diff --git a/tests/unit/test_probe.cpp b/test/unit/test_probe.cpp
similarity index 98%
rename from tests/unit/test_probe.cpp
rename to test/unit/test_probe.cpp
index 92ff46dd5e33c7feb1f7487c77fc5d780ee46eb3..6064a02b8bcc34cb620a691479b04d57150cc6d8 100644
--- a/tests/unit/test_probe.cpp
+++ b/test/unit/test_probe.cpp
@@ -1,8 +1,9 @@
 #include "../gtest.h"
 
+#include <arbor/common_types.hpp>
+
 #include <backends/event.hpp>
 #include <backends/multicore/fvm.hpp>
-#include <common_types.hpp>
 #include <cell.hpp>
 #include <fvm_lowered_cell_impl.hpp>
 #include <util/rangeutil.hpp>
diff --git a/tests/unit/test_range.cpp b/test/unit/test_range.cpp
similarity index 100%
rename from tests/unit/test_range.cpp
rename to test/unit/test_range.cpp
diff --git a/tests/unit/test_reduce_by_key.cu b/test/unit/test_reduce_by_key.cu
similarity index 100%
rename from tests/unit/test_reduce_by_key.cu
rename to test/unit/test_reduce_by_key.cu
diff --git a/tests/unit/test_schedule.cpp b/test/unit/test_schedule.cpp
similarity index 99%
rename from tests/unit/test_schedule.cpp
rename to test/unit/test_schedule.cpp
index 504d50cf7172bd40216c41ce31c2f1801a3e64da..6f08d2bc79acc9dff6e1d1ab66a3179a7c2bccb1 100644
--- a/tests/unit/test_schedule.cpp
+++ b/test/unit/test_schedule.cpp
@@ -3,7 +3,8 @@
 #include <stdexcept>
 #include <vector>
 
-#include <common_types.hpp>
+#include <arbor/common_types.hpp>
+
 #include <schedule.hpp>
 #include <util/partition.hpp>
 #include <util/rangeutil.hpp>
diff --git a/tests/unit/test_segment.cpp b/test/unit/test_segment.cpp
similarity index 98%
rename from tests/unit/test_segment.cpp
rename to test/unit/test_segment.cpp
index 574e5ad9fd476c077abd085224b34d728688e6b7..fc984ee469b808feae09a7fedafd2479ba33d447 100644
--- a/tests/unit/test_segment.cpp
+++ b/test/unit/test_segment.cpp
@@ -2,7 +2,7 @@
 
 #include "../gtest.h"
 
-#include "../src/segment.hpp"
+#include "segment.hpp"
 
 TEST(segments, soma)
 {
diff --git a/tests/unit/test_simd.cpp b/test/unit/test_simd.cpp
similarity index 99%
rename from tests/unit/test_simd.cpp
rename to test/unit/test_simd.cpp
index 15589290161cfafe5f0ccc56259d830c3bae7422..dcb102a14938fa2fc4fb43ba77632dad3e50f380 100644
--- a/tests/unit/test_simd.cpp
+++ b/test/unit/test_simd.cpp
@@ -8,7 +8,6 @@
 #include <simd/simd.hpp>
 #include <simd/avx.hpp>
 
-#include <common_types.hpp>
 #include "common.hpp"
 
 using namespace arb::simd;
diff --git a/tests/unit/test_span.cpp b/test/unit/test_span.cpp
similarity index 100%
rename from tests/unit/test_span.cpp
rename to test/unit/test_span.cpp
diff --git a/tests/unit/test_spike_source.cpp b/test/unit/test_spike_source.cpp
similarity index 99%
rename from tests/unit/test_spike_source.cpp
rename to test/unit/test_spike_source.cpp
index 7e9939b46fa84c834e6006ae365f7837f6ea8dfb..28a327abebe973b72ea0e8c3064f3dfb5e0ac0d9 100644
--- a/tests/unit/test_spike_source.cpp
+++ b/test/unit/test_spike_source.cpp
@@ -1,5 +1,6 @@
 #include "../gtest.h"
 
+#include <spike_source_cell.hpp>
 #include <spike_source_cell_group.hpp>
 #include <time_sequence.hpp>
 #include <util/unique_any.hpp>
diff --git a/tests/unit/test_spike_store.cpp b/test/unit/test_spike_store.cpp
similarity index 95%
rename from tests/unit/test_spike_store.cpp
rename to test/unit/test_spike_store.cpp
index a0b9a7589e93a1d8bce04aac7f3de26d4de68fbe..9a526cc24c8db4d083a175fa6d92a8db48de5f53 100644
--- a/tests/unit/test_spike_store.cpp
+++ b/test/unit/test_spike_store.cpp
@@ -1,8 +1,8 @@
 #include "../gtest.h"
 
-#include <spike.hpp>
-#include <threading/threading.hpp>
-#include <thread_private_spike_store.hpp>
+#include <arbor/spike.hpp>
+
+#include "thread_private_spike_store.hpp"
 
 using arb::spike;
 
diff --git a/tests/unit/test_spikes.cpp b/test/unit/test_spikes.cpp
similarity index 97%
rename from tests/unit/test_spikes.cpp
rename to test/unit/test_spikes.cpp
index 2cc3850917915bb5cabc508b5011b442a457abd7..656f05001d69e69b18853c7c69a3a8fa1f320445 100644
--- a/tests/unit/test_spikes.cpp
+++ b/test/unit/test_spikes.cpp
@@ -1,6 +1,7 @@
 #include "../gtest.h"
 
-#include <spike.hpp>
+#include <arbor/spike.hpp>
+
 #include <backends/multicore/fvm.hpp>
 #include <memory/memory.hpp>
 #include <util/rangeutil.hpp>
@@ -13,11 +14,13 @@ using namespace arb;
 
 #ifndef USE_BACKEND
 using backend = multicore::backend;
+#define SPIKES_TEST_CLASS spikes
 #else
 using backend = USE_BACKEND;
+#define SPIKES_TEST_CLASS spikes_gpu
 #endif
 
-TEST(spikes, threshold_watcher) {
+TEST(SPIKES_TEST_CLASS, threshold_watcher) {
     using value_type = backend::value_type;
     using index_type = backend::index_type;
     using array = backend::array;
diff --git a/tests/unit/test_spikes_gpu.cpp b/test/unit/test_spikes_gpu.cpp
similarity index 100%
rename from tests/unit/test_spikes_gpu.cpp
rename to test/unit/test_spikes_gpu.cpp
diff --git a/tests/unit/test_stats.cpp b/test/unit/test_stats.cpp
similarity index 100%
rename from tests/unit/test_stats.cpp
rename to test/unit/test_stats.cpp
diff --git a/tests/unit/test_strprintf.cpp b/test/unit/test_strprintf.cpp
similarity index 100%
rename from tests/unit/test_strprintf.cpp
rename to test/unit/test_strprintf.cpp
diff --git a/tests/unit/test_swcio.cpp b/test/unit/test_swcio.cpp
similarity index 99%
rename from tests/unit/test_swcio.cpp
rename to test/unit/test_swcio.cpp
index 6f415c39f13e2ef902982112062f85f310cf68fc..c27318b7ab7ddf175b83685e8ea1e5c87f6adc94 100644
--- a/tests/unit/test_swcio.cpp
+++ b/test/unit/test_swcio.cpp
@@ -9,8 +9,8 @@
 
 #include "../gtest.h"
 
-#include "../src/cell.hpp"
-#include "../src/swcio.hpp"
+#include "cell.hpp"
+#include "swcio.hpp"
 
 // Path to data directory can be overriden at compile time.
 #if !defined(DATADIR)
diff --git a/tests/unit/test_synapses.cpp b/test/unit/test_synapses.cpp
similarity index 98%
rename from tests/unit/test_synapses.cpp
rename to test/unit/test_synapses.cpp
index a5559409e5af8f77368fd9d80fbaaa437d9112d7..08a1422135030a238bd2cc535e9164b5b852b961 100644
--- a/tests/unit/test_synapses.cpp
+++ b/test/unit/test_synapses.cpp
@@ -4,17 +4,17 @@
 #include <tuple>
 #include <vector>
 
+#include <arbor/util/optional.hpp>
+#include <arbor/mechcat.hpp>
+
 #include <cell.hpp>
 #include <constants.hpp>
-#include <mechcat.hpp>
 #include <backends/multicore/fvm.hpp>
 #include <backends/multicore/mechanism.hpp>
-#include <util/optional.hpp>
 #include <util/maputil.hpp>
 #include <util/range.hpp>
 
 #include "common.hpp"
-#include "../test_util.hpp"
 
 using namespace arb;
 
diff --git a/tests/unit/test_time_seq.cpp b/test/unit/test_time_seq.cpp
similarity index 100%
rename from tests/unit/test_time_seq.cpp
rename to test/unit/test_time_seq.cpp
diff --git a/tests/unit/test_transform.cpp b/test/unit/test_transform.cpp
similarity index 100%
rename from tests/unit/test_transform.cpp
rename to test/unit/test_transform.cpp
diff --git a/tests/unit/test_tree.cpp b/test/unit/test_tree.cpp
similarity index 96%
rename from tests/unit/test_tree.cpp
rename to test/unit/test_tree.cpp
index a6426ce964373cce04bdec6f87453ff51f13213d..9fea9278f3a6d265560d7c24671a78379483ad85 100644
--- a/tests/unit/test_tree.cpp
+++ b/test/unit/test_tree.cpp
@@ -1,25 +1,15 @@
 #include <fstream>
 #include <iostream>
-#include <json/json.hpp>
 #include <numeric>
 #include <vector>
 
 #include "../gtest.h"
 
 #include <tree.hpp>
-#include <util/debug.hpp>
-
-// Path to data directory can be overriden at compile time.
-#if !defined(DATADIR)
-#define DATADIR "../data"
-#endif
-
-using json = nlohmann::json;
 
 using namespace arb;
 using int_type = tree::int_type;
 
-
 TEST(tree, from_segment_index) {
     auto no_parent = tree::no_parent;
 
diff --git a/tests/unit/test_uninitialized.cpp b/test/unit/test_uninitialized.cpp
similarity index 98%
rename from tests/unit/test_uninitialized.cpp
rename to test/unit/test_uninitialized.cpp
index 59683b2e11138fb15a74ee97c14c084953c6f289..dbb06d8033c7f61de080fea0f14db7135bc97a6c 100644
--- a/tests/unit/test_uninitialized.cpp
+++ b/test/unit/test_uninitialized.cpp
@@ -1,6 +1,7 @@
 #include "../gtest.h"
 
-#include "util/uninitialized.hpp"
+#include <arbor/util/uninitialized.hpp>
+
 #include "common.hpp"
 
 using namespace arb::util;
diff --git a/tests/unit/test_unique_any.cpp b/test/unit/test_unique_any.cpp
similarity index 100%
rename from tests/unit/test_unique_any.cpp
rename to test/unit/test_unique_any.cpp
diff --git a/tests/unit/test_vector.cpp b/test/unit/test_vector.cpp
similarity index 100%
rename from tests/unit/test_vector.cpp
rename to test/unit/test_vector.cpp
diff --git a/tests/unit/test_vector.cu b/test/unit/test_vector.cu
similarity index 100%
rename from tests/unit/test_vector.cu
rename to test/unit/test_vector.cu
diff --git a/test/unit/test_version.cpp b/test/unit/test_version.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cdf4230451befa58ed49f1e869b85cdf142429e3
--- /dev/null
+++ b/test/unit/test_version.cpp
@@ -0,0 +1,18 @@
+#include <string>
+
+#include "../gtest.h"
+
+#include <arbor/version.hpp>
+
+TEST(version, libmatch) {
+    using std::string;
+    // Values taken from the ARB_VERSION / ARB_SOURCE_ID macros at compile time.
+    string header_version = ARB_VERSION;
+    string header_source_id = ARB_SOURCE_ID;
+    // Values read from the arb::version / arb::source_id objects (presumably defined in the library -- confirm).
+    string lib_version = arb::version;
+    string lib_source_id = arb::source_id;
+    // Compared as std::string copies; NOTE(review): if the sources are C strings, EXPECT_EQ on them directly would compare pointers.
+    EXPECT_EQ(header_version, lib_version);
+    EXPECT_EQ(header_source_id, lib_source_id);
+}
diff --git a/test/validation/CMakeLists.txt b/test/validation/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..740ce9ad33d62dc1ac5255f25744bad19725acaa
--- /dev/null
+++ b/test/validation/CMakeLists.txt
@@ -0,0 +1,24 @@
+set(validation_sources
+    # validation tests
+    validate_ball_and_stick.cpp
+    validate_compartment_policy.cpp
+    validate_soma.cpp
+    validate_kinetic.cpp
+    validate_synapses.cpp
+
+    # support code
+    validation_data.cpp
+    trace_analysis.cpp
+
+    # test driver
+    validate.cpp
+)
+# Build the validate executable; ARB_DATADIR is defined as a quoted string holding ARB_VALIDATION_DATA_DIR.
+add_executable(validate ${validation_sources})
+target_compile_definitions(validate PRIVATE "ARB_DATADIR=\"${ARB_VALIDATION_DATA_DIR}\"")
+target_link_libraries(validate PRIVATE gtest arbor arbor-aux ext-json)
+target_link_libraries(validate PRIVATE arbor-private-headers) # temporary
+# With ARB_BUILD_VALIDATION_DATA set, the validation_data target is built before validate.
+if(ARB_BUILD_VALIDATION_DATA)
+    add_dependencies(validate validation_data)
+endif()
diff --git a/tests/validation/convergence_test.hpp b/test/validation/convergence_test.hpp
similarity index 99%
rename from tests/validation/convergence_test.hpp
rename to test/validation/convergence_test.hpp
index 610d4abb5552e3675c0d4f16b826312b30e6beb0..8ee744c23645a771d74ce9a46bcfa99840235ee3 100644
--- a/tests/validation/convergence_test.hpp
+++ b/test/validation/convergence_test.hpp
@@ -2,6 +2,8 @@
 
 #include <vector>
 
+#include <nlohmann/json.hpp>
+
 #include <simulation.hpp>
 #include <schedule.hpp>
 #include <sampling.hpp>
@@ -9,7 +11,6 @@
 #include <util/filter.hpp>
 #include <util/rangeutil.hpp>
 
-#include <json/json.hpp>
 
 #include "../gtest.h"
 
diff --git a/tests/validation/make_image.sh b/test/validation/make_image.sh
similarity index 100%
rename from tests/validation/make_image.sh
rename to test/validation/make_image.sh
diff --git a/tests/validation/plot.py b/test/validation/plot.py
similarity index 100%
rename from tests/validation/plot.py
rename to test/validation/plot.py
diff --git a/tests/validation/trace_analysis.cpp b/test/validation/trace_analysis.cpp
similarity index 97%
rename from tests/validation/trace_analysis.cpp
rename to test/validation/trace_analysis.cpp
index 685699835a851e15c668cccb4ef7f3a695f1fe90..5e0268c3581992f0fd597d7713fc4b8621449610 100644
--- a/tests/validation/trace_analysis.cpp
+++ b/test/validation/trace_analysis.cpp
@@ -2,13 +2,14 @@
 #include <fstream>
 #include <string>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
 #include "../gtest.h"
 
+#include <arbor/util/optional.hpp>
+
 #include <math.hpp>
 #include <simple_sampler.hpp>
-#include <util/optional.hpp>
 #include <util/partition.hpp>
 #include <util/rangeutil.hpp>
 
@@ -31,7 +32,7 @@ struct trace_interpolant {
 
         auto part = util::partition_view(tx);
         auto i = part.index(t);
-        EXPECTS(i != part.npos);
+        arb_assert(i != part.npos);
         auto p = part[i];
         return math::lerp(vx[i], vx[i+1], (t-p.first)/(p.second-p.first));
     }
diff --git a/tests/validation/trace_analysis.hpp b/test/validation/trace_analysis.hpp
similarity index 98%
rename from tests/validation/trace_analysis.hpp
rename to test/validation/trace_analysis.hpp
index 78a19b5949a93427a8c5113f9bdbf69b1703fd9a..26f2823b0ee14318805400a041468056b2ea0d85 100644
--- a/tests/validation/trace_analysis.hpp
+++ b/test/validation/trace_analysis.hpp
@@ -4,9 +4,10 @@
 
 #include "../gtest.h"
 
+#include <arbor/util/optional.hpp>
+
 #include <simple_sampler.hpp>
 #include <math.hpp>
-#include <util/optional.hpp>
 #include <util/path.hpp>
 #include <util/rangeutil.hpp>
 
diff --git a/tests/validation/validate.cpp b/test/validation/validate.cpp
similarity index 100%
rename from tests/validation/validate.cpp
rename to test/validation/validate.cpp
diff --git a/tests/validation/validate_ball_and_stick.cpp b/test/validation/validate_ball_and_stick.cpp
similarity index 98%
rename from tests/validation/validate_ball_and_stick.cpp
rename to test/validation/validate_ball_and_stick.cpp
index d7365330ddb6c6d9c819102e9b526c95c8199937..e2b6a39de8301ce3ccdbd0a17f543f99f40f15f2 100644
--- a/tests/validation/validate_ball_and_stick.cpp
+++ b/test/validation/validate_ball_and_stick.cpp
@@ -1,7 +1,9 @@
 #include <iostream>
 
+#include <arbor/common_types.hpp>
+#include <nlohmann/json.hpp>
+
 #include <cell.hpp>
-#include <common_types.hpp>
 #include <load_balance.hpp>
 #include <hardware/node_info.hpp>
 #include <hardware/gpu.hpp>
@@ -11,7 +13,6 @@
 #include <simple_sampler.hpp>
 #include <util/meta.hpp>
 #include <util/path.hpp>
-#include <json/json.hpp>
 
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
diff --git a/tests/validation/validate_compartment_policy.cpp b/test/validation/validate_compartment_policy.cpp
similarity index 97%
rename from tests/validation/validate_compartment_policy.cpp
rename to test/validation/validate_compartment_policy.cpp
index 27c944ed07eab92a0d3bcc8269208d5a95a9784e..d0543384186bff204ccb7a702d71aaea26189cf3 100644
--- a/tests/validation/validate_compartment_policy.cpp
+++ b/test/validation/validate_compartment_policy.cpp
@@ -1,9 +1,10 @@
 #include <fstream>
 #include <utility>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <arbor/common_types.hpp>
 
-#include <common_types.hpp>
 #include <cell.hpp>
 #include <simulation.hpp>
 #include <recipe.hpp>
@@ -14,7 +15,6 @@
 
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
-#include "../test_util.hpp"
 
 #include "trace_analysis.hpp"
 #include "validation_data.hpp"
diff --git a/tests/validation/validate_kinetic.cpp b/test/validation/validate_kinetic.cpp
similarity index 98%
rename from tests/validation/validate_kinetic.cpp
rename to test/validation/validate_kinetic.cpp
index 0fe78a426252f27daed6ff8b0915cdc2bde54e13..54fc750ba3a3f6c9e12bd319f835692b9c727794 100644
--- a/tests/validation/validate_kinetic.cpp
+++ b/test/validation/validate_kinetic.cpp
@@ -1,8 +1,9 @@
 #include "../gtest.h"
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <arbor/common_types.hpp>
 
-#include <common_types.hpp>
 #include <cell.hpp>
 #include <hardware/node_info.hpp>
 #include <hardware/gpu.hpp>
diff --git a/tests/validation/validate_soma.cpp b/test/validation/validate_soma.cpp
similarity index 96%
rename from tests/validation/validate_soma.cpp
rename to test/validation/validate_soma.cpp
index 7ccfb7580359d7c1c2d6040d621cb11f43fee6ea..2beb50bc4204238960ad056393f7160b7ce294e1 100644
--- a/tests/validation/validate_soma.cpp
+++ b/test/validation/validate_soma.cpp
@@ -1,6 +1,7 @@
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
+
+#include <arbor/common_types.hpp>
 
-#include <common_types.hpp>
 #include <cell.hpp>
 #include <hardware/gpu.hpp>
 #include <hardware/node_info.hpp>
diff --git a/tests/validation/validate_synapses.cpp b/test/validation/validate_synapses.cpp
similarity index 99%
rename from tests/validation/validate_synapses.cpp
rename to test/validation/validate_synapses.cpp
index 9a2eb22e16b442bfd9365301a0d864feb6e4d71e..2d0aeab8cc2989a9bf310ec4011bfc30c573f968 100644
--- a/tests/validation/validate_synapses.cpp
+++ b/test/validation/validate_synapses.cpp
@@ -1,8 +1,9 @@
+#include <nlohmann/json.hpp>
+
 #include <cell.hpp>
 #include <cell_group.hpp>
 #include <hardware/node_info.hpp>
 #include <hardware/gpu.hpp>
-#include <json/json.hpp>
 #include <load_balance.hpp>
 #include <simulation.hpp>
 #include <recipe.hpp>
diff --git a/tests/validation/validation_data.cpp b/test/validation/validation_data.cpp
similarity index 99%
rename from tests/validation/validation_data.cpp
rename to test/validation/validation_data.cpp
index 8e9250fe3a81e77b3361d0e1c8a63c0811ec708b..5426a57bd72fe5c5ac39380b55b701de2f70f468 100644
--- a/tests/validation/validation_data.cpp
+++ b/test/validation/validation_data.cpp
@@ -4,7 +4,7 @@
 #include <stdexcept>
 #include <string>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
 #include <simple_sampler.hpp>
 #include <util/path.hpp>
diff --git a/tests/validation/validation_data.hpp b/test/validation/validation_data.hpp
similarity index 98%
rename from tests/validation/validation_data.hpp
rename to test/validation/validation_data.hpp
index ed2cb09ff76bd3c5d8829e6b99bd746f8d87f477..84056ebba100574cf17ad23866368e31d503473e 100644
--- a/tests/validation/validation_data.hpp
+++ b/test/validation/validation_data.hpp
@@ -5,7 +5,7 @@
 #include <string>
 #include <utility>
 
-#include <json/json.hpp>
+#include <nlohmann/json.hpp>
 
 #include <simple_sampler.hpp>
 #include <util/path.hpp>
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
deleted file mode 100644
index bd115d75f26319145740c17131a6e90fb241660d..0000000000000000000000000000000000000000
--- a/tests/CMakeLists.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-# google test framework
-add_library(gtest gtest-all.cpp)
-
-# Unit tests
-add_subdirectory(unit)
-
-# Test validating models, possebly needing other software installed
-add_subdirectory(validation)
-
-# Test for the internode communication (eg. mpi)
-add_subdirectory(global_communication)
-
-# Microbenchmarks.
-# Attempt to update git submodule if required.
-check_git_submodule(google_bench "${CMAKE_CURRENT_SOURCE_DIR}/ubench/google-benchmark")
-if (google_bench_avail)
-    add_subdirectory(ubench)
-else()
-    add_error_target(ubenches
-        "Building micro benchmarks"
-        "The git submodule for google benchmark is not available")
-endif()
-
-# regression / delta tests
-# Employing the full simulator. validated using deltas on output data
-
-# modcc tests
-if(NOT use_external_modcc)
-    add_subdirectory(modcc)
-endif()
-
-
-# Proposed additional test types:
-
-# Test to check integration between components
-
-# Numbered tests based on bugs in the tracker
diff --git a/tests/global_communication/CMakeLists.txt b/tests/global_communication/CMakeLists.txt
deleted file mode 100644
index bd7c32d235417a85d7c27de34255c1cde0cc3724..0000000000000000000000000000000000000000
--- a/tests/global_communication/CMakeLists.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-set(HEADERS
-    ${PROJECT_SOURCE_DIR}/src/swcio.hpp
-)
-set(COMMUNICATION_SOURCES
-    test_domain_decomposition.cpp
-    test_exporter_spike_file.cpp
-    test_communicator.cpp
-    test_mpi.cpp
-
-    # unit test driver
-    test.cpp
-)
-
-add_executable(global_communication.exe ${COMMUNICATION_SOURCES} ${HEADERS})
-
-set(TARGETS global_communication.exe)
-
-foreach(target ${TARGETS})
-    target_link_libraries(${target} LINK_PUBLIC gtest)
-    target_link_libraries(${target} LINK_PUBLIC ${ARB_LIBRARIES})
-    target_link_libraries(${target} LINK_PUBLIC ${EXTERNAL_LIBRARIES})
-
-    if(ARB_WITH_MPI)
-        target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES})
-        set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-    endif()
-
-    set_target_properties(
-        ${target}
-        PROPERTIES
-        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests"
-    )
-endforeach()
-
diff --git a/tests/global_communication/mpi_listener.hpp b/tests/global_communication/mpi_listener.hpp
deleted file mode 100644
index 0226666f8de16f9a9a9782f82ac6f68a009bc39e..0000000000000000000000000000000000000000
--- a/tests/global_communication/mpi_listener.hpp
+++ /dev/null
@@ -1,159 +0,0 @@
-#pragma once
-
-#include <cstdio>
-#include <fstream>
-#include <stdexcept>
-
-#include <communication/distributed_context.hpp>
-
-#include "../gtest.h"
-
-/// A specialized listener desinged for printing test results with MPI.
-///
-/// When tests are run with MPI, one instance of each test is run on
-/// each rank. The default behavior of Google Test is for each test
-/// instance to print to stdout. With more than one MPI rank, this creates
-/// the usual MPI mess of output.
-///
-/// This specialization has the first rank (rank 0) print to stdout, and all MPI
-/// ranks print their output to separate text files.
-/// For each test a message is printed showing
-///     - detailed messages about errors on rank 0
-///     - a head count of errors that occured on other MPI ranks
-
-class mpi_listener : public testing::EmptyTestEventListener {
-private:
-    using UnitTest = testing::UnitTest;
-    using TestCase = testing::TestCase;
-    using TestInfo = testing::TestInfo;
-    using TestPartResult = testing::TestPartResult;
-
-    int rank_;
-    int size_;
-    std::ofstream fid_;
-    char buffer_[1024];
-    int test_case_failures_;
-    int test_case_tests_;
-    int test_failures_;
-    const arb::distributed_context* context_;
-
-    bool does_print() const {
-        return rank_==0;
-    }
-
-    void print(const char* s) {
-        if (fid_) {
-            fid_ << s;
-        }
-        if (does_print()) {
-            std::cout << s;
-        }
-    }
-
-    void print(const std::string& s) {
-        print(s.c_str());
-    }
-
-    /// convenience function that handles the logic of using snprintf
-    /// and forwarding the results to file and/or stdout.
-    ///
-    /// TODO : it might be an idea to use a resizeable buffer
-    template <typename... Args>
-    void printf_helper(const char* s, Args&&... args) {
-        std::snprintf(buffer_, sizeof(buffer_), s, std::forward<Args>(args)...);
-        print(buffer_);
-    }
-
-public:
-    mpi_listener(std::string f_base, const arb::distributed_context* ctx): context_(ctx) {
-        rank_ = context_->id();
-        size_ = context_->size();
-
-        if (f_base.empty()) {
-            return;
-        }
-        std::string fname = f_base + "_" + std::to_string(rank_) + ".txt";
-        fid_.open(fname);
-        if (!fid_) {
-            throw std::runtime_error("could not open file " + fname + " for test output");
-        }
-    }
-
-    /// Messages that are printed at the start and end of the test program.
-    /// i.e. once only.
-    virtual void OnTestProgramStart(const UnitTest&) override {
-        printf_helper("*** test output for rank %d of %d\n\n", rank_, size_);
-    }
-    virtual void OnTestProgramEnd(const UnitTest&) override {
-        printf_helper("*** end test output for rank %d of %d\n", rank_, size_);
-    }
-
-    /// Messages that are printed at the start and end of each test case.
-    /// On startup a counter that counts the number of tests that fail in
-    /// this test case is initialized to zero, and will be incremented for each
-    /// test that fails.
-    virtual void OnTestCaseStart(const TestCase& test_case) override {
-        test_case_failures_ = 0;
-        test_case_tests_ = 0;
-    }
-    virtual void OnTestCaseEnd(const TestCase& test_case) override {
-        printf_helper(
-            "    PASSED %d of %d tests in %s\n",
-            test_case_tests_-test_case_failures_,
-            test_case_tests_,
-            test_case.name()
-        );
-        if (test_case_failures_>0) {
-            printf_helper(
-                "    FAILED %d of %d tests in %s\n",
-                test_case_failures_,
-                test_case_tests_,
-                test_case.name()
-            );
-        }
-        print("\n");
-    }
-
-    // Called before a test starts.
-    virtual void OnTestStart(const TestInfo& test_info) override {
-        printf_helper( "TEST:  %s::%s\n", test_info.test_case_name(), test_info.name());
-        test_failures_ = 0;
-    }
-
-    // Called after a failed assertion or a SUCCEED() invocation.
-    virtual void OnTestPartResult(const TestPartResult& test_part_result) override {
-        // indent all lines in the summary by 4 spaces
-        std::string summary = "    " + std::string(test_part_result.summary());
-        auto pos = summary.find("\n");
-        while (pos!=summary.size() && pos!=std::string::npos) {
-            summary.replace(pos, 1, "\n    ");
-            pos = summary.find("\n", pos+1);
-        }
-
-        printf_helper(
-            "  LOCAL_%s\n    %s:%d\n%s\n",
-            test_part_result.failed() ? "FAIL" : "SUCCESS",
-            test_part_result.file_name(),
-            test_part_result.line_number(),
-            summary.c_str()
-        );
-
-        // note that there was a failure in this test case
-        if (test_part_result.failed()) {
-            test_failures_++;
-        }
-    }
-
-    // Called after a test ends.
-    virtual void OnTestEnd(const TestInfo& test_info) override {
-        test_case_tests_++;
-
-        // count the number of ranks that had errors
-        int global_errors = context_->sum(test_failures_>0 ? 1 : 0);
-        if (global_errors>0) {
-            test_case_failures_++;
-            printf_helper("  GLOBAL_FAIL on %d ranks\n", global_errors);
-        }
-    }
-};
-
diff --git a/tests/modcc/CMakeLists.txt b/tests/modcc/CMakeLists.txt
deleted file mode 100644
index 64d5e06c04789a5da97178be469bb21ace5e4ba7..0000000000000000000000000000000000000000
--- a/tests/modcc/CMakeLists.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-set(MODCC_TEST_SOURCES
-    # unit tests
-    test_lexer.cpp
-    test_kinetic_rewriter.cpp
-    test_module.cpp
-    test_msparse.cpp
-    test_parser.cpp
-    test_prefixbuf.cpp
-    test_printers.cpp
-    test_removelocals.cpp
-    test_symdiff.cpp
-    test_symge.cpp
-    test_visitors.cpp
-
-    # unit test driver
-    driver.cpp
-
-    # utility
-    expr_expand.cpp
-
-    test_simd_backend.cpp
-    test.cpp
-)
-
-include_directories("${PROJECT_SOURCE_DIR}/modcc")
-
-add_definitions("-DDATADIR=\"${PROJECT_SOURCE_DIR}/data\"")
-add_executable(test_modcc ${MODCC_TEST_SOURCES})
-
-target_link_libraries(test_modcc LINK_PUBLIC compiler gtest)
-
-set_target_properties(test_modcc
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests"
-)
diff --git a/tests/test_util.hpp b/tests/test_util.hpp
deleted file mode 100644
index 1faf8ef3f216a37ea2683385ce1ff2de21d4cf12..0000000000000000000000000000000000000000
--- a/tests/test_util.hpp
+++ /dev/null
@@ -1,146 +0,0 @@
-#include <chrono>
-#include <cmath>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <string>
-#include <vector>
-
-#include <json/json.hpp>
-
-// helpful code for running tests
-// a bit messy: refactor when it gets heavier and obvious patterns emerge...
-
-namespace testing{
-
-using time_point    = std::chrono::time_point<std::chrono::system_clock>;
-using duration_type = std::chrono::duration<double>;
-
-static inline
-time_point tic()
-{
-    return std::chrono::system_clock::now();
-}
-
-static inline
-double toc(time_point start)
-{
-    return duration_type(tic() - start).count();
-}
-
-
-[[gnu::unused]] static
-void write_vis_file(const std::string& fname, std::vector<std::vector<double>> values)
-{
-    auto m = values.size();
-    if(!m) return;
-
-    std::ofstream fid(fname);
-    if(!fid.is_open()) return;
-
-    auto n = values[0].size();
-    for(const auto& v : values) {
-        if(n!=v.size()) {
-            std::cerr << "all output arrays must have the same length\n";
-            return;
-        }
-    }
-
-    for(auto i=0u; i<n; ++i) {
-        for(auto j=0u; j<m; ++j) {
-            fid << " " << values[j][i];
-        }
-        fid << "\n";
-    }
-}
-
-template <typename T>
-std::vector<T> find_spikes(std::vector<T> const& v, T threshold, T dt)
-{
-    if(v.size()<2) {
-        return {};
-    }
-
-    std::vector<T> times;
-    for(auto i=1u; i<v.size(); ++i) {
-        if(v[i]>=threshold && v[i-1]<threshold) {
-            auto pos = (threshold-v[i-1]) / (v[i]-v[i-1]);
-            times.push_back((i-1+pos)*dt);
-        }
-    }
-
-    return times;
-}
-
-struct spike_comparison {
-    double min = std::numeric_limits<double>::quiet_NaN();
-    double max = std::numeric_limits<double>::quiet_NaN();
-    double mean = std::numeric_limits<double>::quiet_NaN();
-    double rms = std::numeric_limits<double>::quiet_NaN();
-    std::vector<double> diff;
-
-    // check whether initialized (i.e. has valid results)
-    bool is_valid() const {
-        return min == min;
-    }
-
-    // return maximum relative error
-    double max_relative_error() const {
-        if(!is_valid()) {
-            return std::numeric_limits<double>::quiet_NaN();
-        }
-
-        return *std::max_element(diff.begin(), diff.end());
-    }
-};
-
-[[gnu::unused]] static
-std::ostream&
-operator<< (std::ostream& o, spike_comparison const& spikes)
-{
-    // use snprintf because C++ is just awful for formatting output
-    char buffer[512];
-    snprintf(
-        buffer, sizeof(buffer),
-        "min,max = %10.8f,%10.8f | mean,rms = %10.8f,%10.8f | max_rel = %10.8f",
-        spikes.min, spikes.max, spikes.mean, spikes.rms,
-        spikes.max_relative_error()
-    );
-    return o << buffer;
-}
-
-template <typename T>
-spike_comparison compare_spikes(
-    std::vector<T> const& spikes,
-    std::vector<T> const& baseline)
-{
-    spike_comparison c;
-
-    // return default initialized (all NaN) if number of spikes differs
-    if(spikes.size() != baseline.size()) {
-        return c;
-    }
-
-    c.min  = std::numeric_limits<double>::max();
-    c.max  = 0.;
-    c.mean = 0.;
-    c.rms  = 0.;
-
-    auto n = spikes.size();
-    for(auto i=0u; i<n; ++i) {
-        auto error = std::fabs(spikes[i] - baseline[i]);
-        c.min = std::min(c.min, error);
-        c.max = std::max(c.max, error);
-        c.mean += error;
-        c.rms += error*error;
-        // relative difference
-        c.diff.push_back(error/baseline[i]);
-    }
-
-    c.mean /= n;
-    c.rms = std::sqrt(c.rms/n);
-
-    return c;
-}
-
-} // namespace testing
diff --git a/tests/ubench/CMakeLists.txt b/tests/ubench/CMakeLists.txt
deleted file mode 100644
index c93d413244c4d9b4ae06aa800de4218a4590ad80..0000000000000000000000000000000000000000
--- a/tests/ubench/CMakeLists.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-include(ExternalProject)
-
-# List of micro benchmarks to build.
-
-set(bench_sources
-    accumulate_functor_values.cpp
-    default_construct.cpp
-    event_setup.cpp
-    event_binning.cpp
-    mech_vec.cpp
-)
-
-set(bench_sources_cuda
-    cuda_compare_and_reduce.cu
-    cuda_reduce_by_key.cu
-)
-
-# Set up google benchmark as an external project.
-
-set(gbench_src_dir "${CMAKE_CURRENT_SOURCE_DIR}/google-benchmark")
-set(gbench_install_dir "${PROJECT_BINARY_DIR}/gbench")
-
-set(gbench_cmake_args
-    "-DCMAKE_BUILD_TYPE=release"
-    "-DCMAKE_INSTALL_PREFIX=${gbench_install_dir}"
-    "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
-    "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
-
-ExternalProject_Add(gbench
-    # Add dummy DOWNLOAD_COMMAND to stop ExternalProject_Add terminating CMake if the
-    # git submodule had not been udpated.
-    DOWNLOAD_COMMAND "${CMAKE_COMMAND}" -E echo "Warning: ${gbench_src_dir} empty or missing."
-    SOURCE_DIR "${gbench_src_dir}"
-    CMAKE_ARGS "${gbench_cmake_args}"
-    INSTALL_DIR "${gbench_install_dir}"
-)
-set_target_properties(gbench PROPERTIES EXCLUDE_FROM_ALL TRUE)
-
-# Build benches.
-
-foreach(bench_src ${bench_sources})
-    string(REGEX REPLACE "\\.[^.]*$" "" bench_exe "${bench_src}")
-    add_executable("${bench_exe}" EXCLUDE_FROM_ALL "${bench_src}")
-    add_dependencies("${bench_exe}" gbench)
-    target_include_directories("${bench_exe}" PRIVATE "${gbench_install_dir}/include")
-    target_link_libraries("${bench_exe}" LINK_PUBLIC "${gbench_install_dir}/lib/libbenchmark.a")
-    target_link_libraries("${bench_exe}" LINK_PUBLIC ${ARB_LIBRARIES})
-
-    list(APPEND bench_exe_list ${bench_exe})
-endforeach()
-
-
-if(ARB_WITH_CUDA)
-    cuda_include_directories("${gbench_install_dir}/include")
-    foreach(bench_src ${bench_sources_cuda})
-        string(REGEX REPLACE "\\.[^.]*$" "" bench_exe "${bench_src}")
-        cuda_add_executable("${bench_exe}" EXCLUDE_FROM_ALL "${bench_src}")
-        add_dependencies("${bench_exe}" gbench)
-        target_link_libraries("${bench_exe}" "${gbench_install_dir}/lib/libbenchmark.a")
-        target_link_libraries("${bench_exe}" LINK_PUBLIC ${ARB_LIBRARIES})
-
-        list(APPEND bench_exe_list ${bench_exe})
-    endforeach()
-endif()
-
-add_custom_target(ubenches DEPENDS ${bench_exe_list})
diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt
deleted file mode 100644
index e00458e7caed2638be26e74197bfe91ea26d35e9..0000000000000000000000000000000000000000
--- a/tests/unit/CMakeLists.txt
+++ /dev/null
@@ -1,133 +0,0 @@
-include(${PROJECT_SOURCE_DIR}/mechanisms/BuildModules.cmake)
-
-# Build prototype mechanisms for testing in test_mechanisms.
-set(proto_mechanisms pas hh expsyn exp2syn test_kin1 test_kinlva test_ca)
-set(mech_proto_dir "${CMAKE_CURRENT_BINARY_DIR}/mech_proto")
-file(MAKE_DIRECTORY "${mech_proto_dir}")
-
-build_modules(
-    ${proto_mechanisms}
-    SOURCE_DIR "${PROJECT_SOURCE_DIR}/mechanisms/mod"
-    DEST_DIR "${mech_proto_dir}"
-    MECH_SUFFIX _proto
-    MODCC_FLAGS -t cpu
-    GENERATES _cpu.hpp
-    TARGET build_test_mods
-)
-
-# Unit test sources
-
-set(test_cuda_sources
-    test_intrin.cu
-    test_gpu_stack.cu
-    test_matrix.cu
-    test_matrix_cpuvsgpu.cpp
-    test_reduce_by_key.cu
-    test_vector.cu
-
-    test_mc_cell_group_gpu.cpp
-    test_multi_event_stream_gpu.cpp
-    test_multi_event_stream_gpu.cu
-    test_spikes_gpu.cpp
-
-    # unit test driver
-    test.cpp
-)
-
-set(test_sources
-    # unit tests
-    test_algorithms.cpp
-    test_any.cpp
-    test_backend.cpp
-    test_double_buffer.cpp
-    test_cell.cpp
-    test_compartments.cpp
-    test_counter.cpp
-    test_cycle.cpp
-    test_domain_decomposition.cpp
-    test_either.cpp
-    test_event_binner.cpp
-    test_event_generators.cpp
-    test_event_queue.cpp
-    test_filter.cpp
-    test_fvm_layout.cpp
-    test_fvm_lowered.cpp
-    test_mc_cell_group.cpp
-    test_lexcmp.cpp
-    test_lif_cell_group.cpp
-    test_maputil.cpp
-    test_mask_stream.cpp
-    test_math.cpp
-    test_matrix.cpp
-    test_mechanisms.cpp
-    test_mechcat.cpp
-    test_merge_events.cpp
-    test_multi_event_stream.cpp
-    test_nop.cpp
-    test_optional.cpp
-    test_mechinfo.cpp
-    test_padded.cpp
-    test_partition.cpp
-    test_partition_by_constraint.cpp
-    test_path.cpp
-    test_point.cpp
-    test_probe.cpp
-    test_range.cpp
-    test_segment.cpp
-    test_schedule.cpp
-    test_spike_source.cpp
-    test_local_context.cpp
-    test_simd.cpp
-    test_span.cpp
-    test_spikes.cpp
-    test_spike_store.cpp
-    test_stats.cpp
-    test_strprintf.cpp
-    test_swcio.cpp
-    test_synapses.cpp
-    test_time_seq.cpp
-    test_tree.cpp
-    test_transform.cpp
-    test_uninitialized.cpp
-    test_unique_any.cpp
-    test_vector.cpp
-
-    # unit test driver
-    test.cpp
-
-    # common routines
-    stats.cpp
-)
-
-set(targets test.exe)
-
-add_executable(test.exe ${test_sources})
-target_compile_options(test.exe PRIVATE ${CXXOPT_ARCH})
-target_compile_definitions(test.exe PUBLIC "-DDATADIR=\"${PROJECT_SOURCE_DIR}/data\"")
-
-if (ARB_AUTO_RUN_MODCC_ON_CHANGES)
-  add_dependencies(test.exe build_test_mods)
-endif()
-
-target_include_directories(test.exe PRIVATE "${mech_proto_dir}/..")
-
-if(ARB_WITH_CUDA)
-    list(APPEND targets test_cuda.exe)
-    cuda_add_executable(test_cuda.exe ${test_cuda_sources})
-endif()
-
-foreach(target ${targets})
-    target_link_libraries(${target} LINK_PUBLIC gtest)
-    target_link_libraries(${target} LINK_PUBLIC ${ARB_LIBRARIES})
-    target_link_libraries(${target} LINK_PUBLIC ${EXTERNAL_LIBRARIES})
-
-    if(ARB_WITH_MPI)
-        target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES})
-        set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-    endif()
-
-    set_target_properties(${target}
-       PROPERTIES
-       RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests"
-    )
-endforeach()
diff --git a/tests/unit/test_backend.cpp b/tests/unit/test_backend.cpp
deleted file mode 100644
index f3d846c15869b5d00a9dba2e1343f65794b41c65..0000000000000000000000000000000000000000
--- a/tests/unit/test_backend.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#include <type_traits>
-
-#include <backends.hpp>
-#include <fvm_lowered_cell.hpp>
-#include <util/config.hpp>
-
-#include "../gtest.h"
-
-using namespace arb;
-
-TEST(backends, gpu_test) {
-    if (!arb::config::has_cuda) {
-        EXPECT_ANY_THROW(make_fvm_lowered_cell(backend_kind::gpu));
-    }
-    else {
-        EXPECT_NO_THROW(make_fvm_lowered_cell(backend_kind::gpu));
-    }
-}
diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt
deleted file mode 100644
index 399cfe84478d6ff03a6b5b7982498e2077dd708c..0000000000000000000000000000000000000000
--- a/tests/validation/CMakeLists.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-set(VALIDATION_SOURCES
-    # unit tests
-    validate_ball_and_stick.cpp
-    validate_compartment_policy.cpp
-    validate_soma.cpp
-    validate_kinetic.cpp
-    validate_synapses.cpp
-
-    # support code
-    validation_data.cpp
-    trace_analysis.cpp
-
-    # unit test driver
-    validate.cpp
-)
-
-if(ARB_VALIDATION_DATA_DIR)
-    if ("${CMAKE_VERSION}" MATCHES "^3.[789].")
-        message(WARNING "CMake ${CMAKE_VERSION} has broken FindCUDA; omitting ARB_DATADIR define.")
-    else()
-        add_definitions("-DARB_DATADIR=\"${ARB_VALIDATION_DATA_DIR}\"")
-    endif()
-endif()
-
-add_executable(validate.exe ${VALIDATION_SOURCES})
-
-target_link_libraries(validate.exe LINK_PUBLIC gtest)
-target_link_libraries(validate.exe LINK_PUBLIC ${ARB_LIBRARIES})
-target_link_libraries(validate.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})
-
-if(ARB_WITH_MPI)
-    target_link_libraries(validate.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
-    set_property(TARGET validate.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}")
-endif()
-
-set_target_properties(
-    validate.exe
-    PROPERTIES
-    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests"
-)
-
-if(ARB_BUILD_VALIDATION_DATA)
-    add_dependencies(validate.exe validation_data)
-endif()
diff --git a/validation/CMakeLists.txt b/validation/CMakeLists.txt
index 252d099a0f70fddffd39bd4e435da0044c2c0a85..a5555d87207ba8ae7aaa03a7871f009f47aeee7e 100644
--- a/validation/CMakeLists.txt
+++ b/validation/CMakeLists.txt
@@ -43,9 +43,22 @@ function(add_validation_data)
 endfunction()
 
 
-if(ARB_BUILD_NRN_VALIDATION_DATA)
+# Only run Julia-based numeric validation data generators if Julia and required
+# modules are available.
+
+find_package(Julia COMPONENTS Sundials Unitful JSON)
+if(Julia_FOUND)
+    add_subdirectory(ref/numeric)
+else()
+    message(WARNING "julia and required packages not found: will not automatically build validation data sets from julia scripts")
+endif()
+
+
+# Only run NEURON-based generators if nrniv is found.
+
+find_program(Nrniv_EXECUTABLE nrniv)
+if(Nrniv_EXECUTABLE)
     add_subdirectory(ref/neuron)
 endif()
 
-add_subdirectory(ref/numeric)
 
diff --git a/validation/ref/neuron/CMakeLists.txt b/validation/ref/neuron/CMakeLists.txt
index 154a29a6605e44bcd1c4c48252f70fcea7e3fb4f..25248340ebf7d7bf73e6d33cceecc782fad1f1d0 100644
--- a/validation/ref/neuron/CMakeLists.txt
+++ b/validation/ref/neuron/CMakeLists.txt
@@ -14,6 +14,6 @@ foreach(model ${models})
     add_validation_data(
         OUTPUT "neuron_${model}.json"
         DEPENDS "${script}" "nrn_validation.py"
-        COMMAND ${NRNIV_BIN} -nobanner -python "${script}")
+        COMMAND ${Nrniv_EXECUTABLE} -nobanner -python "${script}")
 endforeach()
 
diff --git a/validation/ref/neuron/ball_and_3stick.py b/validation/ref/neuron/ball_and_3stick.py
index a261306783ead17b591dec00ccbf27180cef5441..473fbda9e33aa113fd27fde638fdf2da466a6e30 100644
--- a/validation/ref/neuron/ball_and_3stick.py
+++ b/validation/ref/neuron/ball_and_3stick.py
@@ -21,7 +21,7 @@ model.add_iclamp(40, 10, -0.2, to='dend3')
 simdur = 100.0
 
 data = V.run_nrn_sim(simdur, report_dt=10, model='ball_and_3stick')
-print json.dumps(data)
+print(json.dumps(data))
 
 V.nrn_stop()
 
diff --git a/validation/ref/neuron/ball_and_squiggle.py b/validation/ref/neuron/ball_and_squiggle.py
index 4d4e02225a2dfedc56fe1670a9ef8a80533cb21b..258eb5e717221309e68aea0362dcc7e7ad4f21d2 100644
--- a/validation/ref/neuron/ball_and_squiggle.py
+++ b/validation/ref/neuron/ball_and_squiggle.py
@@ -4,6 +4,7 @@
 import json
 import math
 import nrn_validation as V
+from builtins import range
 
 V.override_defaults_from_args()
 
@@ -12,7 +13,7 @@ length = 100.0
 npoints = 200
 radius = lambda x: math.exp(-x)*(math.sin(40*x)*0.05+0.1)+0.1
 
-xs = [float(i)/(npoints-1) for i in xrange(npoints)]
+xs = [float(i)/(npoints-1) for i in range(npoints)]
 geom = [(length*x, 2.0*radius(x)) for x in xs]
 
 model = V.VModel()
@@ -23,7 +24,7 @@ model.add_iclamp(5, 80, 0.3, to='dend')
 simdur = 100.0
 
 data = V.run_nrn_sim(simdur, report_dt=10, model='ball_and_squiggle')
-print json.dumps(data)
+print(json.dumps(data))
 
 V.nrn_stop()
 
diff --git a/validation/ref/neuron/ball_and_stick.py b/validation/ref/neuron/ball_and_stick.py
index ee4dc999f59552671230eb004891c8676a7bfd32..52378e64e9f3163aa30332acfc50b3cee66e6faf 100644
--- a/validation/ref/neuron/ball_and_stick.py
+++ b/validation/ref/neuron/ball_and_stick.py
@@ -15,6 +15,6 @@ model.add_dendrite('dend', geom)
 model.add_iclamp(5, 80, 0.3, to='dend')
 
 data = V.run_nrn_sim(100, report_dt=10, model='ball_and_stick')
-print json.dumps(data)
+print(json.dumps(data))
 V.nrn_stop()
 
diff --git a/validation/ref/neuron/ball_and_taper.py b/validation/ref/neuron/ball_and_taper.py
index 0610fe72db9e8a3dbdd1ec155ce9c8dd970ed392..13961c0e62b57bc1bc9c8ba901e8f1b7e11c60d6 100644
--- a/validation/ref/neuron/ball_and_taper.py
+++ b/validation/ref/neuron/ball_and_taper.py
@@ -15,6 +15,6 @@ model.add_dendrite('taper', geom)
 model.add_iclamp(5, 80, 0.3, to='taper')
 
 data = V.run_nrn_sim(100, report_dt=10, model='ball_and_taper')
-print json.dumps(data)
+print(json.dumps(data))
 V.nrn_stop()
 
diff --git a/validation/ref/neuron/nrn_validation.py b/validation/ref/neuron/nrn_validation.py
index 2e1cf7f392cc67f4748e263df4bda31976c079fe..caeb357c0c8ef730b73064781d63a2d4e2ccd863 100644
--- a/validation/ref/neuron/nrn_validation.py
+++ b/validation/ref/neuron/nrn_validation.py
@@ -8,23 +8,30 @@ import re
 import numpy as np
 import neuron
 from neuron import h
+from builtins import range
 
 # This is super annoying: without neuron.gui, need
 # to explicit load 'standard' hoc routines like 'run',
 # but this is chatty on stdout, which means we get
 # junk in our data if capturing output.
 
-def hoc_setup():
+def hoc_execute_quiet(arg):  # execute hoc code `arg` with stdout sent to os.devnull
     with open(os.devnull, 'wb') as null:
         fd = sys.stdout.fileno()
         keep = os.dup(fd)
         sys.stdout.flush()
         os.dup2(null.fileno(), fd)
-
-        h('load_file("stdrun.hoc")')
+        h(arg)
         sys.stdout.flush()
         os.dup2(keep, fd)
+        os.close(keep)  # stdout is restored; release the saved duplicate descriptor
 
+def hoc_setup():
+    hoc_execute_quiet('load_file("stdrun.hoc")')
+
+def hoc_quit():
+    hoc_execute_quiet('quit()')  # quit with stdout redirected, as hoc_setup does for its load
+
 default_model_parameters = {
     'gnabar_hh':  0.12,   # H-H sodium conductance in S/cm^2
     'gkbar_hh':   0.036,  # H-H potassium conductance in S/cm^2
@@ -210,7 +217,7 @@ def run_nrn_sim(tend, sample_dt=0.025, report_t=None, report_dt=None, dt=None, *
     # Instrument every segment for section voltage reports.
     if report_t is None:
         if report_dt is not None:
-            report_t = [report_dt*(1+i) for i in xrange(int(tend/report_dt))]
+            report_t = [report_dt*(1+i) for i in range(int(tend/report_dt))]
         else:
             report_t = []
     elif not isinstance(report_t, list):
@@ -222,7 +229,7 @@ def run_nrn_sim(tend, sample_dt=0.025, report_t=None, report_dt=None, dt=None, *
     if report_t:
         for s in h.allsec():
             nseg = s.nseg;
-            ps = [0] + [(i+0.5)/nseg for i in xrange(nseg)] + [1]
+            ps = [0] + [(i+0.5)/nseg for i in range(nseg)] + [1]
             vs = [h.Vector() for p in ps]
             for p, v in zip(ps, vs):
                 v.record(s(p)._ref_v, vreport_t_hoc)
@@ -283,7 +290,7 @@ def nrn_assert_no_sections():
         assert False, 'a section exists'
 
 def nrn_stop():
-    h.quit()
+    hoc_quit()
 
 # Run hoc setup on load
 hoc_setup()
diff --git a/validation/ref/neuron/simple_exp2_synapse.py b/validation/ref/neuron/simple_exp2_synapse.py
index cf8377fffc4406cb7c74a3a44b6e99b4c996748e..c2314e7619cde5fcfbef583437237546ee3ff043 100644
--- a/validation/ref/neuron/simple_exp2_synapse.py
+++ b/validation/ref/neuron/simple_exp2_synapse.py
@@ -19,6 +19,6 @@ model.add_spike(20, 0.04)
 model.add_spike(40, 0.04)
 
 data = V.run_nrn_sim(70, report_dt=10, model='exp2syn')
-print json.dumps(data)
+print(json.dumps(data))
 V.nrn_stop()
 
diff --git a/validation/ref/neuron/simple_exp_synapse.py b/validation/ref/neuron/simple_exp_synapse.py
index b890dc03f9094790956b80d1d9ae7df2cb719431..587317faaa785491caa640ae5069f9202bacb968 100644
--- a/validation/ref/neuron/simple_exp_synapse.py
+++ b/validation/ref/neuron/simple_exp_synapse.py
@@ -19,5 +19,5 @@ model.add_spike(20, 0.04)
 model.add_spike(40, 0.04)
 
 data = V.run_nrn_sim(70, report_dt=10, model='expsyn')
-print json.dumps(data)
+print(json.dumps(data))
 V.nrn_stop()
diff --git a/validation/ref/neuron/soma.py b/validation/ref/neuron/soma.py
index f1380032b7fb6e3ca3c75488f7a872b96ba611e5..393d898cf20fcac084125e89392182b87dc424f3 100644
--- a/validation/ref/neuron/soma.py
+++ b/validation/ref/neuron/soma.py
@@ -12,6 +12,6 @@ model.add_soma(18.8, Ra=100)
 model.add_iclamp(10, 100, 0.1)
 
 data = V.run_nrn_sim(100, report_dt=None, model='soma')
-print json.dumps(data)
+print(json.dumps(data))
 V.nrn_stop()
 
diff --git a/validation/ref/numeric/CMakeLists.txt b/validation/ref/numeric/CMakeLists.txt
index ed5761ec7c08e636588b0e5e60485893d7baea55..d95c5bb90e4822fcb33148151095af5ddc973cf6 100644
--- a/validation/ref/numeric/CMakeLists.txt
+++ b/validation/ref/numeric/CMakeLists.txt
@@ -1,23 +1,21 @@
 # note: function add_validation_data defined in validation/CMakeLists.txt
 
-if(ARB_BUILD_JULIA_VALIDATION_DATA)
-    add_validation_data(
-        OUTPUT numeric_kin1.json
-        DEPENDS numeric_kin1.jl
-        COMMAND ${JULIA_BIN} numeric_kin1.jl)
+add_validation_data(
+    OUTPUT numeric_kin1.json
+    DEPENDS numeric_kin1.jl
+    COMMAND ${Julia_EXECUTABLE} numeric_kin1.jl)
 
-    add_validation_data(
-        OUTPUT numeric_kinlva.json
-        DEPENDS numeric_kinlva.jl LVAChannels.jl
-        COMMAND ${JULIA_BIN} numeric_kinlva.jl)
+add_validation_data(
+    OUTPUT numeric_kinlva.json
+    DEPENDS numeric_kinlva.jl LVAChannels.jl
+    COMMAND ${Julia_EXECUTABLE} numeric_kinlva.jl)
 
-    add_validation_data(
-        OUTPUT numeric_soma.json
-        DEPENDS numeric_soma.jl HHChannels.jl
-        COMMAND ${JULIA_BIN} numeric_soma.jl)
+add_validation_data(
+    OUTPUT numeric_soma.json
+    DEPENDS numeric_soma.jl HHChannels.jl
+    COMMAND ${Julia_EXECUTABLE} numeric_soma.jl)
 
-    add_validation_data(
-        OUTPUT numeric_rallpack1.json
-        DEPENDS numeric_rallpack1.jl PassiveCable.jl
-        COMMAND ${JULIA_BIN} numeric_rallpack1.jl)
-endif()
+add_validation_data(
+    OUTPUT numeric_rallpack1.json
+    DEPENDS numeric_rallpack1.jl PassiveCable.jl
+    COMMAND ${Julia_EXECUTABLE} numeric_rallpack1.jl)
diff --git a/validation/ref/numeric/HHChannels.jl b/validation/ref/numeric/HHChannels.jl
index 910a402cb8201c8355723a77236045dbad1cf03f..f68a7aa1d11baf446546cf37fef031b785a9ea85 100644
--- a/validation/ref/numeric/HHChannels.jl
+++ b/validation/ref/numeric/HHChannels.jl
@@ -3,7 +3,8 @@ module HHChannels
 export Stim, run_hh
 
 using Sundials
-using SIUnits.ShortUnits
+using Unitful
+using Unitful.DefaultSymbols
 
 immutable HHParam
     c_m       # membrane spacific capacitance
@@ -55,6 +56,8 @@ immutable Stim
     Stim(t0, t1, i_e) = new(t0, t1, i_e)
 end
 
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
+
 vtrap(x,y) = x/(exp(x/y) - 1.0)
 
 # "m" sodium activation system
@@ -141,6 +144,7 @@ function run_hh(t_end; v0=-65mV, stim=Stim(), param=HHParam(), sample_dt=0.01ms)
 
         ydot[1], ydot[2], ydot[3], ydot[4] =
             vdot*t_scale/v_scale, mdot*t_scale, hdot*t_scale, ndot*t_scale
+#            Float64(vdot*t_scale/v_scale), Float64(mdot*t_scale), Float64(hdot*t_scale), Float64(ndot*t_scale)
 
         return Sundials.CV_SUCCESS
     end
@@ -148,10 +152,9 @@ function run_hh(t_end; v0=-65mV, stim=Stim(), param=HHParam(), sample_dt=0.01ms)
     # Ideally would run with vector absolute tolerance to account for v_scale,
     # but this would prevent us using the nice cvode wrapper.
 
-    res = Sundials.cvode(fbis, y0, map(t->t/t_scale, samples), abstol=1e-6, reltol=5e-10)
+    res = Sundials.cvode(fbis, y0, scale.(samples, t_scale), abstol=1e-6, reltol=5e-10)
 
-    # Use map here because of issues with type deduction with arrays and SIUnits.
-    return samples, map(v->v*v_scale, res[:, 1])
+    return samples, res[:, 1]*v_scale
 end
 
 end # module HHChannels
diff --git a/validation/ref/numeric/LVAChannels.jl b/validation/ref/numeric/LVAChannels.jl
index 1d11e6b0a9c8f5fd9847e6fbf9c98f687fd21f81..a1f4e86508956e497d07a571b2f9fde5a7a9f074 100644
--- a/validation/ref/numeric/LVAChannels.jl
+++ b/validation/ref/numeric/LVAChannels.jl
@@ -3,10 +3,8 @@ module LVAChannels
 export Stim, run_lva, LVAParam
 
 using Sundials
-using SIUnits
-using SIUnits.ShortUnits
-
-const mS = Milli*Siemens
+using Unitful
+using Unitful.DefaultSymbols
 
 immutable LVAParam
     c_m       # membrane spacific capacitance
@@ -43,6 +41,8 @@ immutable Stim
     Stim(t0, t1, i_e) = new(t0, t1, i_e)
 end
 
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
+
 # 'm' activation gate
 function m_lims(v, q10)
     quotient = 1+exp(-(v+63mV)/7.8mV)
@@ -112,7 +112,7 @@ function run_lva(t_end; stim=Stim(), param=LVAParam(), sample_dt=0.01ms)
     t_scale = 1s
 
     v0, m0, h0, d0 = initial_conditions(param.vrest, param.q10_1, param.q10_2)
-    y0 = [ v0/v_scale, m0, h0, d0 ]
+    y0 = [ scale(v0, v_scale), m0, h0, d0 ]
 
 
     fbis(t, y, ydot, istim) = begin
@@ -137,7 +137,7 @@ function run_lva(t_end; stim=Stim(), param=LVAParam(), sample_dt=0.01ms)
     t1 = clamp(stim.t0, 0s, t_end)
     if t1>0s
         ts = make_range(0s, sample_dt, t1)
-        r = Sundials.cvode(fbis_nostim, y0, map(t->t/t_scale, ts), abstol=1e-6, reltol=5e-10)
+        r = Sundials.cvode(fbis_nostim, y0, scale.(ts, t_scale), abstol=1e-6, reltol=5e-10)
         y0 = vec(r[size(r)[1], :])
         push!(res, r)
         push!(samples, ts)
@@ -145,14 +145,14 @@ function run_lva(t_end; stim=Stim(), param=LVAParam(), sample_dt=0.01ms)
     t2 = clamp(stim.t1, t1, t_end)
     if t2>t1
         ts = make_range(t1, sample_dt, t2)
-        r = Sundials.cvode(fbis_stim, y0, map(t->t/t_scale, ts), abstol=1e-6, reltol=5e-10)
+        r = Sundials.cvode(fbis_stim, y0, scale.(ts, t_scale), abstol=1e-6, reltol=5e-10)
         y0 = vec(r[size(r)[1], :])
         push!(res, r)
         push!(samples, ts)
     end
     if t_end>t2
         ts = make_range(t2, sample_dt, t_end)
-        r = Sundials.cvode(fbis_nostim, y0, map(t->t/t_scale, ts), abstol=1e-6, reltol=5e-10)
+        r = Sundials.cvode(fbis_nostim, y0, scale.(ts, t_scale), abstol=1e-6, reltol=5e-10)
         y0 = vec(r[size(r)[1], :])
         push!(res, r)
         push!(samples, ts)
@@ -161,8 +161,7 @@ function run_lva(t_end; stim=Stim(), param=LVAParam(), sample_dt=0.01ms)
     res = vcat(res...)
     samples = vcat(samples...)
 
-    # Use map here because of issues with type deduction with arrays and SIUnits.
-    return samples, map(v->v*v_scale, res[:, 1]), res[:, 2], res[:, 3], res[:, 4]
+    return samples, res[:, 1]*v_scale, res[:, 2], res[:, 3], res[:, 4]
 end
 
 end # module LVAChannels
diff --git a/validation/ref/numeric/PassiveCable.jl b/validation/ref/numeric/PassiveCable.jl
index 6928b74c33948be7162885040cb89870ae245596..56eded5acbc30ebc9cc5c8b7f901cffefdedc708 100644
--- a/validation/ref/numeric/PassiveCable.jl
+++ b/validation/ref/numeric/PassiveCable.jl
@@ -29,7 +29,7 @@ function cable_normalized(x::Float64, t::Float64, L::Float64; tol=1e-8)
         sum = exp(-t/L)
         Ltol = L*tol
 
-        for k = countfrom(1)
+        for k = Iterators.countfrom(1)
             a = k*pi/L
             b = exp(-t*(1+a^2))
 
@@ -94,7 +94,7 @@ function cable(x, t, L, lambda, tau, r, V, I; tol=1e-8)
         return V
     else
         tol_n = abs(tol/scale)
-        return scale*cable_normalized(x/lambda, t/tau, L/lambda, tol=tol_n) + V
+        return scale*cable_normalized(Float64(x/lambda), Float64(t/tau), Float64(L/lambda), tol=tol_n) + V
     end
 end
 
diff --git a/validation/ref/numeric/numeric_kin1.jl b/validation/ref/numeric/numeric_kin1.jl
index 300903147b1e29dbc9603448f2289bbd70ad5ce7..4437c3c89f0c35d86be6c99be0383574027c0eed 100644
--- a/validation/ref/numeric/numeric_kin1.jl
+++ b/validation/ref/numeric/numeric_kin1.jl
@@ -3,7 +3,10 @@
 include("HHChannels.jl")
 
 using JSON
-using SIUnits.ShortUnits
+using Unitful
+using Unitful.DefaultSymbols
+
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
 
 radius = 20µm/2
 area = 4*pi*radius^2
@@ -16,7 +19,7 @@ c  = 0.01mA/cm^2
 tau = 10ms
 
 ts = collect(0s: sample_dt: t_end)
-is = area*(1/3*c + (a0-1/3*c)*exp(-ts/tau))
+is = area*(1/3*c + (a0-1/3*c)*exp.(-ts/tau))
 
 trace = Dict(
     :name => "membrane current",
@@ -24,8 +27,8 @@ trace = Dict(
     :model => "test_kin1",
     :units => "nA",
     :data => Dict(
-        :time => map(t->t/ms, ts),
-        Symbol("soma.mid") => map(i->i/nA, is)
+        :time => scale.(ts, 1ms),
+        Symbol("soma.mid") => scale.(is, 1nA)
     )
 )
 
diff --git a/validation/ref/numeric/numeric_kinlva.jl b/validation/ref/numeric/numeric_kinlva.jl
index 9dc18ceacd103e77515967189d73a93498bbe98b..108612e7e7b109dcf045d2edcfcf90a9c6464290 100644
--- a/validation/ref/numeric/numeric_kinlva.jl
+++ b/validation/ref/numeric/numeric_kinlva.jl
@@ -3,9 +3,12 @@
 include("LVAChannels.jl")
 
 using JSON
-using SIUnits.ShortUnits
+using Unitful
+using Unitful.DefaultSymbols
 using LVAChannels
 
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
+
 radius = 20µm/2
 area = 4*pi*radius^2
 current = -0.025nA
@@ -19,8 +22,8 @@ trace = Dict(
     :model => "test_kinlva",
     :units => "mV",
     :data => Dict(
-        :time => map(t->t/ms, ts),
-        Symbol("soma.mid") => map(v->v/mV, vs)
+        :time => scale.(ts, 1ms),
+        Symbol("soma.mid") => scale.(vs, 1mV)
     )
 )
 
@@ -30,7 +33,7 @@ state = Dict(
     :model => "kinlva",
     :units => "1",
     :data => Dict(
-        :time => map(t->t/ms, ts),
+        :time => scale.(ts, 1ms),
         Symbol("m") => m,
         Symbol("d") => d,
         Symbol("h") => h
diff --git a/validation/ref/numeric/numeric_rallpack1.jl b/validation/ref/numeric/numeric_rallpack1.jl
index b13a5effe60c3a7d2daa154c3bc1a125c26683ce..9830d81c856e2595b7cd9341f6dfcf85ba710d3a 100644
--- a/validation/ref/numeric/numeric_rallpack1.jl
+++ b/validation/ref/numeric/numeric_rallpack1.jl
@@ -3,9 +3,12 @@
 include("PassiveCable.jl")
 
 using JSON
-using SIUnits.ShortUnits
+using Unitful
+using Unitful.DefaultSymbols
 using PassiveCable
 
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
+
 # This should run the same effective model
 # as rallpack1, but with differing
 # electrical parameters (see below).
@@ -42,24 +45,25 @@ function run_cable(x_prop, ts)
 end
 
 function run_rallpack1(x_prop, ts)
-    return [rallpack1(0.001*x_prop, t/s)*V for t in ts]
+    return [rallpack1(0.001*x_prop, scale(t, 1s))*V for t in ts]
 end
 
 # Generate traces at x=0, x=0.3L, x=L
 
 ts = collect(0s: 0.025ms: 250ms)
+
 trace = Dict(
     :name => "membrane voltage",
     :sim => "numeric",
     :model => "rallpack1",
     :units => "mV",
     :data => Dict(
-        :time => map(t->t/ms, ts),
-        Symbol("cable.x0.0") => map(v->v/mV, run_cable(0, ts)),
-        Symbol("cable.x0.3") => map(v->v/mV, run_cable(0.3, ts)),
-        Symbol("cable.x1.0") => map(v->v/mV, run_cable(1.0, ts))
+        :time => scale.(ts, 1ms),
+        Symbol("cable.x0.0") => scale.(run_cable(0, ts), 1mV),
+        Symbol("cable.x0.3") => scale.(run_cable(0.3, ts), 1mV),
+        Symbol("cable.x1.0") => scale.(run_cable(1.0, ts), 1mV)
     )
 )
 
-println(JSON.json([trace]))
+println(JSON.json([trace]))
 
diff --git a/validation/ref/numeric/numeric_soma.jl b/validation/ref/numeric/numeric_soma.jl
index 6b8b84459ff5753bb01da1b175646e9d49801557..7c5e133643f94be10d8f81f4ec158b8e98e76537 100644
--- a/validation/ref/numeric/numeric_soma.jl
+++ b/validation/ref/numeric/numeric_soma.jl
@@ -3,9 +3,12 @@
 include("HHChannels.jl")
 
 using JSON
-using SIUnits.ShortUnits
+using Unitful
+using Unitful.DefaultSymbols
 using HHChannels
 
+scale(quantity, unit) = uconvert(NoUnits, quantity/unit)
+
 radius = 18.8µm/2
 area = 4*pi*radius^2
 
@@ -18,8 +21,8 @@ trace = Dict(
     :model => "soma",
     :units => "mV",
     :data => Dict(
-        :time => map(t->t/ms, ts),
-        Symbol("soma.mid") => map(v->v/mV, vs)
+        :time => scale.(ts, 1ms),
+        Symbol("soma.mid") => scale.(vs, 1mV)
     )
 )