From ff12bb82fb4354772da2d626d16a94dca643686b Mon Sep 17 00:00:00 2001
From: thorstenhater <24411438+thorstenhater@users.noreply.github.com>
Date: Thu, 29 Jul 2021 08:29:44 +0200
Subject: [PATCH] Implement mechanism ABI

Implements #1376.

* Provide a common C linkage ABI for externally compiled mechanisms, for both CPU and GPU.
* Remove mechanism type hierarchy (`concrete_mechanism` etc.), and move corresponding functionality to the back-end shared state objects. Mechanism catalogue is no longer indexed by type id.
* Distinguish between SIMD optimal alignment and SIMD width with new `min_align` attribute. Mechanisms provide both pieces of information via ABI.
---
 .gitmodules                                   |   3 +
 CMakeLists.txt                                |  13 +
 arbor/CMakeLists.txt                          |   6 +-
 arbor/arbexcept.cpp                           |   8 +
 arbor/backends/event.hpp                      |   5 +-
 arbor/backends/gpu/fvm.cpp                    |  18 -
 arbor/backends/gpu/fvm.hpp                    |   4 +-
 arbor/backends/gpu/matrix_assemble.cu         |   2 +-
 arbor/backends/gpu/matrix_common.hpp          |   2 +-
 arbor/backends/gpu/matrix_fine.cu             |   4 +-
 arbor/backends/gpu/matrix_solve.cu            |   2 +-
 arbor/backends/gpu/mechanism.cpp              | 231 --------
 arbor/backends/gpu/mechanism.cu               |  23 -
 arbor/backends/gpu/mechanism.hpp              |  31 -
 arbor/backends/gpu/multi_event_stream.cu      |   2 +-
 arbor/backends/gpu/shared_state.cpp           | 186 ++++++
 arbor/backends/gpu/shared_state.cu            |   4 +-
 arbor/backends/gpu/shared_state.hpp           |  28 +-
 arbor/backends/gpu/stack_cu.hpp               |   2 +-
 arbor/backends/gpu/stimulus.cu                |   8 +-
 arbor/backends/gpu/threshold_watcher.cu       |   2 +-
 arbor/backends/multicore/fvm.cpp              |  18 -
 arbor/backends/multicore/fvm.hpp              |   2 +
 arbor/backends/multicore/mechanism.cpp        | 225 -------
 arbor/backends/multicore/mechanism.hpp        |  41 --
 .../multicore/partition_by_constraint.hpp     |  29 +-
 arbor/backends/multicore/shared_state.cpp     | 212 ++++++-
 arbor/backends/multicore/shared_state.hpp     |  23 +-
 arbor/fvm_layout.cpp                          |   6 +-
 arbor/fvm_layout.hpp                          |   2 +-
 arbor/fvm_lowered_cell_impl.hpp               |  27 +-
 arbor/gpu_context.cpp                         |   2 +-
 arbor/hardware/memory.cpp                     |   2 +-
 arbor/include/CMakeLists.txt                  |   3 +-
 arbor/include/arbor/arb_types.h               |  14 +
 arbor/include/arbor/arb_types.hpp             |   9 +
 arbor/include/arbor/arb_types.inc             |   8 +
 arbor/include/arbor/arbexcept.hpp             |  12 +
 arbor/include/arbor/fvm_types.hpp             |  10 +-
 .../arbor}/gpu/cuda_api.hpp                   |   0
 .../arbor}/gpu/gpu_api.hpp                    |   0
 .../arbor}/gpu/gpu_common.hpp                 |   0
 .../arbor}/gpu/hip_api.hpp                    |   0
 .../arbor}/gpu/math_cu.hpp                    |   0
 .../arbor}/gpu/reduce_by_key.hpp              |   0
 arbor/include/arbor/mechanism.hpp             | 185 ++----
 arbor/include/arbor/mechanism_abi.h           | 203 +++++++
 arbor/include/arbor/mechanism_ppack.hpp       |  23 -
 arbor/include/arbor/mechcat.hpp               |  43 +-
 arbor/include/arbor/mechinfo.hpp              |   7 +
 arbor/include/arbor/simd/avx.hpp              |   6 +
 arbor/include/arbor/simd/avx512.hpp           |   5 +
 arbor/include/arbor/simd/generic.hpp          |   1 +
 arbor/include/arbor/simd/implbase.hpp         |   2 +
 arbor/include/arbor/simd/neon.hpp             |   2 +
 arbor/include/arbor/simd/simd.hpp             |   7 +
 arbor/include/arbor/simd/sve.hpp              |  16 +-
 arbor/mechcat.cpp                             |  30 +-
 arbor/mechinfo.cpp                            |  34 ++
 arbor/memory/fill.cu                          |   2 +-
 arbor/memory/gpu_wrappers.cpp                 |   2 +-
 arbor/util/maputil.hpp                        |   2 +-
 arbor/util/range.hpp                          |   6 +
 doc/concepts/mechanisms.rst                   |  52 +-
 doc/internals/extending_catalogues.rst        |  70 +--
 doc/internals/index.rst                       |   2 +-
 doc/internals/mechanism_abi.rst               | 310 ++++++++++
 ext/fmt                                       |   1 +
 mechanisms/BuildModules.cmake                 |  10 +-
 mechanisms/CMakeLists.txt                     |   6 +-
 mechanisms/generate_catalogue                 |  14 +-
 modcc/CMakeLists.txt                          |  12 +-
 modcc/expression.hpp                          |   2 +-
 modcc/modcc.cpp                               |   5 +-
 modcc/printer/cexpr_emit.cpp                  |   6 +-
 modcc/printer/cprinter.cpp                    | 480 +++++++--------
 modcc/printer/gpuprinter.cpp                  | 554 ++++++++----------
 modcc/printer/infoprinter.cpp                 | 251 ++++----
 modcc/printer/infoprinter.hpp                 |   3 +-
 modcc/printer/printerutil.cpp                 |  26 +-
 modcc/printer/printerutil.hpp                 |  26 +-
 python/example/single_cell_model.py           |   2 +
 .../{build-catalogue => build-catalogue.in}   |  20 +-
 test/unit-modcc/test_printers.cpp             |  44 +-
 test/unit/CMakeLists.txt                      |  17 +-
 test/unit/gpu_vector.hpp                      |   2 +-
 test/unit/mech_private_field_access.cpp       | 146 ++++-
 test/unit/mech_private_field_access.hpp       |  22 +-
 test/unit/test_abi.cpp                        | 190 ++++++
 test/unit/test_fvm_layout.cpp                 |   2 +-
 test/unit/test_fvm_lowered.cpp                |  83 +--
 test/unit/test_intrin.cu                      |   4 +-
 test/unit/test_kinetic_linear.cpp             |  28 +-
 test/unit/test_matrix_gpu.cpp                 |   2 +-
 test/unit/test_mech_temp_diam.cpp             |   8 +-
 test/unit/test_mechcat.cpp                    | 457 ++++++++-------
 test/unit/test_probe.cpp                      |   3 +-
 test/unit/test_reduce_by_key.cu               |   2 +-
 test/unit/test_simd.cpp                       |  13 +-
 test/unit/test_synapses.cpp                   |  35 +-
 test/unit/unit_test_catalogue.cpp             |  10 +-
 101 files changed, 2689 insertions(+), 2036 deletions(-)
 delete mode 100644 arbor/backends/gpu/fvm.cpp
 delete mode 100644 arbor/backends/gpu/mechanism.cpp
 delete mode 100644 arbor/backends/gpu/mechanism.cu
 delete mode 100644 arbor/backends/gpu/mechanism.hpp
 delete mode 100644 arbor/backends/multicore/fvm.cpp
 delete mode 100644 arbor/backends/multicore/mechanism.cpp
 delete mode 100644 arbor/backends/multicore/mechanism.hpp
 create mode 100644 arbor/include/arbor/arb_types.h
 create mode 100644 arbor/include/arbor/arb_types.hpp
 create mode 100644 arbor/include/arbor/arb_types.inc
 rename arbor/{backends => include/arbor}/gpu/cuda_api.hpp (100%)
 rename arbor/{backends => include/arbor}/gpu/gpu_api.hpp (100%)
 rename arbor/{backends => include/arbor}/gpu/gpu_common.hpp (100%)
 rename arbor/{backends => include/arbor}/gpu/hip_api.hpp (100%)
 rename arbor/{backends => include/arbor}/gpu/math_cu.hpp (100%)
 rename arbor/{backends => include/arbor}/gpu/reduce_by_key.hpp (100%)
 create mode 100644 arbor/include/arbor/mechanism_abi.h
 delete mode 100644 arbor/include/arbor/mechanism_ppack.hpp
 create mode 100644 arbor/mechinfo.cpp
 create mode 100644 doc/internals/mechanism_abi.rst
 create mode 160000 ext/fmt
 rename scripts/{build-catalogue => build-catalogue.in} (87%)
 create mode 100644 test/unit/test_abi.cpp

diff --git a/.gitmodules b/.gitmodules
index 88766b09..7a46b5b6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,3 +4,6 @@
 [submodule "python/pybind11"]
 	path = python/pybind11
 	url = https://github.com/pybind/pybind11.git
+[submodule "ext/fmt"]
+	path = ext/fmt
+	url = https://github.com/fmtlib/fmt.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 94c6e21d..ccdfe0db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,6 +8,8 @@ string(REGEX MATCH "^[0-9]+(\\.[0-9]+)?(\\.[0-9]+)?(\\.[0-9]+)?" numeric_version
 project(arbor VERSION ${numeric_version})
 enable_language(CXX)
 
+include(GNUInstallDirs)
+
 # Turn on this option to force the compilers to produce color output when output is
 # redirected from the terminal (e.g. when using ninja or a pager).
 
@@ -171,6 +173,9 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 # to the 'export set', even the private ones, and this must be done
 # in the same CMakeLists.txt in which the target is defined.
 
+# Data and internal scripts go here
+set(ARB_INSTALL_DATADIR ${CMAKE_INSTALL_FULL_DATAROOTDIR}/arbor)
+
 # Interface library `arbor-config-defs` collects configure-time defines
 # for arbor, arborenv, arborio, of the form ARB_HAVE_XXX. These
 # defines should _not_ be used in any installed public headers.
@@ -212,6 +217,12 @@ install(TARGETS arbor-public-deps EXPORT arbor-targets)
 add_library(arborio-public-deps INTERFACE)
 install(TARGETS arborio-public-deps EXPORT arborio-targets)
 
+# Add scripts and supporting CMake for setting up external catalogues
+
+configure_file(scripts/build-catalogue.in ${CMAKE_CURRENT_BINARY_DIR}/build-catalogue @ONLY)
+install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/build-catalogue DESTINATION ${CMAKE_INSTALL_FULL_BINDIR})
+install(FILES mechanisms/BuildModules.cmake DESTINATION ${ARB_INSTALL_DATADIR})
+install(FILES mechanisms/generate_catalogue DESTINATION ${ARB_INSTALL_DATADIR} PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
 # External libraries in `ext` sub-directory: json, tinyopt and randon123.
 # Creates interface libraries `ext-json`, `ext-tinyopt` and `ext-random123`
 
@@ -224,6 +235,8 @@ else()
 endif()
 add_subdirectory(ext)
 
+cmake_dependent_option(ARB_USE_BUNDLED_FMT "Use bundled FMT lib." ON "ARB_USE_BUNDLED_LIBS" OFF)
+
 # Keep track of packages we need to add to the generated CMake config
 # file for arbor.
 
diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 99712875..9296d2ac 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -3,8 +3,6 @@
 set(arbor_sources
     arbexcept.cpp
     assert.cpp
-    backends/multicore/fvm.cpp
-    backends/multicore/mechanism.cpp
     backends/multicore/shared_state.cpp
     communication/communicator.cpp
     communication/dry_run_context.cpp
@@ -27,6 +25,7 @@ set(arbor_sources
     lif_cell_group.cpp
     mc_cell_group.cpp
     mechcat.cpp
+    mechinfo.cpp
     memory/gpu_wrappers.cpp
     memory/util.cpp
     morph/embed_pwlin.cpp
@@ -63,9 +62,6 @@ set(arbor_sources
 
 if(ARB_WITH_GPU)
     list(APPEND arbor_sources
-        backends/gpu/fvm.cpp
-        backends/gpu/mechanism.cpp
-        backends/gpu/mechanism.cu
         backends/gpu/shared_state.cpp
         backends/gpu/stimulus.cu
         backends/gpu/threshold_watcher.cu
diff --git a/arbor/arbexcept.cpp b/arbor/arbexcept.cpp
index 13158e36..185a0d5c 100644
--- a/arbor/arbexcept.cpp
+++ b/arbor/arbexcept.cpp
@@ -134,5 +134,13 @@ bad_catalogue_error::bad_catalogue_error(const std::string &fn, const std::strin
       failed_call{call}
 {}
 
+unsupported_abi_error::unsupported_abi_error(size_t v):
+    arbor_exception(pprintf("ABI version is not supported by this version of arbor '{}'", v)),
+    version{v} {}
+
+bad_alignment::bad_alignment(size_t a):
+    arbor_exception(pprintf("Mechanism reported unsupported alignment '{}'", a)),
+    alignment{a} {}
+
 } // namespace arb
 
diff --git a/arbor/backends/event.hpp b/arbor/backends/event.hpp
index b69f8341..508c2b96 100644
--- a/arbor/backends/event.hpp
+++ b/arbor/backends/event.hpp
@@ -22,13 +22,12 @@ struct target_handle {
 
 struct deliverable_event {
     time_type time;
-    target_handle handle;
     float weight;
+    target_handle handle;
 
     deliverable_event() {}
     deliverable_event(time_type time, target_handle handle, float weight):
-        time(time), handle(handle), weight(weight)
-    {}
+        time(time), weight(weight), handle(handle) {}
 };
 
 // Stream index accessor function for multi_event_stream:
diff --git a/arbor/backends/gpu/fvm.cpp b/arbor/backends/gpu/fvm.cpp
deleted file mode 100644
index 8c07e4b0..00000000
--- a/arbor/backends/gpu/fvm.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#include <string>
-
-#include <arbor/mechanism.hpp>
-#include "fvm.hpp"
-#include "mechanism.hpp"
-
-// Provides implementation of backend::mechanism_field_data.
-
-namespace arb {
-namespace gpu {
-
-fvm_value_type* backend::mechanism_field_data(arb::mechanism* mptr, const std::string& field) {
-    arb::gpu::mechanism* m = dynamic_cast<arb::gpu::mechanism*>(mptr);
-    return m? m->field_data(field): nullptr;
-}
-
-} // namespace gpu
-} // namespace arb
diff --git a/arbor/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp
index a3467f22..ef025bbf 100644
--- a/arbor/backends/gpu/fvm.hpp
+++ b/arbor/backends/gpu/fvm.hpp
@@ -32,6 +32,8 @@ struct backend {
     using array  = arb::gpu::array;
     using iarray = arb::gpu::iarray;
 
+    static constexpr arb_backend_kind kind = arb_backend_kind_gpu;
+
     static memory::host_vector<value_type> host_view(const array& v) {
         return memory::on_host(v);
     }
@@ -65,8 +67,6 @@ struct backend {
             thresholds,
             context);
     }
-
-    static value_type* mechanism_field_data(arb::mechanism* mptr, const std::string& field);
 };
 
 } // namespace gpu
diff --git a/arbor/backends/gpu/matrix_assemble.cu b/arbor/backends/gpu/matrix_assemble.cu
index ac93ef49..0f99eec5 100644
--- a/arbor/backends/gpu/matrix_assemble.cu
+++ b/arbor/backends/gpu/matrix_assemble.cu
@@ -1,6 +1,6 @@
 #include <arbor/fvm_types.hpp>
 
-#include "gpu_common.hpp"
+#include <arbor/gpu/gpu_common.hpp>
 #include "matrix_common.hpp"
 
 namespace arb {
diff --git a/arbor/backends/gpu/matrix_common.hpp b/arbor/backends/gpu/matrix_common.hpp
index 754ab337..749bed90 100644
--- a/arbor/backends/gpu/matrix_common.hpp
+++ b/arbor/backends/gpu/matrix_common.hpp
@@ -3,7 +3,7 @@
 #include <cfloat>
 #include <climits>
 
-#include "gpu_api.hpp"
+#include <arbor/gpu/gpu_api.hpp>
 
 #if defined(__CUDACC__) || defined(__HIPCC__)
 #   define HOST_DEVICE_IF_GPU __host__ __device__
diff --git a/arbor/backends/gpu/matrix_fine.cu b/arbor/backends/gpu/matrix_fine.cu
index 228d9642..ca3592f1 100644
--- a/arbor/backends/gpu/matrix_fine.cu
+++ b/arbor/backends/gpu/matrix_fine.cu
@@ -1,7 +1,7 @@
 #include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
 
-#include "gpu_api.hpp"
-#include "gpu_common.hpp"
 #include "matrix_common.hpp"
 #include "matrix_fine.hpp"
 
diff --git a/arbor/backends/gpu/matrix_solve.cu b/arbor/backends/gpu/matrix_solve.cu
index 8cbca651..576f88e9 100644
--- a/arbor/backends/gpu/matrix_solve.cu
+++ b/arbor/backends/gpu/matrix_solve.cu
@@ -1,6 +1,6 @@
 #include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_common.hpp>
 
-#include "gpu_common.hpp"
 #include "matrix_common.hpp"
 
 namespace arb {
diff --git a/arbor/backends/gpu/mechanism.cpp b/arbor/backends/gpu/mechanism.cpp
deleted file mode 100644
index b3d6af94..00000000
--- a/arbor/backends/gpu/mechanism.cpp
+++ /dev/null
@@ -1,231 +0,0 @@
-#include <algorithm>
-#include <cstddef>
-#include <cmath>
-#include <optional>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <arbor/arbexcept.hpp>
-#include <arbor/common_types.hpp>
-#include <arbor/fvm_types.hpp>
-#include <arbor/math.hpp>
-#include <arbor/mechanism.hpp>
-
-#include "memory/memory.hpp"
-#include "util/index_into.hpp"
-#include "util/maputil.hpp"
-#include "util/range.hpp"
-#include "util/span.hpp"
-
-#include "backends/gpu/mechanism.hpp"
-#include "backends/gpu/fvm.hpp"
-
-namespace arb {
-namespace gpu {
-
-using memory::make_const_view;
-using util::make_span;
-using util::ptr_by_key;
-using util::value_by_key;
-
-template <typename T>
-memory::device_view<T> device_view(T* ptr, std::size_t n) {
-    return memory::device_view<T>(ptr, n);
-}
-
-template <typename T>
-memory::const_device_view<T> device_view(const T* ptr, std::size_t n) {
-    return memory::const_device_view<T>(ptr, n);
-}
-
-// The derived class (typically generated code from modcc) holds pointers to
-// data fields. These point point to either:
-//   * shared fields read/written by all mechanisms in a cell group
-//     (e.g. the per-compartment voltage vec_c);
-//   * or mechanism specific parameter or variable fields stored inside the
-//     mechanism.
-// These pointers need to be set point inside the shared state of the cell
-// group, or into the allocated parameter/variable data block.
-//
-// The mechanism::instantiate() method takes a reference to the cell group
-// shared state and discretised cell layout information, and sets the
-// pointers. This also involves setting the pointers in the parameter pack,
-// which is used to pass pointers to CUDA kernels.
-
-void mechanism::instantiate(unsigned id,
-                            backend::shared_state& shared,
-                            const mechanism_overrides& overrides,
-                            const mechanism_layout& pos_data)
-{
-    // Assign global scalar parameters:
-
-    for (auto &kv: overrides.globals) {
-        if (auto opt_ptr = value_by_key(global_table(), kv.first)) {
-            // Take reference to corresponding derived (generated) mechanism value member.
-            value_type& global = *opt_ptr.value();
-            global = kv.second;
-        }
-        else {
-            throw arbor_internal_error("multicore/mechanism: no such mechanism global");
-        }
-    }
-
-    mult_in_place_ = !pos_data.multiplicity.empty();
-    mechanism_id_ = id;
-    width_ = pos_data.cv.size();
-
-    unsigned alignment = std::max(array::alignment(), iarray::alignment());
-    auto width_padded_ = math::round_up(width_, alignment);
-
-    // Assign non-owning views onto shared state:
-
-    mechanism_ppack* pp = ppack_ptr(); // From derived class instance.
-
-    pp->width_ = width_;
-    pp->n_detectors_ = shared.n_detector;
-
-    pp->vec_ci_   = shared.cv_to_cell.data();
-    pp->vec_di_   = shared.cv_to_intdom.data();
-    pp->vec_dt_   = shared.dt_cv.data();
-
-    pp->vec_v_    = shared.voltage.data();
-    pp->vec_i_    = shared.current_density.data();
-    pp->vec_g_    = shared.conductivity.data();
-
-    pp->temperature_degC_ = shared.temperature_degC.data();
-    pp->diam_um_ = shared.diam_um.data();
-    pp->time_since_spike_ = shared.time_since_spike.data();
-
-    auto ion_state_tbl = ion_state_table();
-    num_ions_ = ion_state_tbl.size();
-
-    for (auto i: ion_state_tbl) {
-        auto ion_binding = value_by_key(overrides.ion_rebind, i.first).value_or(i.first);
-
-        ion_state* oion = ptr_by_key(shared.ion_data, ion_binding);
-        if (!oion) {
-            throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
-        }
-
-        ion_state_view& ion_view = *i.second;
-        ion_view.current_density = oion->iX_.data();
-        ion_view.reversal_potential = oion->eX_.data();
-        ion_view.internal_concentration = oion->Xi_.data();
-        ion_view.external_concentration = oion->Xo_.data();
-        ion_view.ionic_charge = oion->charge.data();
-    }
-
-    event_stream_ptr_ = &shared.deliverable_events;
-    vec_t_ptr_    = &shared.time;
-
-    // If there are no sites (is this ever meaningful?) there is nothing more to do.
-    if (width_==0) {
-        return;
-    }
-
-    // Allocate and initialize state and parameter vectors with default values.
-    // (First sub-array of data_ is used for width_.)
-
-    auto fields = field_table();
-    std::size_t num_fields = fields.size();
-
-    data_ = array((1+num_fields)*width_padded_, NAN);
-    memory::copy(make_const_view(pos_data.weight), device_view(data_.data(), width_));
-    pp->weight_ = data_.data();
-
-    for (auto i: make_span(0, num_fields)) {
-        // Take reference to corresponding derived (generated) mechanism value pointer member.
-        fvm_value_type*& field_ptr = *std::get<1>(fields[i]);
-        field_ptr = data_.data()+(i+1)*width_padded_;
-
-        if (auto opt_value = value_by_key(field_default_table(), fields[i].first)) {
-            memory::fill(device_view(field_ptr, width_), *opt_value);
-        }
-    }
-
-    // Allocate and initialize index vectors, viz. node_index_ and any ion indices.
-    // (First sub-array of indices_ is used for node_index_, last sub-array used for multiplicity_ if it is not empty)
-
-    size_type num_elements = (mult_in_place_ ? 1 : 0) + 1 + num_ions_;
-    indices_ = iarray(num_elements*width_padded_);
-
-    auto base_ptr = indices_.data();
-
-    auto append_chunk = [&](const auto& input, auto& output) {
-        memory::copy(make_const_view(input), device_view(base_ptr, width_));
-        output = base_ptr;
-        base_ptr += width_padded_;
-    };
-
-    append_chunk(pos_data.cv, pp->node_index_);
-
-    auto ion_index_tbl = ion_index_table();
-    arb_assert(num_ions_==ion_index_tbl.size());
-
-    for (auto& [ion, ion_ptr]: ion_index_tbl) {
-        auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
-
-        ion_state* oion = ptr_by_key(shared.ion_data, ion_binding);
-
-        if (!oion) {
-            throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
-        }
-
-        auto ni = memory::on_host(oion->node_index_);
-        auto indices = util::index_into(pos_data.cv, ni);
-        std::vector<index_type> mech_ion_index(indices.begin(), indices.end());
-
-        // Take reference to derived (generated) mechanism ion index pointer.
-        append_chunk(mech_ion_index, *ion_ptr);
-    }
-
-    if (mult_in_place_) {
-        append_chunk(pos_data.multiplicity, pp->multiplicity_);
-    }
-}
-
-void mechanism::set_parameter(const std::string& key, const std::vector<fvm_value_type>& values) {
-    if (auto opt_ptr = value_by_key(field_table(), key)) {
-        if (values.size()!=width_) {
-            throw arbor_internal_error("gpu/mechanism: mechanism parameter size mismatch");
-        }
-
-        if (width_>0) {
-            // Retrieve corresponding derived (generated) mechanism value pointer member.
-            value_type* field_ptr = *opt_ptr.value();
-            memory::copy(make_const_view(values), device_view(field_ptr, width_));
-        }
-    }
-    else {
-        throw arbor_internal_error("gpu/mechanism: no such mechanism parameter");
-    }
-}
-
-fvm_value_type* mechanism::field_data(const std::string& field_var) {
-    if (auto opt_ptr = value_by_key(field_table(), field_var)) {
-        return *opt_ptr.value();
-    }
-
-    return nullptr;
-}
-
-void multiply_in_place(fvm_value_type* s, const fvm_index_type* p, int n);
-
-void mechanism::initialize() {
-    mechanism_ppack* pp = ppack_ptr();
-    pp->vec_t_ = vec_t_ptr_->data();
-
-    init();
-    auto states = state_table();
-
-    if(mult_in_place_) {
-        for (auto& state: states) {
-            multiply_in_place(*state.second, pp->multiplicity_, pp->width_);
-        }
-    }
-}
-
-
-} // namespace multicore
-} // namespace arb
diff --git a/arbor/backends/gpu/mechanism.cu b/arbor/backends/gpu/mechanism.cu
deleted file mode 100644
index f0befe09..00000000
--- a/arbor/backends/gpu/mechanism.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <arbor/fvm_types.hpp>
-#include <backends/gpu/gpu_common.hpp>
-
-namespace arb {
-namespace gpu {
-
-__global__
-void multiply_in_place_(fvm_value_type* s, const fvm_index_type* p, int n) {
-    int tid_ = threadIdx.x + blockDim.x*blockIdx.x;
-    if (tid_<n) {
-        s[tid_] *= p[tid_];
-    }
-}
-
-void multiply_in_place(fvm_value_type* s, const fvm_index_type* p, int n) {
-    unsigned block_dim = 128;
-    unsigned grid_dim = gpu::impl::block_count(n, block_dim);
-
-    multiply_in_place_<<<grid_dim, block_dim>>>(s, p, n);
-}
-
-} // namespace gpu
-} // namespace arb
diff --git a/arbor/backends/gpu/mechanism.hpp b/arbor/backends/gpu/mechanism.hpp
deleted file mode 100644
index 42daf236..00000000
--- a/arbor/backends/gpu/mechanism.hpp
+++ /dev/null
@@ -1,31 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <cmath>
-#include <cstddef>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <arbor/common_types.hpp>
-#include <arbor/fvm_types.hpp>
-#include <arbor/mechanism.hpp>
-
-#include "backends/gpu/fvm.hpp"
-#include "backends/gpu/gpu_store_types.hpp"
-
-namespace arb {
-namespace gpu {
-
-// Base class for all generated mechanisms for gpu back-end.
-
-class mechanism: public arb::concrete_mechanism<arb::gpu::backend> {
-public:
-    void instantiate(fvm_size_type id, backend::shared_state& shared, const mechanism_overrides&, const mechanism_layout&) override;
-    void initialize() override;
-    void set_parameter(const std::string& key, const std::vector<fvm_value_type>& values) override;
-    fvm_value_type* field_data(const std::string& state_var) override;
-};
-
-} // namespace gpu
-} // namespace arb
diff --git a/arbor/backends/gpu/multi_event_stream.cu b/arbor/backends/gpu/multi_event_stream.cu
index 7e3e90c7..11a8136f 100644
--- a/arbor/backends/gpu/multi_event_stream.cu
+++ b/arbor/backends/gpu/multi_event_stream.cu
@@ -1,7 +1,7 @@
 #include <arbor/common_types.hpp>
+#include <arbor/gpu/gpu_common.hpp>
 
 #include "backends/event.hpp"
-#include "gpu_common.hpp"
 
 namespace arb {
 namespace gpu {
diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
index 555f9a28..b26dbea4 100644
--- a/arbor/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -3,15 +3,21 @@
 
 #include <arbor/constants.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/math.hpp>
 
 #include "backends/event.hpp"
 #include "backends/gpu/gpu_store_types.hpp"
 #include "backends/gpu/shared_state.hpp"
 #include "backends/multi_event_stream_state.hpp"
 #include "memory/copy.hpp"
+#include "memory/gpu_wrappers.hpp"
 #include "memory/wrappers.hpp"
 #include "util/index_into.hpp"
 #include "util/rangeutil.hpp"
+#include "util/maputil.hpp"
+#include "util/meta.hpp"
+#include "util/range.hpp"
+#include "util/strprintf.hpp"
 
 using arb::memory::make_const_view;
 
@@ -205,6 +211,186 @@ shared_state::shared_state(
     add_scalar(temperature_degC.size(), temperature_degC.data(), -273.15);
 }
 
+namespace {
+template <typename T>
+struct chunk_writer {
+    T* end; // device ptr
+    const std::size_t stride;
+
+    chunk_writer(T* data, std::size_t stride): end(data), stride(stride) {}
+
+    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
+    T* append(Seq&& seq) {
+        arb_assert(std::size(seq)==stride);
+        return append_freely(std::forward<Seq>(seq));
+    }
+
+    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
+    T* append_freely(Seq&& seq) {
+        std::size_t n = std::size(seq);
+        memory::copy(memory::host_view<T>(const_cast<T*>(std::data(seq)), n), memory::device_view<T>(end, n));
+        auto p = end;
+        end += n;
+        return p;
+    }
+
+    T* fill(T value) {
+        memory::fill(memory::device_view<T>(end, stride), value);
+        auto p = end;
+        end += stride;
+        return p;
+    }
+};
+}
+
+void shared_state::set_parameter(mechanism& m, const std::string& key, const std::vector<arb_value_type>& values) {
+    if (values.size()!=m.ppack_.width) throw arbor_internal_error("mechanism parameter size mismatch");
+    const auto& store = storage.at(m.mechanism_id());
+
+    arb_value_type* data = nullptr;
+    for (arb_size_type i = 0; i<m.mech_.n_parameters; ++i) {
+        if (key==m.mech_.parameters[i].name) {
+            data = store.parameters_[i];
+            break;
+        }
+    }
+    if (!data) throw arbor_internal_error(util::pprintf("no such mechanism parameter '{}'", key));
+
+    if (!m.ppack_.width) return;
+    memory::copy(memory::make_const_view(values), memory::device_view<arb_value_type>(data, m.ppack_.width));
+}
+
+const arb_value_type* shared_state::mechanism_state_data(const mechanism& m, const std::string& key) {
+    const auto& store = storage.at(m.mechanism_id());
+
+    for (arb_size_type i = 0; i<m.mech_.n_state_vars; ++i) {
+        if (key==m.mech_.state_vars[i].name) {
+            return store.state_vars_[i];
+        }
+    }
+    return nullptr;
+}
+
+void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overrides& overrides, const mechanism_layout& pos_data) {
+    assert(m.iface_.backend == arb_backend_kind_gpu);
+    using util::make_range;
+    using util::make_span;
+    using util::ptr_by_key;
+    using util::value_by_key;
+
+    bool mult_in_place = !pos_data.multiplicity.empty();
+
+    // Set internal variables
+    m.time_ptr_ptr   = &time_ptr;
+
+    auto width        = pos_data.cv.size();
+    auto width_padded = math::round_up(pos_data.cv.size(), alignment);
+
+    // Assign non-owning views onto shared state:
+    m.ppack_ = {0};
+    m.ppack_.width            = width;
+    m.ppack_.mechanism_id     = id;
+    m.ppack_.vec_ci           = cv_to_cell.data();
+    m.ppack_.vec_di           = cv_to_intdom.data();
+    m.ppack_.vec_dt           = dt_cv.data();
+    m.ppack_.vec_v            = voltage.data();
+    m.ppack_.vec_i            = current_density.data();
+    m.ppack_.vec_g            = conductivity.data();
+    m.ppack_.temperature_degC = temperature_degC.data();
+    m.ppack_.diam_um          = diam_um.data();
+    m.ppack_.time_since_spike = time_since_spike.data();
+    m.ppack_.n_detectors      = n_detector;
+
+    if (storage.find(id) != storage.end()) throw arb::arbor_internal_error("Duplicate mech id in shared state");
+    auto& store = storage[id];
+
+    // Allocate view pointers
+    store.state_vars_ = std::vector<arb_value_type*>(m.mech_.n_state_vars);
+    store.parameters_ = std::vector<arb_value_type*>(m.mech_.n_parameters);
+    store.ion_states_ = std::vector<arb_ion_state>(m.mech_.n_ions);
+    store.globals_    = std::vector<arb_value_type>(m.mech_.n_globals);
+
+    // Set ion views
+    for (auto idx: make_span(m.mech_.n_ions)) {
+        auto ion = m.mech_.ions[idx].name;
+        auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
+        ion_state* oion = ptr_by_key(ion_data, ion_binding);
+        if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
+        store.ion_states_[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data(), nullptr };
+    }
+
+    // If there are no sites (is this ever meaningful?) there is nothing more to do.
+    if (width==0) return;
+
+    // Allocate and initialize state and parameter vectors with default values.
+    {
+        // Allocate bulk storage
+        std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*width_padded + m.mech_.n_globals;
+        store.data_   = array(count, NAN);
+        chunk_writer writer(store.data_.data(), width);
+
+        // First sub-array of data_ is used for weight_
+        m.ppack_.weight = writer.append(pos_data.weight);
+        // Set fields
+        for (auto idx: make_span(m.mech_.n_parameters)) {
+            store.parameters_[idx] = writer.fill(m.mech_.parameters[idx].default_value);
+        }
+        for (auto idx: make_span(m.mech_.n_state_vars)) {
+            store.state_vars_[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
+        }
+        // Assign global scalar parameters. NB: Last chunk, since it breaks the width striding.
+        for (auto idx: make_span(m.mech_.n_globals)) store.globals_[idx] = m.mech_.globals[idx].default_value;
+        for (auto& [k, v]: overrides.globals) {
+            auto found = false;
+            for (auto idx: make_span(m.mech_.n_globals)) {
+                if (m.mech_.globals[idx].name == k) {
+                    store.globals_[idx] = v;
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) throw arbor_internal_error(util::pprintf("gpu/mechanism: no such mechanism global '{}'", k));
+        }
+        m.ppack_.globals = writer.append_freely(store.globals_);
+    }
+
+    // Allocate and initialize index vectors, viz. node_index_ and any ion indices.
+    {
+        // Allocate bulk storage
+        std::size_t count = mult_in_place + m.mech_.n_ions + 1;
+        store.indices_ = iarray(count*width_padded);
+        chunk_writer writer(store.indices_.data(), width);
+
+        // Setup node indices
+        m.ppack_.node_index = writer.append(pos_data.cv);
+        // Create ion indices
+        for (auto idx: make_span(m.mech_.n_ions)) {
+            auto  ion = m.mech_.ions[idx].name;
+            // Index into shared_state respecting ion rebindings
+            auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
+            ion_state* oion = ptr_by_key(ion_data, ion_binding);
+            if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
+            // Obtain index and move data
+            auto ni = memory::on_host(oion->node_index_);
+            auto indices = util::index_into(pos_data.cv, ni);
+            std::vector<arb_index_type> mech_ion_index(indices.begin(), indices.end());
+            store.ion_states_[idx].index = writer.append(mech_ion_index);
+        }
+
+        m.ppack_.multiplicity = mult_in_place? writer.append(pos_data.multiplicity): nullptr;
+    }
+
+    // Shift data to GPU, set up pointers
+    store.parameters_d_ = memory::on_gpu(store.parameters_);
+    m.ppack_.parameters = store.parameters_d_.data();
+
+    store.state_vars_d_ = memory::on_gpu(store.state_vars_);
+    m.ppack_.state_vars = store.state_vars_d_.data();
+
+    store.ion_states_d_ = memory::on_gpu(store.ion_states_);
+    m.ppack_.ion_states = store.ion_states_d_.data();
+}
+
 void shared_state::add_ion(
     const std::string& ion_name,
     int charge,
diff --git a/arbor/backends/gpu/shared_state.cu b/arbor/backends/gpu/shared_state.cu
index c01376ff..8fdd161b 100644
--- a/arbor/backends/gpu/shared_state.cu
+++ b/arbor/backends/gpu/shared_state.cu
@@ -5,8 +5,8 @@
 #include <backends/event.hpp>
 #include <backends/multi_event_stream_state.hpp>
 
-#include "gpu_api.hpp"
-#include "gpu_common.hpp"
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
 
 namespace arb {
 namespace gpu {
diff --git a/arbor/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
index 695821fa..d74029f0 100644
--- a/arbor/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <cstddef>
 #include <iosfwd>
 #include <unordered_map>
 #include <utility>
@@ -99,6 +100,20 @@ struct istim_state {
 };
 
 struct shared_state {
+    struct mech_storage {
+        array data_;
+        iarray indices_;
+        std::vector<arb_value_type>  globals_;
+        std::vector<arb_value_type*> parameters_;
+        std::vector<arb_value_type*> state_vars_;
+        std::vector<arb_ion_state>   ion_states_;
+        memory::device_vector<arb_value_type*> parameters_d_;
+        memory::device_vector<arb_value_type*> state_vars_d_;
+        memory::device_vector<arb_ion_state>   ion_states_d_;
+    };
+
+    static constexpr std::size_t alignment = std::max(array::alignment(), iarray::alignment());
+
     fvm_size_type n_intdom = 0;   // Number of distinct integration domains.
     fvm_size_type n_detector = 0; // Max number of detectors on all cells.
     fvm_size_type n_cv = 0;       // Total number of CVs.
@@ -122,9 +137,12 @@ struct shared_state {
     array time_since_spike;   // Stores time since last spike on any detector, organized by cell.
     iarray src_to_spike;      // Maps spike source index to spike index
 
+    arb_value_type* time_ptr;
+
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     deliverable_event_stream deliverable_events;
+    std::unordered_map<unsigned, mech_storage> storage;
 
     shared_state() = default;
 
@@ -139,9 +157,17 @@ struct shared_state {
         const std::vector<fvm_value_type>& temperature_K,
         const std::vector<fvm_value_type>& diam,
         const std::vector<fvm_index_type>& src_to_spike,
-        unsigned align
+        unsigned // align parameter ignored
     );
 
+    // Setup a mechanism and tie its backing store to this object
+    void instantiate(arb::mechanism&, unsigned, const mechanism_overrides&, const mechanism_layout&);
+
+    void set_parameter(mechanism&, const std::string&, const std::vector<arb_value_type>&);
+
+    // Note: returned pointer points to device memory.
+    const arb_value_type* mechanism_state_data(const mechanism& m, const std::string& key);
+
     void add_ion(
         const std::string& ion_name,
         int charge,
diff --git a/arbor/backends/gpu/stack_cu.hpp b/arbor/backends/gpu/stack_cu.hpp
index 24b9df7e..b1938681 100644
--- a/arbor/backends/gpu/stack_cu.hpp
+++ b/arbor/backends/gpu/stack_cu.hpp
@@ -1,6 +1,6 @@
 #pragma once
 
-#include "gpu_common.hpp"
+#include <arbor/gpu/gpu_common.hpp>
 #include "stack_storage.hpp"
 
 namespace arb {
diff --git a/arbor/backends/gpu/stimulus.cu b/arbor/backends/gpu/stimulus.cu
index a127128c..ea726295 100644
--- a/arbor/backends/gpu/stimulus.cu
+++ b/arbor/backends/gpu/stimulus.cu
@@ -1,10 +1,10 @@
 #include <cmath>
 
 #include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+#include <arbor/gpu/math_cu.hpp>
 
-#include "backends/gpu/gpu_api.hpp"
-#include "backends/gpu/gpu_common.hpp"
-#include "backends/gpu/math_cu.hpp"
 #include "backends/gpu/stimulus.hpp"
 
 namespace arb {
@@ -55,7 +55,7 @@ void istim_add_current_impl(int n, istim_pp pp) {
 void istim_add_current_impl(int n, const istim_pp& pp) {
     constexpr unsigned block_dim = 128;
     const unsigned grid_dim = impl::block_count(n, block_dim);
-
+    if (!grid_dim) return;
     kernel::istim_add_current_impl<<<grid_dim, block_dim>>>(n, pp);
 }
 
diff --git a/arbor/backends/gpu/threshold_watcher.cu b/arbor/backends/gpu/threshold_watcher.cu
index 553f85e4..6748a482 100644
--- a/arbor/backends/gpu/threshold_watcher.cu
+++ b/arbor/backends/gpu/threshold_watcher.cu
@@ -1,9 +1,9 @@
 #include <cmath>
 
 #include <arbor/fvm_types.hpp>
+#include <arbor/gpu/math_cu.hpp>
 
 #include "backends/threshold_crossing.hpp"
-#include "math_cu.hpp"
 #include "stack_cu.hpp"
 
 namespace arb {
diff --git a/arbor/backends/multicore/fvm.cpp b/arbor/backends/multicore/fvm.cpp
deleted file mode 100644
index 2432d43d..00000000
--- a/arbor/backends/multicore/fvm.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-#include <string>
-
-#include <arbor/mechanism.hpp>
-#include "fvm.hpp"
-#include "mechanism.hpp"
-
-// Provides implementation of backend::mechanism_field_data.
-
-namespace arb {
-namespace multicore {
-
-fvm_value_type* backend::mechanism_field_data(arb::mechanism* mptr, const std::string& field) {
-    arb::multicore::mechanism* m = dynamic_cast<arb::multicore::mechanism*>(mptr);
-    return m? m->field_data(field): nullptr;
-}
-
-} // namespace multicore
-} // namespace arb
diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp
index 1920f801..6e21311d 100644
--- a/arbor/backends/multicore/fvm.hpp
+++ b/arbor/backends/multicore/fvm.hpp
@@ -30,6 +30,8 @@ struct backend {
     using array  = arb::multicore::array;
     using iarray = arb::multicore::iarray;
 
+    static constexpr arb_backend_kind kind = arb_backend_kind_cpu;
+
     static util::range<const value_type*> host_view(const array& v) {
         return util::range_pointer_view(v);
     }
diff --git a/arbor/backends/multicore/mechanism.cpp b/arbor/backends/multicore/mechanism.cpp
deleted file mode 100644
index 0a5ee29b..00000000
--- a/arbor/backends/multicore/mechanism.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-#include <algorithm>
-#include <cstddef>
-#include <cmath>
-#include <optional>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <arbor/fvm_types.hpp>
-#include <arbor/common_types.hpp>
-#include <arbor/math.hpp>
-#include <arbor/mechanism.hpp>
-
-#include "util/index_into.hpp"
-#include "util/maputil.hpp"
-#include "util/padded_alloc.hpp"
-#include "util/range.hpp"
-#include "util/rangeutil.hpp"
-
-#include "backends/multicore/mechanism.hpp"
-#include "backends/multicore/multicore_common.hpp"
-#include "backends/multicore/fvm.hpp"
-#include "backends/multicore/partition_by_constraint.hpp"
-
-namespace arb {
-namespace multicore {
-
-using util::make_range;
-using util::ptr_by_key;
-using util::value_by_key;
-
-// The derived class (typically generated code from modcc) holds pointers that need
-// to be set to point inside the shared state, or into the allocated parameter/variable
-// data block.
-//
-// In ths SIMD case, there may be a 'tail' of values that correspond to a partial
-// SIMD value when the width is not a multiple of the SIMD data width. In this
-// implementation we do not use SIMD masking to avoid tail values, but instead
-// extend the vectors to a multiple of the SIMD width: sites/CVs corresponding to
-// these past-the-end values are given a weight of zero, and any corresponding
-// indices into shared state point to the last valid slot.
-
-void mechanism::instantiate(unsigned id, backend::shared_state& shared, const mechanism_overrides& overrides, const mechanism_layout& pos_data) {
-    using util::make_range;
-
-    // Assign global scalar parameters:
-
-    for (auto &kv: overrides.globals) {
-        if (auto opt_ptr = value_by_key(global_table(), kv.first)) {
-            // Take reference to corresponding derived (generated) mechanism value member.
-            fvm_value_type& global = *opt_ptr.value();
-            global = kv.second;
-        }
-        else {
-            throw arbor_internal_error("multicore/mechanism: no such mechanism global");
-        }
-    }
-
-    mult_in_place_ = !pos_data.multiplicity.empty();
-    util::padded_allocator<> pad(shared.alignment);
-    mechanism_id_ = id;
-    width_ = pos_data.cv.size();
-
-    // Assign non-owning views onto shared state:
-    auto pp = (arb::multicore::mechanism_ppack*) ppack_ptr();
-
-    pp->width_  = width_;
-    pp->vec_ci_ = shared.cv_to_cell.data();
-    pp->vec_di_ = shared.cv_to_intdom.data();
-    pp->vec_dt_ = shared.dt_cv.data();
-
-    pp->vec_v_  = shared.voltage.data();
-    pp->vec_i_  = shared.current_density.data();
-    pp->vec_g_  = shared.conductivity.data();
-
-    pp->temperature_degC_ = shared.temperature_degC.data();
-    pp->diam_um_  = shared.diam_um.data();
-    pp->time_since_spike_ = shared.time_since_spike.data();
-
-    pp->n_detectors_ = shared.n_detector;
-
-    auto ion_state_tbl = ion_state_table();
-    num_ions_ = ion_state_tbl.size();
-    for (auto i: ion_state_tbl) {
-        auto ion_binding = value_by_key(overrides.ion_rebind, i.first).value_or(i.first);
-
-        ion_state* oion = ptr_by_key(shared.ion_data, ion_binding);
-        if (!oion) {
-            throw arbor_internal_error("multicore/mechanism: mechanism holds ion with no corresponding shared state");
-        }
-
-        ion_state_view& ion_view = *i.second;
-        ion_view.current_density = oion->iX_.data();
-        ion_view.reversal_potential = oion->eX_.data();
-        ion_view.internal_concentration = oion->Xi_.data();
-        ion_view.external_concentration = oion->Xo_.data();
-        ion_view.ionic_charge = oion->charge.data();
-    }
-
-    vec_t_ptr_        = &shared.time;
-    event_stream_ptr_ = &shared.deliverable_events;
-
-    // If there are no sites (is this ever meaningful?) there is nothing more to do.
-    if (width_==0) {
-        return;
-    }
-
-    // Extend width to account for requisite SIMD padding.
-    width_padded_ = math::round_up(width_, shared.alignment);
-
-    // Allocate and initialize state and parameter vectors with default values.
-
-    auto fields = field_table();
-    std::size_t n_field = fields.size();
-
-    // (First sub-array of data_ is used for weight_, below.)
-    data_ = array((1+n_field)*width_padded_, NAN, pad);
-    for (std::size_t i = 0; i<n_field; ++i) {
-        // Take reference to corresponding derived (generated) mechanism value pointer member.
-        fvm_value_type*& field_ptr = *(fields[i].second);
-        field_ptr = data_.data()+(i+1)*width_padded_;
-        if (auto opt_value = value_by_key(field_default_table(), fields[i].first)) {
-            std::fill(field_ptr, field_ptr+width_padded_, *opt_value);
-        }
-    }
-    pp->weight_ = data_.data();
-
-    // Allocate and copy local state: weight, node indices, ion indices.
-    // The tail comprises those elements between width_ and width_padded_:
-    //
-    // * For entries in the padded tail of weight_, set weight to zero.
-    // * For indices in the padded tail of node_index_, set index to last valid CV index.
-    // * For indices in the padded tail of ion index maps, set index to last valid ion index.
-
-    util::copy_extend(pos_data.weight, make_range(data_.data(), data_.data()+width_padded_), 0);
-
-    // Make index bulk storage
-    {
-        auto table = ion_index_table();
-        // Allocate bulk storage
-        auto count = table.size() + 1 + (mult_in_place_ ? 1 : 0);
-        indices_ = iarray(count*width_padded_, 0, pad);
-        auto base_ptr = indices_.data();
-
-        auto append_chunk = [&](const auto& input, auto& output, const auto& pad) {
-            copy_extend(input, make_range(base_ptr, base_ptr + width_padded_), pad);
-            output = base_ptr;
-            base_ptr += width_padded_;
-        };
-
-        // Setup node indices
-        append_chunk(pos_data.cv, pp->node_index_, pos_data.cv.back());
-
-        auto node_index = make_range(pp->node_index_, pp->node_index_ + width_padded_);
-        pp->index_constraints_ = make_constraint_partition(node_index, width_, simd_width());
-
-        // Create ion indices
-        for (const auto& [ion_name, ion_index_ptr]: table) {
-            // Index into shared_state respecting ion rebindings
-            auto ion_binding = value_by_key(overrides.ion_rebind, ion_name).value_or(ion_name);
-            ion_state* oion = ptr_by_key(shared.ion_data, ion_binding);
-            if (!oion) {
-                throw arbor_internal_error("multicore/mechanism: mechanism holds ion with no corresponding shared state");
-            }
-            // Obtain index and move data
-            auto indices = util::index_into(node_index, oion->node_index_);
-            append_chunk(indices, *ion_index_ptr, util::back(indices));
-
-            // Check SIMD constraints
-            auto ion_index = make_range(*ion_index_ptr, *ion_index_ptr + width_padded_);
-            arb_assert(compatible_index_constraints(node_index, ion_index, simd_width()));
-        }
-
-        if (mult_in_place_) {
-            append_chunk(pos_data.multiplicity, pp->multiplicity_, 0);
-        }
-    }
-}
-
-void mechanism::set_parameter(const std::string& key, const std::vector<fvm_value_type>& values) {
-    if (auto opt_ptr = value_by_key(field_table(), key)) {
-        if (values.size()!=width_) {
-            throw arbor_internal_error("multicore/mechanism: mechanism parameter size mismatch");
-        }
-
-        if (width_>0) {
-            // Retrieve corresponding derived (generated) mechanism value pointer member.
-            fvm_value_type* field_ptr = *opt_ptr.value();
-            util::range<fvm_value_type*> field(field_ptr, field_ptr+width_padded_);
-
-            copy_extend(values, field, values.back());
-        }
-    }
-    else {
-        throw arbor_internal_error("multicore/mechanism: no such mechanism parameter");
-    }
-}
-
-void mechanism::initialize() {
-    auto pp_ptr = ppack_ptr();
-    pp_ptr->vec_t_ = vec_t_ptr_->data();
-    init();
-
-    auto states = state_table();
-
-    if (mult_in_place_) {
-        for (auto& state: states) {
-            for (std::size_t j = 0; j < width_; ++j) {
-                (*state.second)[j] *= pp_ptr->multiplicity_[j];
-            }
-        }
-    }
-}
-
-fvm_value_type* mechanism::field_data(const std::string& field_var) {
-    if (auto opt_ptr = value_by_key(field_table(), field_var)) {
-        return *opt_ptr.value();
-    }
-
-    return nullptr;
-}
-
-
-} // namespace multicore
-} // namespace arb
diff --git a/arbor/backends/multicore/mechanism.hpp b/arbor/backends/multicore/mechanism.hpp
deleted file mode 100644
index 3a080369..00000000
--- a/arbor/backends/multicore/mechanism.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <cmath>
-#include <cstddef>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include <arbor/common_types.hpp>
-#include <arbor/fvm_types.hpp>
-#include <arbor/mechanism.hpp>
-#include <arbor/mechanism_ppack.hpp>
-
-#include "backends/multicore/fvm.hpp"
-#include "backends/multicore/multicore_common.hpp"
-#include "backends/multicore/partition_by_constraint.hpp"
-
-namespace arb {
-namespace multicore {
-
-// Parameter pack extended for multicore.
-struct mechanism_ppack: arb::mechanism_ppack {
-    constraint_partition index_constraints_;    // Per-mechanism index and weight data, excepting ion indices.
-};
-
-// Base class for all generated mechanisms for multicore back-end.
-class mechanism: public arb::concrete_mechanism<arb::multicore::backend> {
-public:
-    void instantiate(fvm_size_type id, backend::shared_state& shared, const mechanism_overrides&, const mechanism_layout&) override;
-    void initialize() override;
-    void set_parameter(const std::string& key, const std::vector<fvm_value_type>& values) override;
-    fvm_value_type* field_data(const std::string& state_var) override;
-
-protected:
-    virtual unsigned simd_width() const { return 1; }
-    fvm_size_type width_padded_ = 0;            // Width rounded up to multiple of pad/alignment.
-};
-
-} // namespace multicore
-} // namespace arb
diff --git a/arbor/backends/multicore/partition_by_constraint.hpp b/arbor/backends/multicore/partition_by_constraint.hpp
index 34b28b37..7ec1f2ce 100644
--- a/arbor/backends/multicore/partition_by_constraint.hpp
+++ b/arbor/backends/multicore/partition_by_constraint.hpp
@@ -73,21 +73,22 @@ index_constraint idx_constraint(It it, unsigned simd_width) {
 
 template <typename T>
 constraint_partition make_constraint_partition(const T& node_index, unsigned width, unsigned simd_width) {
-
     constraint_partition part;
-    for (unsigned i = 0; i < width; i+= simd_width) {
-        auto ptr = &node_index[i];
-        if (is_contiguous_n(ptr, simd_width)) {
-            part.contiguous.push_back(i);
-        }
-        else if (is_constant_n(ptr, simd_width)) {
-            part.constant.push_back(i);
-        }
-        else if (is_independent_n(ptr, simd_width)) {
-            part.independent.push_back(i);
-        }
-        else {
-            part.none.push_back(i);
+    if (simd_width) {
+        for (unsigned i = 0; i < width; i+= simd_width) {
+            auto ptr = &node_index[i];
+            if (is_contiguous_n(ptr, simd_width)) {
+                part.contiguous.push_back(i);
+            }
+            else if (is_constant_n(ptr, simd_width)) {
+                part.constant.push_back(i);
+            }
+            else if (is_independent_n(ptr, simd_width)) {
+                part.independent.push_back(i);
+            }
+            else {
+                part.none.push_back(i);
+            }
         }
     }
     return part;
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index 442da337..befda35a 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -12,6 +12,7 @@
 #include <arbor/constants.hpp>
 #include <arbor/fvm_types.hpp>
 #include <arbor/math.hpp>
+#include <arbor/mechanism.hpp>
 #include <arbor/simd/simd.hpp>
 
 #include "backends/event.hpp"
@@ -19,6 +20,8 @@
 #include "util/index_into.hpp"
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
+#include "util/maputil.hpp"
+#include "util/range.hpp"
 
 #include "multi_event_stream.hpp"
 #include "multicore_common.hpp"
@@ -27,6 +30,11 @@
 namespace arb {
 namespace multicore {
 
+using util::make_range;
+using util::make_span;
+using util::ptr_by_key;
+using util::value_by_key;
+
 constexpr unsigned vector_length = (unsigned) simd::simd_abi::native_width<fvm_value_type>::value;
 using simd_value_type = simd::simd<fvm_value_type, vector_length, simd::simd_abi::default_abi>;
 using simd_index_type = simd::simd<fvm_index_type, vector_length, simd::simd_abi::default_abi>;
@@ -45,8 +53,6 @@ inline unsigned min_alignment(unsigned align) {
 
 using pad = util::padded_allocator<>;
 
-// ion_state methods:
-
 ion_state::ion_state(
     int charge,
     const fvm_ion_config& ion_data,
@@ -215,6 +221,8 @@ shared_state::shared_state(
     src_to_spike(src_to_spike.begin(), src_to_spike.end(), pad(alignment)),
     deliverable_events(n_intdom)
 {
+    time_ptr = time.data();
+
     // For indices in the padded tail of cv_to_intdom, set index to last valid intdom index.
     if (n_cv>0) {
         std::copy(cv_to_intdom_vec.begin(), cv_to_intdom_vec.end(), cv_to_intdom.begin());
@@ -388,5 +396,205 @@ std::ostream& operator<<(std::ostream& out, const shared_state& s) {
     return out;
 }
 
+namespace {
+template <typename T>
+struct chunk_writer {
+    T* end;
+    const std::size_t stride;
+
+    chunk_writer(T* data, std::size_t stride):
+        end(data), stride(stride) {}
+
+    template <typename Seq>
+    T* append(const Seq& seq, T pad) {
+        auto p = end;
+        copy_extend(seq, util::make_range(p, end+=stride), pad);
+        return p;
+    }
+
+    T* fill(T value) {
+        auto p = end;
+        std::fill(p, end+=stride, value);
+        return p;
+    }
+};
+
+template <typename V>
+std::size_t extend_width(const arb::mechanism& mech, std::size_t width) {
+    // Width has to accommodate mechanism alignment and SIMD width.
+    std::size_t m = std::lcm(mech.data_alignment(), mech.iface_.partition_width*sizeof(V))/sizeof(V);
+    return math::round_up(width, m);
+}
+} // anonymous namespace
+
+void shared_state::set_parameter(mechanism& m, const std::string& key, const std::vector<arb_value_type>& values) {
+    if (values.size()!=m.ppack_.width) throw arbor_internal_error("mechanism parameter size mismatch");
+
+    arb_value_type* data = nullptr;
+    for (arb_size_type i = 0; i<m.mech_.n_parameters; ++i) {
+        if (key==m.mech_.parameters[i].name) {
+            data = m.ppack_.parameters[i];
+            break;
+        }
+    }
+    if (!data) throw arbor_internal_error(util::pprintf("no such mechanism parameter '{}'", key));
+
+    if (!m.ppack_.width) return;
+    auto width_padded = extend_width<arb_value_type>(m, m.ppack_.width);
+    copy_extend(values, util::range_n(data, width_padded), values.back());
+}
+
+const arb_value_type* shared_state::mechanism_state_data(const mechanism& m, const std::string& key) {
+    for (arb_size_type i = 0; i<m.mech_.n_state_vars; ++i) {
+        if (key==m.mech_.state_vars[i].name) {
+            return m.ppack_.state_vars[i];
+        }
+    }
+    return nullptr;
+}
+
+// The derived class (typically generated code from modcc) holds pointers that need
+// to be set to point inside the shared state, or into the allocated parameter/variable
+// data block.
+//
+// In ths SIMD case, there may be a 'tail' of values that correspond to a partial
+// SIMD value when the width is not a multiple of the SIMD data width. In this
+// implementation we do not use SIMD masking to avoid tail values, but instead
+// extend the vectors to a multiple of the SIMD width: sites/CVs corresponding to
+// these past-the-end values are given a weight of zero, and any corresponding
+// indices into shared state point to the last valid slot.
+// The tail comprises those elements between width_ and width_padded_:
+//
+// * For entries in the padded tail of weight_, set weight to zero.
+// * For indices in the padded tail of node_index_, set index to last valid CV index.
+// * For indices in the padded tail of ion index maps, set index to last valid ion index.
+
+void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_overrides& overrides, const mechanism_layout& pos_data) {
+    // Mechanism indices and data require:
+    // * an alignment that is a multiple of the mechansim data_alignment();
+    // * a size which is a multiple of partition_width() for SIMD access.
+    //
+    // We used the padded_allocator to allocate arrays with the correct alignment, and allocate
+    // sizes that are multiples of a width padded to account for SIMD access and per-vector alignment.
+
+    util::padded_allocator<> pad(m.data_alignment());
+
+    // Set internal variables
+    m.time_ptr_ptr   = &time_ptr;
+
+    // Assign non-owning views onto shared state:
+    m.ppack_ = {0};
+    m.ppack_.width            = pos_data.cv.size();
+    m.ppack_.mechanism_id     = id;
+    m.ppack_.vec_ci           = cv_to_cell.data();
+    m.ppack_.vec_di           = cv_to_intdom.data();
+    m.ppack_.vec_dt           = dt_cv.data();
+    m.ppack_.vec_v            = voltage.data();
+    m.ppack_.vec_i            = current_density.data();
+    m.ppack_.vec_g            = conductivity.data();
+    m.ppack_.temperature_degC = temperature_degC.data();
+    m.ppack_.diam_um          = diam_um.data();
+    m.ppack_.time_since_spike = time_since_spike.data();
+    m.ppack_.n_detectors      = n_detector;
+    m.ppack_.events           = {};
+    m.ppack_.vec_t            = nullptr;
+
+    bool mult_in_place = !pos_data.multiplicity.empty();
+
+    if (storage.find(id) != storage.end()) throw arb::arbor_internal_error("Duplicate mech id in shared state");
+    auto& store = storage[id];
+
+    // Allocate view pointers (except globals!)
+    store.state_vars_.resize(m.mech_.n_state_vars); m.ppack_.state_vars = store.state_vars_.data();
+    store.parameters_.resize(m.mech_.n_parameters); m.ppack_.parameters = store.parameters_.data();
+    store.ion_states_.resize(m.mech_.n_ions);       m.ppack_.ion_states = store.ion_states_.data();
+
+    // Set ion views
+    for (auto idx: make_span(m.mech_.n_ions)) {
+        auto ion = m.mech_.ions[idx].name;
+        auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
+        ion_state* oion = ptr_by_key(ion_data, ion_binding);
+        if (!oion) throw arbor_internal_error(util::pprintf("multicore/mechanism: mechanism holds ion '{}' with no corresponding shared state", ion));
+        m.ppack_.ion_states[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data() };
+    }
+
+    // If there are no sites (is this ever meaningful?) there is nothing more to do.
+    if (m.ppack_.width==0) return;
+
+    // Initialize state and parameter vectors with default values.
+    {
+        // Allocate bulk storage
+        std::size_t value_width_padded = extend_width<arb_value_type>(m, pos_data.cv.size());
+        std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*value_width_padded + m.mech_.n_globals;
+        store.data_ = array(count, NAN, pad);
+        chunk_writer writer(store.data_.data(), value_width_padded);
+
+        // First sub-array of data_ is used for weight_
+        m.ppack_.weight = writer.append(pos_data.weight, 0);
+        // Set fields
+        for (auto idx: make_span(m.mech_.n_parameters)) {
+            m.ppack_.parameters[idx] = writer.fill(m.mech_.parameters[idx].default_value);
+        }
+        for (auto idx: make_span(m.mech_.n_state_vars)) {
+            m.ppack_.state_vars[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
+        }
+
+        // Assign global scalar parameters
+        m.ppack_.globals = writer.end;
+        for (auto idx: make_span(m.mech_.n_globals)) {
+            m.ppack_.globals[idx] = m.mech_.globals[idx].default_value;
+        }
+        for (auto& [k, v]: overrides.globals) {
+            auto found = false;
+            for (auto idx: make_span(m.mech_.n_globals)) {
+                if (m.mech_.globals[idx].name == k) {
+                    m.ppack_.globals[idx] = v;
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) throw arbor_internal_error(util::pprintf("multicore/mechanism: no such mechanism global '{}'", k));
+        }
+        store.globals_ = std::vector<arb_value_type>(m.ppack_.globals, m.ppack_.globals + m.mech_.n_globals);
+    }
+
+    // Make index bulk storage
+    {
+        // Allocate bulk storage
+        std::size_t index_width_padded = extend_width<arb_index_type>(m, pos_data.cv.size());
+        std::size_t count = mult_in_place + m.mech_.n_ions + 1;
+        store.indices_ = iarray(count*index_width_padded, 0, pad);
+        chunk_writer writer(store.indices_.data(), index_width_padded);
+        // Setup node indices
+        m.ppack_.node_index = writer.append(pos_data.cv, pos_data.cv.back());
+
+        auto node_index = util::range_n(m.ppack_.node_index, index_width_padded);
+        // Make SIMD index constraints and set the view
+        store.constraints_ = make_constraint_partition(node_index, m.ppack_.width, m.iface_.partition_width);
+        m.ppack_.index_constraints.contiguous    = store.constraints_.contiguous.data();
+        m.ppack_.index_constraints.constant      = store.constraints_.constant.data();
+        m.ppack_.index_constraints.independent   = store.constraints_.independent.data();
+        m.ppack_.index_constraints.none          = store.constraints_.none.data();
+        m.ppack_.index_constraints.n_contiguous  = store.constraints_.contiguous.size();
+        m.ppack_.index_constraints.n_constant    = store.constraints_.constant.size();
+        m.ppack_.index_constraints.n_independent = store.constraints_.independent.size();
+        m.ppack_.index_constraints.n_none        = store.constraints_.none.size();
+        // Create ion indices
+        for (auto idx: make_span(m.mech_.n_ions)) {
+            auto  ion = m.mech_.ions[idx].name;
+            // Index into shared_state respecting ion rebindings
+            auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
+            ion_state* oion = ptr_by_key(ion_data, ion_binding);
+            if (!oion) throw arbor_internal_error(util::pprintf("multicore/mechanism: mechanism holds ion '{}' with no corresponding shared state ", ion));
+            // Obtain index and move data
+            auto indices = util::index_into(node_index, oion->node_index_);
+            m.ppack_.ion_states[idx].index = writer.append(indices, util::back(indices));
+            // Check SIMD constraints
+            arb_assert(compatible_index_constraints(node_index, util::range_n(m.ppack_.ion_states[idx].index, index_width_padded), m.iface_.partition_width));
+        }
+        if (mult_in_place) m.ppack_.multiplicity = writer.append(pos_data.multiplicity, 0);
+    }
+}
+
 } // namespace multicore
 } // namespace arb
diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
index 54c0424b..d913ca9d 100644
--- a/arbor/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -19,9 +19,9 @@
 #include "matrix_state.hpp"
 #include "multi_event_stream.hpp"
 #include "threshold_watcher.hpp"
-
 #include "fvm_layout.hpp"
 #include "multicore_common.hpp"
+#include "partition_by_constraint.hpp"
 
 namespace arb {
 namespace multicore {
@@ -106,6 +106,16 @@ struct istim_state {
 };
 
 struct shared_state {
+    struct mech_storage {
+        array data_;
+        iarray indices_;
+        constraint_partition constraints_;
+        std::vector<arb_value_type>  globals_;
+        std::vector<arb_value_type*> parameters_;
+        std::vector<arb_value_type*> state_vars_;
+        std::vector<arb_ion_state>   ion_states_;
+    };
+
     unsigned alignment = 1;   // Alignment and padding multiple.
     util::padded_allocator<> alloc;  // Allocator with corresponging alignment/padding.
 
@@ -116,7 +126,7 @@ struct shared_state {
 
     iarray cv_to_intdom;      // Maps CV index to integration domain index.
     iarray cv_to_cell;        // Maps CV index to the first spike
-    gjarray  gap_junctions;   // Stores gap_junction info.
+    gjarray gap_junctions;   // Stores gap_junction info.
     array time;               // Maps intdom index to integration start time [ms].
     array time_to;            // Maps intdom index to integration stop time [ms].
     array dt_intdom;          // Maps  index to (stop time) - (start time) [ms].
@@ -132,9 +142,12 @@ struct shared_state {
     array time_since_spike;   // Stores time since last spike on any detector, organized by cell.
     iarray src_to_spike;      // Maps spike source index to spike index
 
+    arb_value_type* time_ptr;
+
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     deliverable_event_stream deliverable_events;
+    std::unordered_map<unsigned, mech_storage> storage;
 
     shared_state() = default;
 
@@ -152,6 +165,12 @@ struct shared_state {
         unsigned align
     );
 
+    void instantiate(mechanism&, unsigned, const mechanism_overrides&, const mechanism_layout&);
+
+    void set_parameter(mechanism&, const std::string&, const std::vector<arb_value_type>&);
+
+    const arb_value_type* mechanism_state_data(const mechanism&, const std::string&);
+
     void add_ion(
         const std::string& ion_name,
         int charge,
diff --git a/arbor/fvm_layout.cpp b/arbor/fvm_layout.cpp
index d8a4ea8b..3261726f 100644
--- a/arbor/fvm_layout.cpp
+++ b/arbor/fvm_layout.cpp
@@ -860,7 +860,7 @@ fvm_mechanism_data fvm_build_mechanism_data(const cable_cell_global_properties&
 
         std::vector<double> param_dflt;
         fvm_mechanism_config config;
-        config.kind = mechanismKind::density;
+        config.kind = arb_mechanism_kind_density;
 
         std::vector<std::string> param_names;
         assign(param_names, util::keys(info.parameters));
@@ -1044,7 +1044,7 @@ fvm_mechanism_data fvm_build_mechanism_data(const cable_cell_global_properties&
         bool coalesce = catalogue[name].linear && gprop.coalesce_synapses;
 
         fvm_mechanism_config config;
-        config.kind = mechanismKind::point;
+        config.kind = arb_mechanism_kind_point;
         for (auto& kv: info.parameters) {
             config.param_values.emplace_back(kv.first, std::vector<value_type>{});
             if (!coalesce) {
@@ -1248,7 +1248,7 @@ fvm_mechanism_data fvm_build_mechanism_data(const cable_cell_global_properties&
                 }
                 else {
                     fvm_mechanism_config config;
-                    config.kind = mechanismKind::revpot;
+                    config.kind = arb_mechanism_kind_reversal_potential;
                     config.cv = M.ions[ion].cv;
                     config.norm_area.assign(config.cv.size(), 1.);
 
diff --git a/arbor/fvm_layout.hpp b/arbor/fvm_layout.hpp
index 88cf14dd..6d39d93e 100644
--- a/arbor/fvm_layout.hpp
+++ b/arbor/fvm_layout.hpp
@@ -226,7 +226,7 @@ struct fvm_mechanism_config {
     using value_type = fvm_value_type;
     using index_type = fvm_index_type;
 
-    mechanismKind kind;
+    arb_mechanism_kind kind;
 
     // Ordered CV indices where mechanism is present; may contain
     // duplicates for point mechanisms.
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index d427ad39..90206ad4 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -242,7 +242,13 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         state_->zero_currents();
         PL();
         for (auto& m: mechanisms_) {
-            m->deliver_events();
+            auto state = state_->deliverable_events.marked_events();
+            arb_deliverable_event_stream events;
+            events.n_streams = state.n;
+            events.begin     = state.begin_offset;
+            events.end       = state.end_offset;
+            events.events    = (arb_deliverable_event_data*) state.ev_data; // FIXME(TH): This relies on bit-castability
+            m->deliver_events(events);
             m->update_current();
         }
 
@@ -312,6 +318,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         PL();
 
         std::swap(state_->time_to, state_->time);
+        state_->time_ptr = state_->time.data();
 
         // Check for non-physical solutions:
 
@@ -442,7 +449,7 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
 
     // Mechanism instantiator helper.
     auto mech_instance = [&catalogue](const std::string& name) {
-        return catalogue->instance<backend>(name);
+        return catalogue->instance(backend::kind, name);
     };
 
     // Check for physically reasonable membrane volages?
@@ -492,7 +499,7 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
     }
 
     // Create shared cell state.
-    // (SIMD padding requires us to check each mechanism for alignment/padding constraints.)
+    // Shared state vectors should accommodate each mechanism's data alignment requests.
 
     unsigned data_alignment = util::max_value(
         util::transform_view(keys(mech_data.mechanisms),
@@ -543,7 +550,7 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
         // to convert from the mechanism current contribution units to A/mÂ².
 
         switch (config.kind) {
-        case mechanismKind::point:
+        case arb_mechanism_kind_point:
             // Point mechanism contributions are in [nA]; CV area A in [Âµm^2].
             // F = 1/A * [nA/ÂµmÂ²] / [A/mÂ²] = 1000/A.
 
@@ -564,28 +571,28 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
                 }
             }
             break;
-        case mechanismKind::density:
+        case arb_mechanism_kind_density:
             // Current density contributions from mechanism are already in [A/mÂ²].
 
             for (auto i: count_along(layout.cv)) {
                 layout.weight[i] = config.norm_area[i];
             }
             break;
-        case mechanismKind::revpot:
+        case arb_mechanism_kind_reversal_potential:
             // Mechanisms that set reversal potential should not be contributing
             // to any currents, so leave weights as zero.
             break;
         }
 
         auto minst = mech_instance(name);
-        minst.mech->instantiate(mech_id++, *state_, minst.overrides, layout);
+        state_->instantiate(*minst.mech, mech_id++, minst.overrides, layout);
         mechptr_by_name[name] = minst.mech.get();
 
         for (auto& pv: config.param_values) {
-            minst.mech->set_parameter(pv.first, pv.second);
+            state_->set_parameter(*minst.mech, pv.first, pv.second);
         }
 
-        if (config.kind==mechanismKind::revpot) {
+        if (config.kind==arb_mechanism_kind_reversal_potential) {
             revpot_mechanisms_.push_back(mechanism_ptr(minst.mech.release()));
         }
         else {
@@ -741,7 +748,7 @@ struct probe_resolution_data {
         mechanism* m = util::value_by_key(mech_instance_by_name, name).value_or(nullptr);
         if (!m) return nullptr;
 
-        const fvm_value_type* data = Backend::mechanism_field_data(m, state_var);
+        const fvm_value_type* data = state->mechanism_state_data(*m, state_var);
         if (!data) throw cable_cell_error("no state variable '"+state_var+"' in mechanism '"+name+"'");
 
         return data;
diff --git a/arbor/gpu_context.cpp b/arbor/gpu_context.cpp
index 8785ff36..273c0058 100644
--- a/arbor/gpu_context.cpp
+++ b/arbor/gpu_context.cpp
@@ -5,7 +5,7 @@
 #include "gpu_context.hpp"
 
 #ifdef ARB_HAVE_GPU
-#include <backends/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_api.hpp>
 #endif
 
 namespace arb {
diff --git a/arbor/hardware/memory.cpp b/arbor/hardware/memory.cpp
index 34b434d3..9495327b 100644
--- a/arbor/hardware/memory.cpp
+++ b/arbor/hardware/memory.cpp
@@ -7,7 +7,7 @@ extern "C" {
 #endif
 
 #ifdef ARB_HAVE_GPU
-    #include <backends/gpu/gpu_api.hpp>
+    #include <arbor/gpu/gpu_api.hpp>
 #endif
 
 namespace arb {
diff --git a/arbor/include/CMakeLists.txt b/arbor/include/CMakeLists.txt
index fde67911..82415e4f 100644
--- a/arbor/include/CMakeLists.txt
+++ b/arbor/include/CMakeLists.txt
@@ -70,6 +70,7 @@ add_dependencies(arbor-public-headers generate_version_hpp)
 
 install(DIRECTORY arbor ${CMAKE_CURRENT_BINARY_DIR}/arbor
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-    FILES_MATCHING PATTERN "*.hpp")
+    FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h" PATTERN "*.inc"
+    )
 
 install(TARGETS arbor-public-headers EXPORT arbor-targets)
diff --git a/arbor/include/arbor/arb_types.h b/arbor/include/arbor/arb_types.h
new file mode 100644
index 00000000..955fc9fd
--- /dev/null
+++ b/arbor/include/arbor/arb_types.h
@@ -0,0 +1,14 @@
+#ifndef ARB_TYPES_H
+#define ARB_TYPES_H
+
+// Define ABI arb_ typedefs.
+
+#ifdef __cplusplus
+#include <cstdint>
+#else
+#include <stdint.h>
+#endif
+
+#include <arbor/arb_types.inc>
+
+#endif // ndef ARB_TYPES_H
diff --git a/arbor/include/arbor/arb_types.hpp b/arbor/include/arbor/arb_types.hpp
new file mode 100644
index 00000000..586a6681
--- /dev/null
+++ b/arbor/include/arbor/arb_types.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+// Define ABI arb_ typedefs in arb:: namespace.
+
+#include <cstdint>
+
+namespace arb {
+#include <arbor/arb_types.inc>
+}
diff --git a/arbor/include/arbor/arb_types.inc b/arbor/include/arbor/arb_types.inc
new file mode 100644
index 00000000..0062fd5a
--- /dev/null
+++ b/arbor/include/arbor/arb_types.inc
@@ -0,0 +1,8 @@
+typedef double   arb_value_type;
+typedef float    arb_weight_type;
+typedef int      arb_index_type;
+#ifdef __cplusplus
+typedef std::uint32_t arb_size_type;
+#else
+typedef uint32_t arb_size_type;
+#endif
diff --git a/arbor/include/arbor/arbexcept.hpp b/arbor/include/arbor/arbexcept.hpp
index 2ba55753..01d4e91e 100644
--- a/arbor/include/arbor/arbexcept.hpp
+++ b/arbor/include/arbor/arbexcept.hpp
@@ -152,4 +152,16 @@ struct bad_catalogue_error: arbor_exception {
     std::string failed_call;
 };
 
+// ABI errors
+
+struct bad_alignment: arbor_exception {
+    bad_alignment(size_t);
+    size_t alignment;
+};
+
+struct unsupported_abi_error: arbor_exception {
+    unsupported_abi_error(size_t);
+    size_t version;
+};
+
 } // namespace arb
diff --git a/arbor/include/arbor/fvm_types.hpp b/arbor/include/arbor/fvm_types.hpp
index a4f21386..ea3f21f3 100644
--- a/arbor/include/arbor/fvm_types.hpp
+++ b/arbor/include/arbor/fvm_types.hpp
@@ -1,14 +1,13 @@
 #pragma once
 
+#include <arbor/arb_types.h>
 #include <arbor/common_types.hpp>
 
-// Basic types shared across FVM implementations/backends.
-
 namespace arb {
 
-using fvm_value_type = double;
-using fvm_size_type = cell_local_size_type;
-using fvm_index_type = int;
+using fvm_value_type = arb_value_type;
+using fvm_size_type  = arb_size_type;
+using fvm_index_type = arb_index_type;
 
 struct fvm_gap_junction {
     using value_type = fvm_value_type;
@@ -19,7 +18,6 @@ struct fvm_gap_junction {
 
     fvm_gap_junction() {}
     fvm_gap_junction(std::pair<index_type, index_type> l, value_type w): loc(l), weight(w) {}
-
 };
 
 } // namespace arb
diff --git a/arbor/backends/gpu/cuda_api.hpp b/arbor/include/arbor/gpu/cuda_api.hpp
similarity index 100%
rename from arbor/backends/gpu/cuda_api.hpp
rename to arbor/include/arbor/gpu/cuda_api.hpp
diff --git a/arbor/backends/gpu/gpu_api.hpp b/arbor/include/arbor/gpu/gpu_api.hpp
similarity index 100%
rename from arbor/backends/gpu/gpu_api.hpp
rename to arbor/include/arbor/gpu/gpu_api.hpp
diff --git a/arbor/backends/gpu/gpu_common.hpp b/arbor/include/arbor/gpu/gpu_common.hpp
similarity index 100%
rename from arbor/backends/gpu/gpu_common.hpp
rename to arbor/include/arbor/gpu/gpu_common.hpp
diff --git a/arbor/backends/gpu/hip_api.hpp b/arbor/include/arbor/gpu/hip_api.hpp
similarity index 100%
rename from arbor/backends/gpu/hip_api.hpp
rename to arbor/include/arbor/gpu/hip_api.hpp
diff --git a/arbor/backends/gpu/math_cu.hpp b/arbor/include/arbor/gpu/math_cu.hpp
similarity index 100%
rename from arbor/backends/gpu/math_cu.hpp
rename to arbor/include/arbor/gpu/math_cu.hpp
diff --git a/arbor/backends/gpu/reduce_by_key.hpp b/arbor/include/arbor/gpu/reduce_by_key.hpp
similarity index 100%
rename from arbor/backends/gpu/reduce_by_key.hpp
rename to arbor/include/arbor/gpu/reduce_by_key.hpp
diff --git a/arbor/include/arbor/mechanism.hpp b/arbor/include/arbor/mechanism.hpp
index 92209c11..40fab8aa 100644
--- a/arbor/include/arbor/mechanism.hpp
+++ b/arbor/include/arbor/mechanism.hpp
@@ -3,21 +3,25 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <unordered_map>
 
+#include <arbor/arbexcept.hpp>
 #include <arbor/fvm_types.hpp>
+#include <arbor/mechanism_abi.h>
 #include <arbor/mechinfo.hpp>
-#include <arbor/mechanism_ppack.hpp>
 
 namespace arb {
 
-enum class mechanismKind { point, density, revpot };
-
 class mechanism;
 using mechanism_ptr = std::unique_ptr<mechanism>;
 
-template <typename B> class concrete_mechanism;
-template <typename B>
-using concrete_mech_ptr = std::unique_ptr<concrete_mechanism<B>>;
+struct ion_state_view {
+    fvm_value_type* current_density;
+    fvm_value_type* reversal_potential;
+    fvm_value_type* internal_concentration;
+    fvm_value_type* external_concentration;
+    fvm_value_type* ionic_charge;
+};
 
 class mechanism {
 public:
@@ -25,63 +29,50 @@ public:
     using index_type = fvm_index_type;
     using size_type  = fvm_size_type;
 
+    mechanism(const arb_mechanism_type m,
+              const arb_mechanism_interface& i): mech_{m}, iface_{i} {
+        if (mech_.abi_version != ARB_MECH_ABI_VERSION) throw unsupported_abi_error{mech_.abi_version};
+    }
     mechanism() = default;
     mechanism(const mechanism&) = delete;
+    ~mechanism() = default;
 
     // Return fingerprint of mechanism dynamics source description for validation/replication.
-    virtual const mechanism_fingerprint& fingerprint() const = 0;
+    const mechanism_fingerprint fingerprint() const { return mech_.fingerprint; };
 
     // Name as given in mechanism source.
-    virtual std::string internal_name() const { return ""; }
+    std::string internal_name() const { return mech_.name; }
 
     // Density or point mechanism?
-    virtual mechanismKind kind() const = 0;
-
-    // Does the implementation require padding and alignment of shared data structures?
-    virtual unsigned data_alignment() const { return 1; }
+    arb_mechanism_kind kind() const { return mech_.kind; };
 
-    // Memory use in bytes.
-    virtual std::size_t memory() const = 0;
+    // Minimum expected alignment of allocated vectors and shared state data.
+    unsigned data_alignment() const { return iface_.alignment; }
 
-    // Width of an instance: number of CVs (density mechanism) or sites (point mechanism)
-    // that the mechanism covers.
-    virtual std::size_t size() const = 0;
-
-    // Cloning makes a new object of the derived concrete mechanism type, but does not
-    // copy any state.
-    virtual mechanism_ptr clone() const = 0;
+    // Make a new object of the mechanism type, but does not copy any state, so
+    // the result must be instantiated.
+    mechanism_ptr clone() const { return std::make_unique<mechanism>(mech_, iface_); }
 
     // Non-global parameters can be set post-instantiation:
-    virtual void set_parameter(const std::string& key, const std::vector<fvm_value_type>& values) = 0;
-
-    // Peek into state variable
-    virtual fvm_value_type* field_data(const std::string& var) = 0;
+    void set_parameter(const std::string&, const std::vector<arb_value_type>&);
 
-    // Simulation interfaces:
-    virtual void initialize() {};
-    virtual void update_state() {};
-    virtual void update_current() {};
-    virtual void deliver_events() {};
-    virtual void post_event() {};
-    virtual void update_ions() {};
-
-    virtual ~mechanism() = default;
+    // Forward to interface methods
+    void initialize()     { ppack_.vec_t = *time_ptr_ptr; iface_.init_mechanism(&ppack_); }
+    void update_current() { ppack_.vec_t = *time_ptr_ptr; iface_.compute_currents(&ppack_); }
+    void update_state()   { ppack_.vec_t = *time_ptr_ptr; iface_.advance_state(&ppack_); }
+    void update_ions()    { ppack_.vec_t = *time_ptr_ptr; iface_.write_ions(&ppack_); }
+    void post_event()     { ppack_.vec_t = *time_ptr_ptr; iface_.post_event(&ppack_); }
+    void deliver_events(arb_deliverable_event_stream& stream) { ppack_.vec_t  = *time_ptr_ptr; iface_.apply_events(&ppack_, &stream); }
 
     // Per-cell group identifier for an instantiated mechanism.
-    unsigned mechanism_id() const { return mechanism_id_; }
+    unsigned mechanism_id() const { return ppack_.mechanism_id; }
 
-protected:
-    // Per-cell group identifier for an instantiation of a mechanism; set by
-    // concrete_mechanism<B>::instantiate()
-    unsigned mechanism_id_ = -1;
+    arb_mechanism_type  mech_;
+    arb_mechanism_interface iface_;
+    arb_mechanism_ppack ppack_;
+    arb_value_type** time_ptr_ptr;
 };
 
-// Backend-specific implementations provide mechanisms that are derived from `concrete_mechanism<Backend>`,
-// likely via an intermediate class that captures common behaviour for that backend.
-//
-// `concrete_mechanism` provides the `instantiate` method, which takes the backend-specific shared state,
-// together with a layout derived from the discretization, and any global parameter overrides.
-
 struct mechanism_layout {
     // Maps in-instance index to CV index.
     std::vector<fvm_index_type> cv;
@@ -96,7 +87,7 @@ struct mechanism_layout {
 
 struct mechanism_overrides {
     // Global scalar parameters (any value down-conversion to fvm_value_type is the
-    // responsibility of the concrete mechanism).
+    // responsibility of the mechanism).
     std::unordered_map<std::string, double> globals;
 
     // Ion renaming: keys are ion dependency names as
@@ -104,106 +95,4 @@ struct mechanism_overrides {
     std::unordered_map<std::string, std::string> ion_rebind;
 };
 
-struct ion_state_view {
-    fvm_value_type* current_density;
-    fvm_value_type* reversal_potential;
-    fvm_value_type* internal_concentration;
-    fvm_value_type* external_concentration;
-    fvm_value_type* ionic_charge;
-};
-
-template <typename Backend>
-class concrete_mechanism: public mechanism {
-public:
-    using backend = Backend;
-    // Instantiation: allocate per-instance state; set views/pointers to shared data.
-    virtual void instantiate(unsigned id, typename backend::shared_state&, const mechanism_overrides&, const mechanism_layout&) = 0;
-
-    std::size_t size() const override { return width_; }
-
-    std::size_t memory() const override {
-        std::size_t s = object_sizeof();
-        s += sizeof(data_[0])    * data_.size();
-        s += sizeof(indices_[0]) * indices_.size();
-        return s;
-    }
-
-    // Delegate to derived class.
-    virtual void deliver_events() override { apply_events(event_stream_ptr_->marked_events()); }
-    virtual void update_current() override { set_time_ptr(); compute_currents(); }
-    virtual void update_state()   override { set_time_ptr(); advance_state(); }
-    virtual void update_ions()    override { set_time_ptr(); write_ions(); }
-
-protected:
-    using deliverable_event_stream = typename backend::deliverable_event_stream;
-    using iarray = typename backend::iarray;
-    using array  = typename backend::array;
-
-     void set_time_ptr() { ppack_ptr()->vec_t_ = vec_t_ptr_->data(); }
-
-    // Generated mechanism field, global and ion table lookup types.
-    // First component is name, second is pointer to corresponing member in
-    // the mechanism's parameter pack, or for field_default_table,
-    // the scalar value used to initialize the field.
-    using global_table_entry = std::pair<const char*, value_type*>;
-    using mechanism_global_table = std::vector<global_table_entry>;
-
-    using state_table_entry = std::pair<const char*, value_type**>;
-    using mechanism_state_table = std::vector<state_table_entry>;
-
-    using field_table_entry = std::pair<const char*, value_type**>;
-    using mechanism_field_table = std::vector<field_table_entry>;
-
-    using field_default_entry = std::pair<const char*, value_type>;
-    using mechanism_field_default_table = std::vector<field_default_entry>;
-
-    using ion_state_entry = std::pair<const char*, ion_state_view*>;
-    using mechanism_ion_state_table = std::vector<ion_state_entry>;
-
-    using ion_index_entry = std::pair<const char*, index_type**>;
-    using mechanism_ion_index_table = std::vector<ion_index_entry>;
-
-    // Generated mechanisms must implement the following methods
-
-    // Member tables: introspection into derived mechanism fields, views etc.
-    // Default implementations correspond to no corresponding fields/globals/ions.
-    virtual mechanism_field_table         field_table() { return {}; }
-    virtual mechanism_field_default_table field_default_table() { return {}; }
-    virtual mechanism_global_table        global_table() { return {}; }
-    virtual mechanism_state_table         state_table() { return {}; }
-    virtual mechanism_ion_state_table     ion_state_table() { return {}; }
-    virtual mechanism_ion_index_table     ion_index_table() { return {}; }
-
-    // Returns pointer to (derived) parameter-pack object that holds:
-    // * pointers to shared cell state `vec_ci_` et al.,
-    // * pointer to mechanism weights `weight_`,
-    // * pointer to mechanism node indices `node_index_`,
-    // * mechanism global scalars and pointers to mechanism range parameters.
-    // * mechanism ion_state_view objects and pointers to mechanism ion indices.
-    virtual mechanism_ppack* ppack_ptr() = 0;
-
-    // to be overridden in mechanism implemetations
-    virtual void advance_state() {};
-    virtual void compute_currents() {};
-    virtual void apply_events(typename deliverable_event_stream::state) {};
-    virtual void write_ions() {};
-    virtual void init() {};
-    // Report raw size in bytes of mechanism object.
-    virtual std::size_t object_sizeof() const = 0;
-
-    // events to be processed
-
-    // indirection for accessing time in mechanisms
-    const array* vec_t_ptr_;
-
-    deliverable_event_stream* event_stream_ptr_;
-    size_type width_ = 0;         // Instance width (number of CVs/sites)
-    size_type num_ions_ = 0;      // Ion count
-    bool mult_in_place_;          // perform multipliction in place?
-
-    // Bulk storage for index vectors and state and parameter variables.
-    iarray indices_;
-    array data_;
-};
-
 } // namespace arb
diff --git a/arbor/include/arbor/mechanism_abi.h b/arbor/include/arbor/mechanism_abi.h
new file mode 100644
index 00000000..ce85067f
--- /dev/null
+++ b/arbor/include/arbor/mechanism_abi.h
@@ -0,0 +1,203 @@
+#ifndef ARB_MECH_ABI
+#define ARB_MECH_ABI
+
+#include <arbor/arb_types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Version
+#define ARB_MECH_ABI_VERSION_MAJOR 0
+#define ARB_MECH_ABI_VERSION_MINOR 0
+#define ARB_MECH_ABI_VERSION_PATCH 1
+#define ARB_MECH_ABI_VERSION ((ARB_MECH_ABI_VERSION_MAJOR * 10000L * 10000L) + (ARB_MECH_ABI_VERSION_MAJOR * 10000L) + ARB_MECH_ABI_VERSION_PATCH)
+
+typedef const char* arb_mechanism_fingerprint;
+
+// Selectors
+typedef uint32_t arb_mechanism_kind;
+#define arb_mechanism_kind_nil 0
+#define arb_mechanism_kind_point 1
+#define arb_mechanism_kind_density 2
+#define arb_mechanism_kind_reversal_potential 3
+
+typedef uint32_t arb_backend_kind;
+#define arb_backend_kind_nil 0
+#define arb_backend_kind_cpu 1
+#define arb_backend_kind_gpu 2
+
+// Ion state variables; view into shared_state
+typedef struct arb_ion_state {
+    arb_value_type* current_density;
+    arb_value_type* reversal_potential;
+    arb_value_type* internal_concentration;
+    arb_value_type* external_concentration;
+    arb_value_type* ionic_charge;
+    arb_index_type* index;
+} arb_ion_state;
+
+// Event; consumed by `apply_event`
+typedef struct arb_deliverable_event_data {
+    arb_size_type   mech_id;       // Mechanism type identifier (per cell group).
+    arb_size_type   mech_index;    // Instance of the mechanism.
+    arb_weight_type weight;
+} arb_deliverable_event_data;
+
+/* A set of `n` streams of events, where those in the
+ * ranges (events + begin[i], events + end[i]) i = 0..n-1
+ * are meant to be consumed
+ */
+typedef struct arb_deliverable_event_stream {
+    arb_size_type                     n_streams; // Number of streams.
+    const arb_deliverable_event_data* events;    // Array of event data items.
+    const arb_index_type*             begin;     // Array of offsets to beginning of marked events.
+    const arb_index_type*             end;       // Array of offsets to end of marked events.
+}  arb_deliverable_event_stream;
+
+// Constraints for use in SIMD implementations, see there.
+typedef struct arb_constraint_partition {
+    arb_size_type   n_contiguous;
+    arb_size_type   n_constant;
+    arb_size_type   n_independent;
+    arb_size_type   n_none;
+    arb_index_type* contiguous;
+    arb_index_type* constant;
+    arb_index_type* independent;
+    arb_index_type* none;
+} arb_constraint_partition;
+
+// Parameter Pack
+typedef struct arb_mechanism_ppack {
+    arb_size_type  width;                        // Number of CVs.
+    arb_index_type  n_detectors;                 // Number of spike detectors.
+    arb_index_type* vec_ci;
+    arb_index_type* vec_di;
+    const arb_value_type* vec_t;
+    arb_value_type* vec_dt;
+    arb_value_type* vec_v;
+    arb_value_type* vec_i;
+    arb_value_type* vec_g;
+    arb_value_type* temperature_degC;
+    arb_value_type* diam_um;
+    arb_value_type* time_since_spike;
+    arb_index_type* node_index;
+    arb_index_type* multiplicity;
+    arb_value_type* weight;
+    arb_size_type   mechanism_id;
+
+    arb_deliverable_event_stream events;            // Events during the last period.
+    arb_constraint_partition     index_constraints; // Index restrictions, not initialised for all backend.
+
+    arb_value_type** parameters;                    // Array of setable parameters.    (Array)
+    arb_value_type** state_vars;                    // Array of integrable state.      (Array)
+    arb_value_type*  globals;                       // Array of global constant state. (Scalar)
+    arb_ion_state*   ion_states;                    // Array of views into shared state.
+} arb_mechanism_ppack;
+
+
+/* Mechanism Plugin
+ *
+ * Everything below has to be filled out by the plugin author/compiler.
+ * The interface methods will be called with allocated and initialised `ppack`
+ * data. The actual layout is unspecified, but all pointers are allocated and set
+ * by the library. Plugins should never allocate memory on their own.
+ */
+typedef void (*arb_mechanism_method)(arb_mechanism_ppack*); // Convenience for extension methods
+typedef void (*arb_mechanism_method_events)(arb_mechanism_ppack*, arb_deliverable_event_stream*);
+
+typedef struct arb_mechanism_interface {
+    arb_backend_kind   backend;               // GPU, CPU, ...
+    arb_size_type      partition_width;       // Width for partitioning indices, based on SIMD for example
+    arb_size_type      alignment;
+    // Interface methods; hooks called by the engine during the lifetime of the mechanism.
+    /* 1. init_mechanism
+     *   - called once during instantiation,
+     *   - setup initial state, corresponds to NMODL's INITIAL block,
+     *   - will receive an allocated and initialised ppack object
+     *     - pointers in ion_state_view are set to their associated values in shared state
+     *     - pointers to state, parameters, globals, and constants are allocated and initialised to the given defaults.
+     *     - SIMD only: index_constraint is set up
+     *     - Internal values (see above) are initialised
+     */
+    arb_mechanism_method init_mechanism;
+    /* 2. compute_currents
+     * - compute ionic currents
+     *   - pointers in `ion_state` are set to [ion_0, ion_1, ...] from the `ions` table
+     *   - currents live in `current_density`
+     * - called during each integration time step
+     *   - at the start for reversal potential mechanisms, *before* current reset
+     *   - after event deliver for anything else
+     */
+    arb_mechanism_method compute_currents;
+
+    /* 3. apply_events
+     * - consume `deliverable_events` and apply effects to internal state
+     * - `deliverable_events` is setup correctly externally, is read-only for apply events
+     * - called during each integration time step, right after resetting currents
+     */
+    arb_mechanism_method_events apply_events;
+    /* 4. advanced_state
+     * - called during each integration time step, after solving Hines matrices
+     * - perform integration on state variables, often given as an ODE
+     * - state variables live in `ppack::state_vars`
+     */
+    arb_mechanism_method advance_state;
+    /* 5. write_ions
+     * - update ionic concentrations
+     *   - pointers in `ion_state` are set to [ion_0, ion_1, ...] from the `ions` table
+     *   - variables live in `internal_concentration` and `external_concentration`
+     * - called during each integration time step, after state integration
+     */
+    arb_mechanism_method write_ions;
+    /* 6. post_event
+     * - called during each integration time step, after checking for spikes
+     * - corresponds to NET_RECEIVE in NMODL
+     */
+    arb_mechanism_method post_event;
+} arb_mechanism_interface;
+
+typedef struct arb_field_info {
+    const char* name;
+    const char* unit;
+    arb_value_type default_value;
+    arb_value_type range_low;
+    arb_value_type range_high;
+} arb_field_info;
+
+// Ion dependency
+typedef struct arb_ion_info {
+    const char* name;
+    bool write_int_concentration;
+    bool write_ext_concentration;
+    bool write_rev_potential;
+    bool read_rev_potential;
+    bool read_valence;
+    bool verify_valence;
+    int  expected_valence;
+} arb_ion_info;
+
+// Backend independent data
+typedef struct arb_mechanism_type {
+    // Metadata
+    unsigned long             abi_version;      // plugin ABI version used to build this mechanism
+    arb_mechanism_fingerprint fingerprint;      // provide a unique ID
+    const char*               name;             // provide unique name
+    arb_mechanism_kind        kind;             // Point, Density, ReversalPotential, ...
+    bool                      is_linear;        // linear, homogeneous mechanism
+    bool                      has_post_events;
+    // Tables
+    arb_field_info*           globals;          // Global constants
+    arb_size_type             n_globals;
+    arb_field_info*           state_vars;       // Integrable state
+    arb_size_type             n_state_vars;
+    arb_field_info*           parameters;       // Mechanism parameters
+    arb_size_type             n_parameters;
+    arb_ion_info*             ions;             // Ion properties
+    arb_size_type             n_ions;
+} arb_mechanism_type;
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/arbor/include/arbor/mechanism_ppack.hpp b/arbor/include/arbor/mechanism_ppack.hpp
deleted file mode 100644
index bb510a5c..00000000
--- a/arbor/include/arbor/mechanism_ppack.hpp
+++ /dev/null
@@ -1,23 +0,0 @@
-#pragma once
-
-#include <arbor/fvm_types.hpp>
-
-namespace arb {
-struct mechanism_ppack {
-    fvm_index_type width_;
-    fvm_index_type n_detectors_;
-    const fvm_index_type* vec_ci_;
-    const fvm_index_type* vec_di_;
-    const fvm_value_type* vec_t_;
-    const fvm_value_type* vec_dt_;
-    const fvm_value_type* vec_v_;
-    fvm_value_type* vec_i_;
-    fvm_value_type* vec_g_;
-    const fvm_value_type* temperature_degC_;
-    const fvm_value_type* diam_um_;
-    const fvm_value_type* time_since_spike_;
-    const fvm_index_type* node_index_;
-    const fvm_index_type* multiplicity_;
-    const fvm_value_type* weight_;
-};
-} // namespace arb
diff --git a/arbor/include/arbor/mechcat.hpp b/arbor/include/arbor/mechcat.hpp
index 32cfffb8..0db56c55 100644
--- a/arbor/include/arbor/mechcat.hpp
+++ b/arbor/include/arbor/mechcat.hpp
@@ -8,6 +8,7 @@
 
 #include <arbor/mechinfo.hpp>
 #include <arbor/mechanism.hpp>
+#include <arbor/mechanism_abi.h>
 
 // Mechanism catalogue maintains:
 //
@@ -16,12 +17,9 @@
 // 2. A further hierarchy of 'derived' mechanisms, that allow specialization of
 //    global parameters, ion bindings, and implementations.
 //
-// 3. A map taking mechanism names x back-end class -> mechanism implementation
+// 3. A map taking mechanism names x back-end kind -> mechanism implementation
 //    prototype object.
 //
-// Implementations for a backend `B` are represented by a pointer to a 
-// `concrete_mechanism<B>` object.
-//
 // References to mechanism_info and mechanism_fingerprint objects are invalidated
 // after any modification to the catalogue.
 //
@@ -80,43 +78,34 @@ public:
 
     // Clone the implementation associated with name (search derivation hierarchy starting from
     // most derived) and return together with any global overrides.
-    template <typename B>
     struct cat_instance {
-        std::unique_ptr<concrete_mechanism<B>> mech;
+        mechanism_ptr mech;
         mechanism_overrides overrides;
     };
 
-    template <typename B>
-    cat_instance<B> instance(const std::string& name) const {
-        auto mech = instance_impl(std::type_index(typeid(B)), name);
-
-        return cat_instance<B>{
-            std::unique_ptr<concrete_mechanism<B>>(dynamic_cast<concrete_mechanism<B>*>(mech.first.release())),
-            std::move(mech.second)
-        };
+    cat_instance instance(arb_backend_kind kind, const std::string& name) const {
+        auto mech = instance_impl(kind, name);
+        return { std::move(mech.first), std::move(mech.second) };
     }
 
-    // Associate a concrete (prototype) mechanism for a given back-end B with a (possibly derived)
-    // mechanism name.
-    template <typename B>
-    void register_implementation(const std::string& name, std::unique_ptr<concrete_mechanism<B>> proto) {
-        mechanism_ptr generic_proto = mechanism_ptr(proto.release());
-        register_impl(std::type_index(typeid(B)), name, std::move(generic_proto));
+    void register_implementation(const std::string& name, mechanism_ptr proto) {
+        auto be = proto->iface_.backend;
+        register_impl(be, name, std::move(proto));
     }
 
-   // Copy over another catalogue's mechanism and attach a -- possibly empty -- prefix
-   void import(const mechanism_catalogue& other, const std::string& prefix);
+    // Copy over another catalogue's mechanism and attach a -- possibly empty -- prefix
+    void import(const mechanism_catalogue& other, const std::string& prefix);
 
-   ~mechanism_catalogue();
+    ~mechanism_catalogue();
 
-   // Grab a collection of all mechanism names in the catalogue.
-   std::vector<std::string> mechanism_names() const;
+    // Grab a collection of all mechanism names in the catalogue.
+    std::vector<std::string> mechanism_names() const;
 
 private:
     std::unique_ptr<catalogue_state> state_;
 
-    std::pair<mechanism_ptr, mechanism_overrides> instance_impl(std::type_index, const std::string&) const;
-    void register_impl(std::type_index, const std::string&, mechanism_ptr);
+    std::pair<mechanism_ptr, mechanism_overrides> instance_impl(arb_backend_kind, const std::string&) const;
+    void register_impl(arb_backend_kind, const std::string&, mechanism_ptr);
 };
 
 
diff --git a/arbor/include/arbor/mechinfo.hpp b/arbor/include/arbor/mechinfo.hpp
index 45785e31..3e4ff24b 100644
--- a/arbor/include/arbor/mechinfo.hpp
+++ b/arbor/include/arbor/mechinfo.hpp
@@ -10,6 +10,8 @@
 #include <utility>
 #include <vector>
 
+#include <arbor/mechanism_abi.h>
+
 namespace arb {
 
 struct mechanism_field_spec {
@@ -51,6 +53,11 @@ struct ion_dependency {
 using mechanism_fingerprint = std::string;
 
 struct mechanism_info {
+
+    // mechanism_info is a convenient subset of the ABI mech description
+    mechanism_info(const arb_mechanism_type&);
+    mechanism_info() = default;
+
     // Global fields have one value common to an instance of a mechanism, are
     // constant in time and set at instantiation.
     std::unordered_map<std::string, mechanism_field_spec> globals;
diff --git a/arbor/include/arbor/simd/avx.hpp b/arbor/include/arbor/simd/avx.hpp
index 1dd39c9c..67fb4f4a 100644
--- a/arbor/include/arbor/simd/avx.hpp
+++ b/arbor/include/arbor/simd/avx.hpp
@@ -18,9 +18,12 @@ namespace detail {
 struct avx_int4;
 struct avx_double4;
 
+static constexpr unsigned avx_min_align = 16;
+
 template <>
 struct simd_traits<avx_int4> {
     static constexpr unsigned width = 4;
+    static constexpr unsigned min_align = avx_min_align;
     using scalar_type = std::int32_t;
     using vector_type = __m128i;
     using mask_impl = avx_int4;
@@ -29,6 +32,7 @@ struct simd_traits<avx_int4> {
 template <>
 struct simd_traits<avx_double4> {
     static constexpr unsigned width = 4;
+    static constexpr unsigned min_align = avx_min_align;
     using scalar_type = double;
     using vector_type = __m256d;
     using mask_impl = avx_double4;
@@ -694,6 +698,7 @@ struct avx2_double4;
 template <>
 struct simd_traits<avx2_int4> {
     static constexpr unsigned width = 4;
+    static constexpr unsigned min_align = avx_min_align;
     using scalar_type = std::int32_t;
     using vector_type = __m128i;
     using mask_impl = avx_int4;
@@ -702,6 +707,7 @@ struct simd_traits<avx2_int4> {
 template <>
 struct simd_traits<avx2_double4> {
     static constexpr unsigned width = 4;
+    static constexpr unsigned min_align = avx_min_align;
     using scalar_type = double;
     using vector_type = __m256d;
     using mask_impl = avx2_double4;
diff --git a/arbor/include/arbor/simd/avx512.hpp b/arbor/include/arbor/simd/avx512.hpp
index 0416d8a0..c7f10bd8 100644
--- a/arbor/include/arbor/simd/avx512.hpp
+++ b/arbor/include/arbor/simd/avx512.hpp
@@ -19,9 +19,12 @@ struct avx512_double8;
 struct avx512_int8;
 struct avx512_mask8;
 
+static constexpr unsigned avx512_min_align = 16;
+
 template <>
 struct simd_traits<avx512_mask8> {
     static constexpr unsigned width = 8;
+    static constexpr unsigned min_align = avx512_min_align;
     using scalar_type = bool;
     using vector_type = __mmask8;
     using mask_impl = avx512_mask8;
@@ -30,6 +33,7 @@ struct simd_traits<avx512_mask8> {
 template <>
 struct simd_traits<avx512_double8> {
     static constexpr unsigned width = 8;
+    static constexpr unsigned min_align = avx512_min_align;
     using scalar_type = double;
     using vector_type = __m512d;
     using mask_impl = avx512_mask8;
@@ -38,6 +42,7 @@ struct simd_traits<avx512_double8> {
 template <>
 struct simd_traits<avx512_int8> {
     static constexpr unsigned width = 8;
+    static constexpr unsigned min_align = avx512_min_align;
     using scalar_type = std::int32_t;
     using vector_type = __m512i;
     using mask_impl = avx512_mask8;
diff --git a/arbor/include/arbor/simd/generic.hpp b/arbor/include/arbor/simd/generic.hpp
index d0f2b5f8..bcbbf8af 100644
--- a/arbor/include/arbor/simd/generic.hpp
+++ b/arbor/include/arbor/simd/generic.hpp
@@ -19,6 +19,7 @@ struct simd_traits<generic<T, N>> {
     using scalar_type = T;
     using vector_type = std::array<T, N>;
     using mask_impl = generic<bool, N>;
+    static constexpr unsigned min_align = alignof(vector_type);
 };
 
 template <typename T, unsigned N>
diff --git a/arbor/include/arbor/simd/implbase.hpp b/arbor/include/arbor/simd/implbase.hpp
index 544846cd..54611088 100644
--- a/arbor/include/arbor/simd/implbase.hpp
+++ b/arbor/include/arbor/simd/implbase.hpp
@@ -75,6 +75,7 @@ namespace detail {
 template <typename I>
 struct simd_traits {
     static constexpr unsigned width = 0;
+    static constexpr unsigned min_align = 0;
     using scalar_type = void;
     using vector_type = void;
     using mask_impl = void;
@@ -90,6 +91,7 @@ struct tag {};
 template <typename I>
 struct implbase {
     constexpr static unsigned width = simd_traits<I>::width;
+    constexpr static unsigned min_align = simd_traits<I>::min_align;
     using scalar_type = typename simd_traits<I>::scalar_type;
     using vector_type = typename simd_traits<I>::vector_type;
 
diff --git a/arbor/include/arbor/simd/neon.hpp b/arbor/include/arbor/simd/neon.hpp
index 32d07012..6743452f 100644
--- a/arbor/include/arbor/simd/neon.hpp
+++ b/arbor/include/arbor/simd/neon.hpp
@@ -24,6 +24,7 @@ struct simd_traits<neon_double2> {
     using scalar_type = double;
     using vector_type = float64x2_t;
     using mask_impl = neon_double2;  // int64x2_t?
+    static constexpr unsigned min_align = alignof(vector_type);
 };
 
 template <>
@@ -32,6 +33,7 @@ struct simd_traits<neon_int2> {
     using scalar_type = int32_t;
     using vector_type = int32x2_t;
     using mask_impl = neon_int2;  // int64x2_t
+    static constexpr unsigned min_align = alignof(vector_type);
 };
 
 struct neon_int2 : implbase<neon_int2> {
diff --git a/arbor/include/arbor/simd/simd.hpp b/arbor/include/arbor/simd/simd.hpp
index 5a72bab7..9345e8ec 100644
--- a/arbor/include/arbor/simd/simd.hpp
+++ b/arbor/include/arbor/simd/simd.hpp
@@ -378,6 +378,7 @@ namespace detail {
 
     public:
         static constexpr unsigned width = simd_traits<Impl>::width;
+        static constexpr unsigned min_align = simd_traits<Impl>::min_align;
 
         template <typename Other>
         friend struct simd_impl;
@@ -983,6 +984,12 @@ inline constexpr int width(const S a = S{}) {
     return S::width;
 };
 
+template <typename S, std::enable_if_t<is_simd<S>::value, int> = 0>
+inline constexpr unsigned min_align(const S a = S{}) {
+    return S::min_align;
+};
+
+
 // Gather/scatter indexed memory specification.
 
 template <
diff --git a/arbor/include/arbor/simd/sve.hpp b/arbor/include/arbor/simd/sve.hpp
index c9f9cd70..2d301750 100644
--- a/arbor/include/arbor/simd/sve.hpp
+++ b/arbor/include/arbor/simd/sve.hpp
@@ -37,6 +37,8 @@ struct simd_traits<sve_mask> {
     using scalar_type = bool;
     using vector_type = svbool_t;
     using mask_impl = sve_mask;
+    // alignof not necessarily defined for sizeless types.
+    static constexpr unsigned min_align = alignof(scalar_type);
 };
 
 template <>
@@ -45,6 +47,8 @@ struct simd_traits<sve_double> {
     using scalar_type = double;
     using vector_type = svfloat64_t;
     using mask_impl = sve_mask;
+    // alignof not necessarily defined for sizeless types.
+    static constexpr unsigned min_align = alignof(scalar_type);
 };
 
 template <>
@@ -53,6 +57,8 @@ struct simd_traits<sve_int> {
     using scalar_type = int32_t;
     using vector_type = svint64_t;
     using mask_impl = sve_mask;
+    // alignof not necessarily defined for sizeless types.
+    static constexpr unsigned min_align = alignof(scalar_type);
 };
 
 struct sve_mask {
@@ -810,9 +816,17 @@ static int width(const svint64_t& v) {
     return svlen_s64(v);
 };
 
-template <typename S, typename std::enable_if_t<detail::is_sve<S>::value, int> = 0> 
+template <typename S, typename std::enable_if_t<detail::is_sve<S>::value, int> = 0>
+static constexpr int min_align(const S& v) {
+    return detail::simd_traits<typename detail::sve_type_to_impl<S>::type>::min_align;
+};
+
+template <typename S, typename std::enable_if_t<detail::is_sve<S>::value, int> = 0>
 static int width() { S v; return width(v); }
 
+template <typename S, typename std::enable_if_t<detail::is_sve<S>::value, int> = 0>
+static constexpr int min_align() { S v; return min_align(v); }
+
 namespace detail {
 
 template <typename I, typename V>
diff --git a/arbor/mechcat.cpp b/arbor/mechcat.cpp
index 9d94da91..0b39ffd2 100644
--- a/arbor/mechcat.cpp
+++ b/arbor/mechcat.cpp
@@ -7,12 +7,16 @@
 
 #include <dlfcn.h>
 
+#include <arbor/version.hpp>
 #include <arbor/arbexcept.hpp>
 #include <arbor/mechcat.hpp>
+#include <arbor/mechanism_abi.h>
+#include <arbor/mechanism.hpp>
 #include <arbor/util/expected.hpp>
 
 #include "util/rangeutil.hpp"
 #include "util/maputil.hpp"
+#include "util/span.hpp"
 
 /* Notes on implementation:
  *
@@ -142,7 +146,7 @@ struct catalogue_state {
         }
 
         for (const auto& name_impls: other.impl_map_) {
-            std::unordered_map<std::type_index, std::unique_ptr<mechanism>> impls;
+            std::unordered_map<arb_backend_kind, std::unique_ptr<mechanism>> impls;
             for (const auto& tidx_mptr: name_impls.second) {
                 impls[tidx_mptr.first] = tidx_mptr.second->clone();
             }
@@ -172,13 +176,13 @@ struct catalogue_state {
     }
 
     // Register concrete mechanism for a back-end type.
-    hopefully<void> register_impl(std::type_index tidx, const std::string& name, std::unique_ptr<mechanism> mech) {
+    hopefully<void> register_impl(arb_backend_kind kind, const std::string& name, std::unique_ptr<mechanism> mech) {
         if (auto fptr = fingerprint_ptr(name)) {
             if (mech->fingerprint()!=*fptr.value()) {
                 return unexpected_exception_ptr(fingerprint_mismatch(name));
             }
 
-            impl_map_[name][tidx] = std::move(mech);
+            impl_map_[name][kind] = std::move(mech);
             return {};
         }
         else {
@@ -326,7 +330,7 @@ struct catalogue_state {
         new_info->ions = std::move(new_ions);
 
         deriv.derived_info = std::move(new_info);
-        return std::move(deriv);
+        return deriv;
     }
 
     // Implicit derivation.
@@ -399,7 +403,7 @@ struct catalogue_state {
     }
 
     // Retrieve implementation for this mechanism name or closest ancestor.
-    hopefully<std::unique_ptr<mechanism>> implementation(std::type_index tidx, const std::string& name) const {
+    hopefully<std::unique_ptr<mechanism>> implementation(arb_backend_kind kind, const std::string& name) const {
         const std::string* impl_name = &name;
         hopefully<derivation> implicit_deriv;
 
@@ -413,7 +417,7 @@ struct catalogue_state {
 
         for (;;) {
             if (const auto* mech_impls = ptr_by_key(impl_map_, *impl_name)) {
-                if (auto* p = ptr_by_key(*mech_impls, tidx)) {
+                if (auto* p = ptr_by_key(*mech_impls, kind)) {
                     return p->get()->clone();
                 }
             }
@@ -498,7 +502,7 @@ struct catalogue_state {
     string_map<derivation> derived_map_;
 
     // Prototype register, keyed on mechanism name, then backend type (index).
-    string_map<std::unordered_map<std::type_index, mechanism_ptr>> impl_map_;
+    string_map<std::unordered_map<arb_backend_kind, mechanism_ptr>> impl_map_;
 };
 
 // Mechanism catalogue method implementations.
@@ -568,16 +572,12 @@ void mechanism_catalogue::remove(const std::string& name) {
     state_->remove(name);
 }
 
-void mechanism_catalogue::register_impl(std::type_index tidx, const std::string& name, std::unique_ptr<mechanism> mech) {
-    value(state_->register_impl(tidx, name, std::move(mech)));
+void mechanism_catalogue::register_impl(arb_backend_kind kind, const std::string& name, mechanism_ptr mech) {
+    value(state_->register_impl(kind, name, std::move(mech)));
 }
 
-std::pair<mechanism_ptr, mechanism_overrides> mechanism_catalogue::instance_impl(std::type_index tidx, const std::string& name) const {
-    std::pair<mechanism_ptr, mechanism_overrides> result;
-    result.first = value(state_->implementation(tidx, name));
-    result.second = value(state_->overrides(name));
-
-    return result;
+std::pair<mechanism_ptr, mechanism_overrides> mechanism_catalogue::instance_impl(arb_backend_kind kind, const std::string& name) const {
+    return {value(state_->implementation(kind, name)), value(state_->overrides(name))};
 }
 
 mechanism_catalogue::~mechanism_catalogue() = default;
diff --git a/arbor/mechinfo.cpp b/arbor/mechinfo.cpp
new file mode 100644
index 00000000..abfe6c97
--- /dev/null
+++ b/arbor/mechinfo.cpp
@@ -0,0 +1,34 @@
+#include <arbor/mechinfo.hpp>
+
+#include "util/span.hpp"
+
+namespace arb {
+mechanism_info::mechanism_info(const arb_mechanism_type& m) {
+    post_events = m.has_post_events;
+    linear      = m.is_linear;
+    fingerprint = m.fingerprint;
+    for (auto idx: util::make_span(m.n_globals)) {
+        const auto& v = m.globals[idx];
+        globals[v.name] = { mechanism_field_spec::field_kind::global, v.unit, v.default_value, v.range_low, v.range_high };
+    }
+    for (auto idx: util::make_span(m.n_parameters)) {
+        const auto& v = m.parameters[idx];
+        parameters[v.name] = { mechanism_field_spec::field_kind::parameter, v.unit, v.default_value, v.range_low, v.range_high };
+    }
+    for (auto idx: util::make_span(m.n_state_vars)) {
+        const auto& v = m.state_vars[idx];
+        state[v.name] = { mechanism_field_spec::field_kind::state, v.unit, v.default_value, v.range_low, v.range_high };
+    }
+    for (auto idx: util::make_span(m.n_ions)) {
+        const auto& v = m.ions[idx];
+        ions[v.name] = { v.write_int_concentration,
+        v.write_ext_concentration,
+        v.read_rev_potential,
+        v.write_rev_potential,
+        v.read_valence,
+        v.verify_valence,
+        v.expected_valence };
+    }
+}
+
+}
diff --git a/arbor/memory/fill.cu b/arbor/memory/fill.cu
index fc16e9cc..d444c76d 100644
--- a/arbor/memory/fill.cu
+++ b/arbor/memory/fill.cu
@@ -1,4 +1,4 @@
-#include "backends/gpu/gpu_api.hpp"
+#include <arbor/gpu/gpu_api.hpp>
 
 #include <cstdint>
 
diff --git a/arbor/memory/gpu_wrappers.cpp b/arbor/memory/gpu_wrappers.cpp
index 4e0679ac..d037f5b9 100644
--- a/arbor/memory/gpu_wrappers.cpp
+++ b/arbor/memory/gpu_wrappers.cpp
@@ -7,7 +7,7 @@
 
 #ifdef ARB_HAVE_GPU
 
-#include <backends/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_api.hpp>
 
 #define HANDLE_GPU_ERROR(error, msg)\
 throw arbor_exception("GPU memory:: "+std::string(__func__)+" "+std::string((msg))+": "+error.description());
diff --git a/arbor/util/maputil.hpp b/arbor/util/maputil.hpp
index f6b4a939..e2082d8f 100644
--- a/arbor/util/maputil.hpp
+++ b/arbor/util/maputil.hpp
@@ -113,7 +113,7 @@ namespace maputil_impl {
     }
 }
 
-// Return copy of value associated with key, wrapped in std::optional, or std::nullopty.
+// Return copy of value associated with key, wrapped in std::optional, or std::nullopt.
 
 template <typename C, typename Key, typename Eq>
 auto value_by_key(C&& c, const Key& k, Eq eq) {
diff --git a/arbor/util/range.hpp b/arbor/util/range.hpp
index 7eaddb53..3a7257b7 100644
--- a/arbor/util/range.hpp
+++ b/arbor/util/range.hpp
@@ -145,6 +145,12 @@ range<U, V> make_range(const std::pair<U, V>& iterators) {
     return range<U, V>(iterators.first, iterators.second);
 }
 
+// From pointer and length
+template <typename T>
+range<T, T> range_n(T t, size_t n) {
+    return {t, t + n};
+}
+
 // Present a possibly sentinel-terminated range as an STL-compatible sequence
 // using the sentinel_iterator adaptor.
 
diff --git a/doc/concepts/mechanisms.rst b/doc/concepts/mechanisms.rst
index 14c33b44..d98bc2d8 100644
--- a/doc/concepts/mechanisms.rst
+++ b/doc/concepts/mechanisms.rst
@@ -76,9 +76,55 @@ Two catalogues are provided that collect mechanisms associated with specific pro
 * *bbp* For models published by the Blue Brain Project (BBP).
 * *allen* For models published on the Allen Brain Atlas Database.
 
-Further catalogues can be added by extending the list of built-in catalogues in
-the arbor source tree or by compiling a dynamically loadable catalogue
-(:ref:`extending catalogues <extending-catalogues>`).
+.. _mechanisms_dynamic:
+
+Adding Catalogues to Arbor
+''''''''''''''''''''''''''
+
+.. Note::
+
+   If you are coming from NEURON this is the equivalent of ``nrnivmodl``.
+
+This will produce a catalogue loadable at runtime by calling ``load_catalogue``
+with a filename in both C++ and Python. The steps are
+
+1. Prepare a directory containing your NMODL files (.mod suffixes required)
+2. Call ``build-catalogue`` installed by arbor
+
+   .. code-block :: bash
+
+     build-catalogue <name> <path/to/nmodl>
+
+All files with the suffix ``.mod`` located in ``<path/to/nmodl>`` will be baked into
+a catalogue named ``lib<name>-catalogue.so`` and placed into your current working
+directory. Note that these files are platform-specific and should only be used
+on the combination of OS, compiler, arbor, and machine they were built with.
+
+Errors might be diagnosable by passing the ``-v`` flag.
+
+This catalogue can then be used similarly to the built-in ones
+
+   .. code-block :: python
+
+     import arbor as A
+
+     c = A.load_catalogue('bbp2-catalogue.so')
+
+     [n for n in c]
+     >> ['Ca_LVAst',
+         'Nap_Et2',
+         'NaTa_t',
+         'SKv3_1',
+         'K_Tst',
+         'Ih',
+         'SK_E2',
+         'Ca_HVA',
+         'CaDynamics_E2',
+         'Im',
+         'NaTs2_t',
+         'K_Pst']
+
+See also the demonstration in ``python/example/dynamic-catalogue.py`` for an example.
 
 Parameters
 ''''''''''
diff --git a/doc/internals/extending_catalogues.rst b/doc/internals/extending_catalogues.rst
index b9de2e88..fbdb7e91 100644
--- a/doc/internals/extending_catalogues.rst
+++ b/doc/internals/extending_catalogues.rst
@@ -1,58 +1,52 @@
 .. _extending-catalogues:
 
-Adding Catalogues to Arbor
-==========================
+Adding Built-in Catalogues to Arbor
+===================================
 
 There are two ways new mechanisms catalogues can be added to Arbor, statically
-or dynamically. None is considered to be part of the stable user-facing API at
-the moment, although the dynamic approach is aligned with our eventual goals.
+or dynamically. If you have a set of mechanisms to use with Arbor, you are in
+all likelihood interested in the former.
 
-Both require a copy of the Arbor source tree and the compiler toolchain used to
-build Arbor in addition to the installed library.
+.. warning::
 
-Static Extensions
-'''''''''''''''''
+   If you are coming from NEURON and looking for the equivalent of
+   ``nrnivmodl``, please read on :ref:`here <_mechanisms_dynamic>`.
 
-This will produce a catalogue of the same level of integration as the built-in
-catalogues (*default*, *bbp*, and *allen*). The required steps are as follows
+   Following this path is for developers rather than end-users.
+
+This requires a copy of the Arbor source tree and the compiler toolchain used to
+build Arbor in addition to the installed library. Following these steps will
+produce a catalogue of the same level of integration as the built-in catalogues
+(*default*, *bbp*, and *allen*). The required steps are as follows
 
 1. Go to the Arbor source tree.
 2. Create a new directory under *mechanisms*.
 3. Add your .mod files.
-4. Edit *mechanisms/CMakeLists.txt* to add a definition like this
+4. Edit *mechanisms/CMakeLists.txt* to add a definition like this (example from
+   *default* catalogue)
 
    .. code-block :: cmake
 
      make_catalogue(
-       NAME <catalogue-name>                             # Name of your catalogue
-       SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/<directory>" # Directory name (added above)
-       OUTPUT "<output-name>"                            # Variable name to output to
-       CXX_FLAGS_TARGET "<compiler flags>"               # Target-specific flags for C++ compiler
-       MECHS <names>)                                    # Space separated list of mechanism
-                                                         # names w/o .mod suffix.
-
-5. Add your `output-name` to the `arbor_mechanism_sources` list.
-6. Add a `global_NAME_catalogue` function in `mechcat.hpp` and `mechcat.cpp`
-7. Bind this function in `python/mechanisms.cpp`.
-
-All steps can be more or less copied from the surrounding code.
+       NAME default                                                   # Name of your catalogue
+       SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/default"                  # Directory name (added above)
+       OUTPUT "CAT_DEFAULT_SOURCES"                                   # Variable name to store C++ files into (see below)
+       MECHS exp2syn expsyn expsyn_stdp hh kamt kdrmt nax nernst pas  # Space separated list of mechanisms w/o .mod suffix.
+       PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"                      # where does 'generate_catalogue' live, do not change
+       STANDALONE FALSE                                               # build as shared object, must be OFF
+       VERBOSE OFF)                                                   # Print debug info at configuration time
 
-Dynamic Extensions
-''''''''''''''''''
+5. Add your ``output-name`` to the ``arbor_mechanism_sources`` list.
 
-This will produce a catalogue loadable at runtime by calling `load_catalogue`
-with a filename in both C++ and Python. The steps are
-
-1. Prepare a directory containing your NMODL files (.mod suffixes required)
-2. Call `build_catalogue` from the `scripts` directory
-
-   .. code-block :: bash
+   .. code-block :: cmake
 
-     build-catalogue <name> <path/to/nmodl>
+     set(arbor_mechanism_sources
+       ${CAT_BBP_SOURCES}
+       ${CAT_ALLEN_SOURCES}
+       ${CAT_DEFAULT_SOURCES}                                          # from above
+       PARENT_SCOPE)
 
-All files with the suffix `.mod` located in `<path/to/nmodl>` will be baked into
-a catalogue named `lib<name>-catalogue.so` and placed into your current working
-directory. Note that these files are platform-specific and should only be used
-on the combination of OS, compiler, arbor, and machine they were built with.
+6. Add a ``global_NAME_catalogue`` function in ``mechcat.hpp`` and ``mechcat.cpp``
+7. Bind this function in ``python/mechanisms.cpp``.
 
-See the demonstration in `python/example/dynamic-catalogue.py` for an example.
+All steps can be more or less copied from the surrounding code.
diff --git a/doc/internals/index.rst b/doc/internals/index.rst
index b6400862..cb7a8447 100644
--- a/doc/internals/index.rst
+++ b/doc/internals/index.rst
@@ -12,4 +12,4 @@ Here we document internal components of Arbor. These pages can be useful if you'
    util
    simd_api
    extending_catalogues
-
+   mechanism_abi
diff --git a/doc/internals/mechanism_abi.rst b/doc/internals/mechanism_abi.rst
new file mode 100644
index 00000000..972204b4
--- /dev/null
+++ b/doc/internals/mechanism_abi.rst
@@ -0,0 +1,310 @@
+.. _mechanism_abi:
+
+Mechanism ABI
+=============
+
+Here you will find the information needed to connect Arbor to mechanism
+implementations outside the use of NMODL and ``modcc``. This may include writing
+a custom compiler targetting Arbor, or directly implementing mechanisms in a
+C-compatible language. Needless to say that this is aimed at developers rather
+than users.
+
+The Arbor library is isolated from these implementations through an Application
+Binary Interface (ABI) or plugin interface. Information is provided by the ABI
+implementor via two core types.
+
+All functionality is offered via a single C header file in the Arbor include
+directory, ``mechanism_abi.h``. The central datatypes here are
+``arb_mechanism_type`` and ``arb_mechanism_interface``, laying out the metadata
+and backend implementations respectively. A single ``arb_mechanism_type``
+instance may be used by multiple ``arb_mechanism_interface`` instances.
+
+Note that ``mechanism_abi.h`` is heavily commented and might be useful as
+documentation in its own right.
+
+Metadata: ``arb_mechanism_type``
+--------------------------------
+
+This type collects all information independent of the backend.
+
+  .. code:: c
+
+   typedef struct {
+     // Metadata
+     unsigned long             abi_version;       // mechanism was built using this ABI,
+                                                  // should be ARB_MECH_ABI_VERSION
+     arb_mechanism_fingerprint fingerprint;       // unique ID, currently ignored
+     const char*               name;              // (catalogue-level) unique name
+     arb_mechanism_kind        kind;              // one of: point, density, reversal_potential
+     bool                      is_linear;         // synapses only: if the state G is governed by dG/dt = f(v, G, M(t)), where:
+                                                  //    M(t) =Î£ wáµ¢Î´áµ¢(t) weighted incoming events,
+                                                  // then f is linear in G and M. If true, mechanisms must adhere to this contract.
+                                                  // Ignored for everything else.
+     bool                      has_post_events;   // implements post_event hook
+     // Tables
+     arb_field_info*           globals;
+     arb_size_type             n_globals;
+     arb_field_info*           state_vars;
+     arb_size_type             n_state_vars;
+     arb_field_info*           parameters;
+     arb_size_type             n_parameters;
+     arb_ion_info*             ions;
+     arb_size_type             n_ions;
+   } arb_mechanism_type;
+
+Tables
+''''''
+
+All tables are given as an integer size and an array. Currently we have two
+kinds of tables, which are fairly self-explanatory. Note that these are not
+connected to the actual storage layout, in particular, no memory management is
+allowed inside mechanisms.
+
+First, parameters, state variables, and global constants
+
+  .. code:: c
+
+    typedef struct {
+        const char* name;               // Field name, can be used from the library to query/set field values.
+        const char* unit;               // Physical units, just for introspection, not checked
+        arb_value_type default_value;   // values will be initialised to this value
+        arb_value_type range_low;       // valid range, lower bound, will be enforced
+        arb_value_type range_high;      // valid range, upper bound, will be enforced
+    } arb_field_info;
+
+Second ion dependencies
+
+  .. code:: c
+
+    typedef struct {
+        const char* name;              // Ion name, eg Ca, K, ...
+        bool write_int_concentration;  // writes Xi?
+        bool write_ext_concentration;  // writes Xo?
+        bool write_rev_potential;      // writes Er?
+        bool read_rev_potential;       // uses Er?
+        bool read_valence;             // Uses valence?
+        bool verify_valence;           // Checks valence?
+        int  expected_valence;         // Expected value
+    } arb_ion_info;
+
+Interlude: Parameter packs
+--------------------------
+
+In order to explain the interface type, we have to digress first and introduce
+the type ``arb_mechanism_ppack``. This record is used to pass all information to
+and from the interface methods.
+
+Objects of this type are always created and allocated by the library and passed
+fully formed to the interface. In particular, at this point
+
+- Global data values are initialised
+- pointers in ``ion_state_view`` are set to their associated values in shared
+  state on the library side
+- pointers to state, parameters, globals, and constants are allocated and
+  initialised to the given defaults.
+- SIMD only: ``index_constraint`` is set up
+
+  .. code:: c
+
+    typedef struct {
+        // Global data
+        arb_index_type width;                           // Number of CVs of this mechanism, size of arrays
+        arb_index_type n_detectors;                     // Number of spike detectors
+        arb_index_type* vec_ci;                         // [Array] Map CV to cell
+        arb_index_type* vec_di;                         // [Array] Map
+        const arb_value_type* vec_t;                    // [Array] time value
+        arb_value_type* vec_dt;                         // [Array] time step
+        arb_value_type* vec_v;                          // [Array] potential
+        arb_value_type* vec_i;                          // [Array] current
+        arb_value_type* vec_g;                          // [Array] conductance
+        arb_value_type* temperature_degC;               // [Array] Temperature in celsius
+        arb_value_type* diam_um;                        // [Array] CV diameter
+        arb_value_type* time_since_spike;               // Times since last spike; one entry per cell and detector.
+        arb_index_type* node_index;                     // Indices of CVs covered by this mechanism, size is width
+        arb_index_type* multiplicity;                   // [Unused]
+        arb_value_type* weight;                         // [Array] Weight
+        arb_size_type mechanism_id;                     // Unique ID for this mechanism on this cell group
+        arb_deliverable_event_stream events;            // Events during the last period
+        arb_constraint_partition     index_constraints; // Index restrictions, not initialised for all backends.
+        // User data
+        arb_value_type** parameters;                    // [Array] setable parameters
+        arb_value_type** state_vars;                    // [Array] integrable state
+        arb_value_type*  globals;                       // global constant state
+        arb_ion_state*   ion_states;                    // [Array] views into shared state
+    } arb_mechanism_ppack;
+
+Members tagged as ``[Array]`` represent one value per CV. To access the values
+belonging to your mechanism, a level of indirection via ``node_index`` is
+needed.
+
+Example: Let's assume mechanism ``hh`` is defined on two regions: ``R``
+comprising CVs ``0`` and ``1``, ``R'`` with a single CV ``9``. Then ``node_index
+= [0, 1, 9]`` and ``width = 3``. Arrays like ``vec_v`` will be of size ``3`` as
+well. To access the CVs' diameters, one would write
+
+  .. code:: c++
+
+    for (auto cv = 0; cv < ppack.width; ++cv) {
+        auto idx = node_index[cv];
+        auto d   = ppack_um[idx];
+    }
+
+Note that values in ``ppack.diam_um`` cover _all_ CV's regardless whether they
+are covered by the current mechanisms. Reading those values (or worse writing to
+them) is considered undefined behaviour. The same holds for all other fields of
+``ppack``.
+
+User Data
+'''''''''
+
+This section is derived from the tables passed in via the metadata struct, see
+above. One entry per relevant table entry is provided in the same order. So, if
+
+  .. code:: c
+
+    arb_field_info globals[] = { arb_field_info { .name="A",
+                                                  .unit="lb ft / s",
+                                                  .default_value=42.0,
+                                                  .range_low=0,
+                                                  .range_high=123 },
+                                 arb_field_info { .name="B",
+                                                  .unit="kg m / s",
+                                                  .default_value=42.0,
+                                                  .range_low=0,
+                                                  .range_high=123 }};
+    arb_mechanism_type m = { .n_globals=2, .globals=globals };
+
+the ``globals`` field of the corresponding parameter pack would have two
+entries, the first corresponding to ``A`` and initialised to 42.0 and the second
+for ``B`` set to 42.0.
+
+The evolution of the state variables is left to the implementation via
+``integrate_state``, while ``globals`` and ``parameters`` are considered
+read-only. The ion states internal concentration ``Xi``, external concentration
+``Xo``, trans-membrane current ``iX`` may also be read and written. Note that
+concurrent updates by multiple mechanisms might occur in any order and each
+mechanism will only observe the initial values at the time step boundary. All
+contribution by mechanisms are summed up into a final value. Further note that
+accessing these values without declaring this via a relevant ``arb_ion_info`` in
+the ``arb_mechanism_type`` is undefined behaviour. Parameter packs are specific
+to a backend.
+
+Implementation: ``arb_mechanism_interface``
+-------------------------------------------
+
+The interface methods will be called with allocated and initialised ``ppack``
+data. The actual layout is unspecified, but all pointers are allocated and set
+by the library. Plugins should never allocate memory on their own.
+
+  .. code:: C
+
+    typedef void (*arb_mechanism_method)(arb_mechanism_ppack*);
+
+This is the type of all interface methods. These are collected in the record
+below with some metadata about the backend.
+
+  .. code:: C
+
+    typedef struct {
+      arb_backend_kind   backend;               // one of cpu, gpu
+      arb_size_type      partition_width;       // granularity for this backed, eg SIMD lanes
+      // Interface methods; see below
+      arb_mechanism_method init_mechanism;
+      arb_mechanism_method compute_currents;
+      arb_mechanism_method apply_events;
+      arb_mechanism_method advance_state;
+      arb_mechanism_method write_ions;
+      arb_mechanism_method post_event;
+    } arb_mechanism_interface;
+
+
+``init_mechanism``
+''''''''''''''''''
+- called once during instantiation,
+- setup initial state, corresponds to NMODL's INITIAL block,
+- will receive an allocated and initialised ppack object
+
+``compute_currents``
+''''''''''''''''''''
+
+- compute ionic currents and set them through pointers in `ion_state`, currents
+  live in `current_density`
+- called during each integration time step
+  - at the start for reversal potential mechanisms, *before* current reset
+  - after event deliver for anything else
+
+``apply_events``
+''''''''''''''''
+
+This method is expected to consume a set of `arb_deliverable_events` and apply
+effects to internal state, found in ``ppack.events`` which is of type
+``arb_deliverable_event_stream``.
+
+  .. code:: c
+
+     typedef struct {
+         arb_size_type   mech_id;       // mechanism type identifier (per cell group).
+         arb_size_type   mech_index;    // instance of the mechanism
+         arb_float_type  weight;        // connection weight
+     } arb_deliverable_event;
+
+     typedef struct {
+         arb_size_type                n_streams; // number of streams
+         const arb_deliverable_event* events;    // array of event data items
+         const arb_index_type*        begin;     // array of offsets to beginning of marked events
+         const arb_index_type*        end;       // array of offsets to end of marked events
+     }  arb_deliverable_event_stream;
+
+These structures are set up correctly externally, but are only valid during this call.
+The data is read-only for ``apply_events``.
+
+- called during each integration time step, right after resetting currents
+- corresponding to ``NET_RECEIVE``
+
+``advanced_state``
+''''''''''''''''''
+
+- called during each integration time step, after solving Hines matrices
+- perform integration on state variables
+- state variables live in `state_vars`, with a layout described above
+
+``write_ions``
+''''''''''''''
+
+- update ionic concentrations via the pointers in `ion_state`
+- called during each integration time step, after state integration
+
+``post_event``
+''''''''''''''
+
+- used to implement spike time dependent plasticity
+- consumes ``ppack.time_since_spike``
+- called during each integration time step, after checking for spikes
+- if implementing this, also set ``has_post_event=true`` in the metadata
+
+SIMDization
+-----------
+
+If a mechanism interface processes arrays in SIMD bundles, it needs to set
+``partition_width`` to that bundle's width in units of ``arb_value_type``. The
+library will set up ``arb_constraint_partition index_constraint`` in the
+parameter pack. This structure describe which bundles can be loaded/stored as a
+contiguous block, which ones must be gathered/scattered, which are to be
+broadcast from a constant, and so on. The reason for this is the indirection via
+``node_index`` mentioned before. Please refer to the documentation of our SIMD
+interface layer for more information.
+
+Making A Loadable Mechanism
+---------------------------
+
+Mechanisms interface with the library by providing three functions, one
+returning the metadata portion, and one for each implemented backend (currently
+two). The latter may return a NULL pointer, indicating that this backend is not
+supported. The naming scheme is shown in the example below
+
+  .. code:: C
+
+    arb_mechanism_type make_arb_default_catalogue_pas();
+
+    arb_mechanism_interface* make_arb_default_catalogue_pas_interface_multicore();
+    arb_mechanism_interface* make_arb_default_catalogue_pas_interface_gpu();
diff --git a/ext/fmt b/ext/fmt
new file mode 160000
index 00000000..9cb347b4
--- /dev/null
+++ b/ext/fmt
@@ -0,0 +1 @@
+Subproject commit 9cb347b4b2e80fc9fbf57b8621746663c3f870f6
diff --git a/mechanisms/BuildModules.cmake b/mechanisms/BuildModules.cmake
index aa751769..38f466f0 100644
--- a/mechanisms/BuildModules.cmake
+++ b/mechanisms/BuildModules.cmake
@@ -58,7 +58,7 @@ function(build_modules)
 endfunction()
 
 function("make_catalogue")
-  cmake_parse_arguments(MK_CAT "" "NAME;SOURCES;OUTPUT;ARBOR;STANDALONE;VERBOSE" "CXX_FLAGS_TARGET;MECHS" ${ARGN})
+  cmake_parse_arguments(MK_CAT "" "NAME;SOURCES;OUTPUT;PREFIX;STANDALONE;VERBOSE" "CXX_FLAGS_TARGET;MECHS" ${ARGN})
   set(MK_CAT_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated/${MK_CAT_NAME}")
 
   # Need to set ARB_WITH_EXTERNAL_MODCC *and* modcc
@@ -72,10 +72,10 @@ function("make_catalogue")
     message("Catalogue mechanisms: ${MK_CAT_MECHS}")
     message("Catalogue sources:    ${MK_CAT_SOURCES}")
     message("Catalogue output:     ${MK_CAT_OUT_DIR}")
-    message("Arbor source tree:    ${MK_CAT_ARBOR}")
     message("Build as standalone:  ${MK_CAT_STANDALONE}")
     message("Arbor cxx flags:      ${MK_CAT_CXX_FLAGS_TARGET}")
     message("Arbor cxx compiler:   ${ARB_CXX}")
+    message("Script prefix:        ${MK_CAT_PREFIX}")
     message("Current cxx compiler: ${CMAKE_CXX_COMPILER}")
   endif()
 
@@ -101,10 +101,10 @@ function("make_catalogue")
   endif()
 
   add_custom_command(
-    OUTPUT ${catalogue_${MK_CAT_NAME}_source}
-    COMMAND ${MK_CAT_ARBOR}/mechanisms/generate_catalogue ${catalogue_${MK_CAT_NAME}_options} ${MK_CAT_MECHS}
+    OUTPUT  ${catalogue_${MK_CAT_NAME}_source}
+    COMMAND ${MK_CAT_PREFIX}/generate_catalogue ${catalogue_${MK_CAT_NAME}_options} ${MK_CAT_MECHS}
     COMMENT "Building catalogue ${MK_CAT_NAME}"
-    DEPENDS ${MK_CAT_ARBOR}/mechanisms/generate_catalogue)
+    DEPENDS ${MK_CAT_PREFIX}/generate_catalogue)
 
   add_custom_target(${MK_CAT_NAME}_catalogue_cpp_target DEPENDS ${catalogue_${MK_CAT_NAME}_source})
   add_dependencies(build_catalogue_${MK_CAT_NAME}_mods ${MK_CAT_NAME}_catalogue_cpp_target)
diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index 67e5f9cc..f696245d 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -6,8 +6,8 @@ make_catalogue(
   SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/bbp"
   OUTPUT "CAT_BBP_SOURCES"
   MECHS CaDynamics_E2 Ca_HVA Ca_LVAst Ih Im K_Pst K_Tst Nap_Et2 NaTa_t NaTs2_t SK_E2 SKv3_1
+  PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
   CXX_FLAGS_TARGET "${ARB_CXX_FLAGS_TARGET_FULL}"
-  ARBOR "${PROJECT_SOURCE_DIR}"
   STANDALONE FALSE
   VERBOSE ${ARB_CAT_VERBOSE})
 
@@ -16,8 +16,8 @@ make_catalogue(
   SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/allen"
   OUTPUT "CAT_ALLEN_SOURCES"
   MECHS CaDynamics Ca_HVA Ca_LVA Ih Im Im_v2 K_P K_T Kd Kv2like Kv3_1 NaTa NaTs NaV Nap SK
+  PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
   CXX_FLAGS_TARGET "${ARB_CXX_FLAGS_TARGET_FULL}"
-  ARBOR "${PROJECT_SOURCE_DIR}"
   STANDALONE FALSE
   VERBOSE ${ARB_CAT_VERBOSE})
 
@@ -26,8 +26,8 @@ make_catalogue(
   SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/default"
   OUTPUT "CAT_DEFAULT_SOURCES"
   MECHS exp2syn expsyn expsyn_stdp hh kamt kdrmt nax nernst pas
+  PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
   CXX_FLAGS_TARGET "${ARB_CXX_FLAGS_TARGET_FULL}"
-  ARBOR "${PROJECT_SOURCE_DIR}"
   STANDALONE FALSE
   VERBOSE ${ARB_CAT_VERBOSE})
 
diff --git a/mechanisms/generate_catalogue b/mechanisms/generate_catalogue
index 8f4b46bb..b0491e5e 100755
--- a/mechanisms/generate_catalogue
+++ b/mechanisms/generate_catalogue
@@ -97,9 +97,10 @@ r'''// Automatically generated by:
 // $cmdline
 
 #include <${arbpfx}mechcat.hpp>
+#include <${arbpfx}mechanism.hpp>
+#include <${arbpfx}mechanism_abi.h>
 $backend_includes
 $module_includes
-$using_namespace
 
 namespace arb {
 
@@ -138,18 +139,13 @@ extern "C" {
         cmdline=" ".join(sys.argv),
         arbpfx=arbpfx,
         catalogue=catalogue,
-        using_namespace = indent(0,
-            ['using namespace {};'.format(n) for n in namespaces]),
-        backend_includes = indent(0,
-            # ['#include <{}backends/{}/fvm.hpp>'.format(arbpfx, b) for b in backends]),
-            ['#include "backends/{}/fvm.hpp"'.format(b) for b in backends]),
+        backend_includes = indent(0, []),
         module_includes = indent(0,
             ['#include "{}{}.hpp"'.format(modpfx, m) for m in modules]),
         add_modules = indent(4,
-            ['cat.add("{0}", mechanism_{0}_info());'.format(m) for m in modules]),
+            [f'cat.add("{mod}", make_arb_{catalogue}_catalogue_{mod}());' for mod in modules]),
         register_modules = indent(4,
-            ['cat.register_implementation("{0}", make_mechanism_{0}<{1}::backend>());'.format(m, b)
-             for m in modules for b in backends])
+            [f'cat.register_implementation("{mod}", std::make_unique<mechanism>(make_arb_{catalogue}_catalogue_{mod}(), *make_arb_{catalogue}_catalogue_{mod}_interface_{be}()));' for mod in modules for be in backends])
         ))
 
 
diff --git a/modcc/CMakeLists.txt b/modcc/CMakeLists.txt
index 67ee89c8..bb2692e7 100644
--- a/modcc/CMakeLists.txt
+++ b/modcc/CMakeLists.txt
@@ -31,11 +31,19 @@ set(modcc_sources modcc.cpp)
 
 add_library(libmodcc STATIC ${libmodcc_sources})
 target_include_directories(libmodcc PUBLIC .)
+target_include_directories(libmodcc PRIVATE ../ext/fmt/include)
+
 set_target_properties(libmodcc PROPERTIES OUTPUT_NAME modcc)
 
-add_executable(modcc ${modcc_sources})
-target_link_libraries(modcc libmodcc ext-tinyopt)
 
+add_executable(modcc ${modcc_sources})
+target_link_libraries(modcc PRIVATE libmodcc ext-tinyopt)
+if (ARB_USE_BUNDLED_FMT)
+    target_include_directories(modcc PRIVATE ../ext/fmt/include)
+else ()
+    find_package(fmt)
+    target_link_libraries(modcc PRIVATE fmt::fmt-header-only)
+endif ()
 set_target_properties(modcc libmodcc PROPERTIES EXCLUDE_FROM_ALL ${ARB_WITH_EXTERNAL_MODCC})
 
 if (NOT ARB_WITH_EXTERNAL_MODCC)
diff --git a/modcc/expression.hpp b/modcc/expression.hpp
index fc7c3dac..e1eef14e 100644
--- a/modcc/expression.hpp
+++ b/modcc/expression.hpp
@@ -235,7 +235,7 @@ public :
     virtual VariableExpression*   is_variable()          {return nullptr;}
     virtual ProcedureExpression*  is_procedure()         {return nullptr;}
     virtual NetReceiveExpression* is_net_receive()       {return nullptr;}
-    virtual PostEventExpression*  is_post_event()       {return nullptr;}
+    virtual PostEventExpression*  is_post_event()        {return nullptr;}
     virtual APIMethod*            is_api_method()        {return nullptr;}
     virtual IndexedVariable*      is_indexed_variable()  {return nullptr;}
     virtual LocalVariable*        is_local_variable()    {return nullptr;}
diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp
index a9bceb1c..4cb4cb3a 100644
--- a/modcc/modcc.cpp
+++ b/modcc/modcc.cpp
@@ -240,7 +240,10 @@ int main(int argc, char **argv) {
         // If no output prefix given, use the module name.
         std::string prefix = opt.outprefix.empty()? m.module_name(): opt.outprefix;
 
-        io::write_all(build_info_header(m, popt), prefix+".hpp");
+        bool have_cpu = opt.targets.find(targetKind::cpu) != opt.targets.end();
+        bool have_gpu = opt.targets.find(targetKind::gpu) != opt.targets.end();
+
+        io::write_all(build_info_header(m, popt, have_cpu, have_gpu), prefix+".hpp");
         for (targetKind target: opt.targets) {
             std::string outfile = prefix;
             switch (target) {
diff --git a/modcc/printer/cexpr_emit.cpp b/modcc/printer/cexpr_emit.cpp
index fdc02657..d1366a46 100644
--- a/modcc/printer/cexpr_emit.cpp
+++ b/modcc/printer/cexpr_emit.cpp
@@ -3,6 +3,7 @@
 #include <ostream>
 #include <unordered_map>
 
+#include "printerutil.hpp"
 #include "cexpr_emit.hpp"
 #include "error.hpp"
 #include "lexer.hpp"
@@ -226,7 +227,7 @@ std::string id_prefix(IdentifierExpression* id) {
         if (auto symbol = id->symbol()->is_symbol()) {
             if (auto var = symbol->is_variable()) {
                 if (!var->is_local_variable()) {
-                    return "pp->"+id->name();
+                    return pp_var_pfx + id->name();
                 }
             }
         }
@@ -374,6 +375,7 @@ void SimdExprEmitter::visit(CallExpression* e) {
 }
 
 void SimdExprEmitter::visit(AssignmentExpression* e) {
+    ENTER(out_);
     if (!e->lhs() || !e->lhs()->is_identifier() || !e->lhs()->is_identifier()->symbol()) {
         throw compiler_exception("Expect symbol on lhs of assignment: "+e->to_string());
     }
@@ -383,7 +385,6 @@ void SimdExprEmitter::visit(AssignmentExpression* e) {
 
     auto lhs_pfxd = id_prefix(e->lhs()->is_identifier());
 
-
     if (lhs->is_variable() && lhs->is_variable()->is_range()) {
         if (!input_mask_.empty()) {
             mask = "S::logical_and(" + mask + ", " + input_mask_ + ")";
@@ -408,6 +409,7 @@ void SimdExprEmitter::visit(AssignmentExpression* e) {
         out_ << ") = ";
         e->rhs()->accept(this);
     }
+    EXIT(out_);
 }
 
 void SimdExprEmitter::visit(IfExpression* e) {
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index b2a3c5f1..22137760 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -13,6 +13,11 @@
 #include "printer/printerutil.hpp"
 #include "printer/marks.hpp"
 
+#define FMT_HEADER_ONLY YES
+#include <fmt/core.h>
+#include <fmt/format.h>
+#include <fmt/compile.h>
+
 using io::indent;
 using io::popindent;
 using io::quote;
@@ -25,9 +30,16 @@ constexpr bool with_profiling() {
 #endif
 }
 
-inline static std::string make_cpu_class_name(const std::string& module_name) { return std::string{"mechanism_cpu_"} + module_name; }
+static std::string ion_field(const IonDep& ion) { return fmt::format("ion_{}",       ion.name); }
+static std::string ion_index(const IonDep& ion) { return fmt::format("ion_{}_index", ion.name); }
 
-inline static std::string make_cpu_ppack_name(const std::string& module_name) { return make_cpu_class_name(module_name) + std::string{"_pp_"}; }
+static std::string scaled(double coeff) {
+    std::stringstream ss;
+    if (coeff != 1) {
+        ss << as_c_double(coeff) << '*';
+    }
+    return ss.str();
+}
 
 struct index_prop {
     std::string source_var; // array holding the indices
@@ -41,10 +53,8 @@ struct index_prop {
 void emit_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_masked_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
-
-void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true);
+void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true, bool ppack_iface=true);
 void emit_simd_api_body(std::ostream&, APIMethod*, const std::vector<VariableExpression*>& scalars);
-
 void emit_simd_index_initialize(std::ostream& out, const std::list<index_prop>& indices, simd_expr_constraint constraint);
 
 void emit_simd_body_for_loop(std::ostream& out,
@@ -69,6 +79,14 @@ struct cprint {
     }
 };
 
+std::string do_cprint(Expression* cp, int ind) {
+    std::stringstream ss;
+    for (auto i = 0; i < ind; ++i) ss << indent;
+    ss << cprint(cp);
+    for (auto i = 0; i < ind; ++i) ss << popindent;
+    return ss.str();
+}
+
 struct simdprint {
     Expression* expr_;
     bool is_indirect_ = false;
@@ -99,19 +117,10 @@ struct simdprint {
     }
 };
 
-static std::string ion_state_field(std::string ion_name) {
-    return "ion_"+ion_name+"_";
-}
-
-static std::string ion_state_index(std::string ion_name) {
-    return "ion_"+ion_name+"_index_";
-}
-
 std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     auto name           = module_.module_name();
-    auto class_name     = make_cpu_class_name(name);
-    auto namespace_name = "kernel_" + class_name;
-    auto ppack_name     = make_cpu_ppack_name(name);
+    auto namespace_name = "kernel_" + name;
+    auto ppack_name     = "arb_mechanism_ppack";
     auto ns_components  = namespace_components(opt.cpp_namespace);
 
     APIMethod* net_receive_api = find_api_method(module_, "net_rec_api");
@@ -124,7 +133,7 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     bool with_simd = opt.simd.abi!=simd_spec::none;
 
     options_trace_codegen = opt.trace_codegen;
-    
+
     // init_api, state_api, current_api methods are mandatory:
 
     assert_has_scope(init_api, "init");
@@ -165,7 +174,7 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         "#include <cmath>\n"
         "#include <cstddef>\n"
         "#include <memory>\n"
-        "#include <" << arb_private_header_prefix() << "backends/multicore/mechanism.hpp>\n"
+        "#include <"  << arb_header_prefix() << "mechanism_abi.h>\n"
         "#include <" << arb_header_prefix() << "math.hpp>\n";
 
     opt.profile &&
@@ -177,11 +186,10 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         out << "#include <cassert>\n";
     }
 
-    out <<
-        "\n" << namespace_declaration_open(ns_components) <<
-        "\n"
-        "using backend = ::arb::multicore::backend;\n"
-        "using base = ::arb::multicore::mechanism;\n"
+    out <<"\n"
+        << namespace_declaration_open(ns_components)
+        << "namespace " << namespace_name << " {\n"
+        << "\n"
         "using ::arb::math::exprelr;\n"
         "using ::arb::math::safeinv;\n"
         "using ::std::abs;\n"
@@ -204,7 +212,7 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
 
         out << "static constexpr unsigned vector_length_ = ";
         if (opt.simd.size == no_size) {
-            out << "S::simd_abi::native_width<::arb::fvm_value_type>::value;\n";
+            out << "S::simd_abi::native_width<arb_value_type>::value;\n";
         } else {
             out << opt.simd.size << ";\n";
         }
@@ -229,9 +237,10 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         }
 
         out <<
-            "using simd_value = S::simd<::arb::fvm_value_type, vector_length_, " << abi << ">;\n"
-            "using simd_index = S::simd<::arb::fvm_index_type, vector_length_, " << abi << ">;\n"
-            "using simd_mask  = S::simd_mask<::arb::fvm_value_type, vector_length_, "<< abi << ">;\n"
+            "using simd_value = S::simd<arb_value_type, vector_length_, " << abi << ">;\n"
+            "using simd_index = S::simd<arb_index_type, vector_length_, " << abi << ">;\n"
+            "using simd_mask  = S::simd_mask<arb_value_type, vector_length_, "<< abi << ">;\n"
+            "static constexpr unsigned min_align_ = std::max(S::min_align(simd_value{}), S::min_align(simd_index{}));\n"
             "\n"
             "inline simd_value safeinv(simd_value x) {\n"
             "    simd_value ones = simd_cast<simd_value>(1.0);\n"
@@ -240,21 +249,11 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
             "    return S::div(ones, x);\n"
             "}\n"
             "\n";
+    } else {
+       out << "static constexpr unsigned simd_width_ = 1;\n"
+              "static constexpr unsigned min_align_ = std::max(alignof(arb_value_type), alignof(arb_index_type));\n\n";
     }
 
-    out << "struct " << ppack_name << ": public ::arb::multicore::mechanism_ppack {\n" << indent;
-    for (const auto& scalar: vars.scalars) {
-        out << "::arb::fvm_value_type " << scalar->name() <<  " = " << as_c_double(scalar->value()) << ";\n";
-    }
-    for (const auto& array: vars.arrays) {
-        out << "::arb::fvm_value_type* " << array->name() << ";\n";
-    }
-    for (const auto& dep: ion_deps) {
-        out << "::arb::ion_state_view " << ion_state_field(dep.name) << ";\n";
-        out << "::arb::fvm_index_type* " << ion_state_index(dep.name) << ";\n";
-    }
-    out << popindent << "};\n\n";
-
     // Make implementations
     auto emit_body = [&](APIMethod *p) {
         if (with_simd) {
@@ -264,7 +263,51 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         }
     };
 
-    out << "namespace " << namespace_name << " {\n";
+    out << fmt::format(FMT_COMPILE("#define PPACK_IFACE_BLOCK \\\n"
+                                   "[[maybe_unused]] auto  {0}width             = pp->width;\\\n"
+                                   "[[maybe_unused]] auto  {0}n_detectors       = pp->n_detectors;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_ci            = pp->vec_ci;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_di            = pp->vec_di;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_t             = pp->vec_t;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_dt            = pp->vec_dt;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_v             = pp->vec_v;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_i             = pp->vec_i;\\\n"
+                                   "[[maybe_unused]] auto* {0}vec_g             = pp->vec_g;\\\n"
+                                   "[[maybe_unused]] auto* {0}temperature_degC  = pp->temperature_degC;\\\n"
+                                   "[[maybe_unused]] auto* {0}diam_um           = pp->diam_um;\\\n"
+                                   "[[maybe_unused]] auto* {0}time_since_spike  = pp->time_since_spike;\\\n"
+                                   "[[maybe_unused]] auto* {0}node_index        = pp->node_index;\\\n"
+                                   "[[maybe_unused]] auto* {0}multiplicity      = pp->multiplicity;\\\n"
+                                   "[[maybe_unused]] auto* {0}weight            = pp->weight;\\\n"
+                                   "[[maybe_unused]] auto& {0}events            = pp->events;\\\n"
+                                   "[[maybe_unused]] auto& {0}mechanism_id      = pp->mechanism_id;\\\n"
+                                   "[[maybe_unused]] auto& {0}index_constraints = pp->index_constraints;\\\n"),
+                       pp_var_pfx);
+    auto global = 0;
+    for (const auto& scalar: vars.scalars) {
+        out << fmt::format("[[maybe_unused]] auto {}{} = pp->globals[{}];\\\n", pp_var_pfx, scalar->name(), global);
+        global++;
+    }
+    auto param = 0, state = 0;
+    for (const auto& array: vars.arrays) {
+        if (array->is_state()) {
+            out << fmt::format("[[maybe_unused]] auto* {}{} = pp->state_vars[{}];\\\n", pp_var_pfx, array->name(), state);
+            state++;
+        }
+    }
+    for (const auto& array: vars.arrays) {
+        if (!array->is_state()) {
+            out << fmt::format("[[maybe_unused]] auto* {}{} = pp->parameters[{}];\\\n", pp_var_pfx, array->name(), param);
+            param++;
+        }
+    }
+    auto idx = 0;
+    for (const auto& ion: module_.ion_deps()) {
+        out << fmt::format("[[maybe_unused]] auto& {}{} = pp->ion_states[{}];\\\n",       pp_var_pfx, ion_field(ion), idx);
+        out << fmt::format("[[maybe_unused]] auto* {}{} = pp->ion_states[{}].index;\\\n", pp_var_pfx, ion_index(ion), idx);
+        idx++;
+    }
+    out << "//End of IFACEBLOCK\n\n";
 
     out << "// procedure prototypes\n";
     for (auto proc: normal_procedures(module_)) {
@@ -278,208 +321,141 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
             out << ";\n";
         }
     }
-    out << "\n";
-
-    out << "// interface methods\n";
-    out << "void init(" << ppack_name << "* pp) {\n" << indent;
+    out << "\n"
+        << "// interface methods\n";
+    out << "static void init(arb_mechanism_ppack* pp) {\n" << indent;
     emit_body(init_api);
+    if (init_api && init_api->body() && !init_api->body()->statements().empty()) {
+        auto n = std::count_if(vars.arrays.begin(), vars.arrays.end(),
+                               [] (const auto& v) { return v->is_state(); });
+        out << fmt::format(FMT_COMPILE("if (!{0}multiplicity) return;\n"
+                                       "for (arb_size_type ix = 0; ix < {1}; ++ix) {{\n"
+                                       "    for (arb_size_type iy = 0; iy < {0}width; ++iy) {{\n"
+                                       "        pp->state_vars[ix][iy] *= {0}multiplicity[iy];\n"
+                                       "    }}\n"
+                                       "}}\n"),
+                           pp_var_pfx,
+                           n);
+    }
     out << popindent << "}\n\n";
 
-    out << "void advance_state(" << ppack_name << "* pp) {\n" << indent;
+    out << "static void advance_state(arb_mechanism_ppack* pp) {\n" << indent;
     out << profiler_enter("advance_integrate_state");
     emit_body(state_api);
     out << profiler_leave();
     out << popindent << "}\n\n";
 
-    out << "void compute_currents(" << ppack_name << "* pp) {\n" << indent;
+    out << "static void compute_currents(arb_mechanism_ppack* pp) {\n" << indent;
     out << profiler_enter("advance_integrate_current");
     emit_body(current_api);
     out << profiler_leave();
     out << popindent << "}\n\n";
 
-    out << "void write_ions(" << ppack_name << "* pp) {\n" << indent;
+    out << "static void write_ions(arb_mechanism_ppack* pp) {\n" << indent;
     emit_body(write_ions_api);
     out << popindent << "}\n\n";
 
     if (net_receive_api) {
-        const std::string weight_arg = net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name();
-        out <<
-            "void net_receive(" << ppack_name << "* pp, int i_, ::arb::fvm_value_type " << weight_arg << ") {\n" << indent;
-            emit_api_body(out, net_receive_api, false);
-            out << popindent <<
-            "}\n\n"
-            "void apply_events(" << ppack_name << "* pp, ::arb::fvm_size_type mechanism_id, ::arb::multicore::deliverable_event_stream::state events) {\n" << indent <<
-            "auto ncell = events.n_streams();\n"
-            "for (::arb::fvm_size_type c = 0; c<ncell; ++c) {\n" << indent <<
-            "auto begin = events.begin_marked(c);\n"
-            "auto end = events.end_marked(c);\n"
-            "for (auto p = begin; p<end; ++p) {\n" << indent <<
-            "if (p->mech_id==mechanism_id) " << namespace_name << "::net_receive(pp, p->mech_index, p->weight);\n" << popindent <<
-            "}\n" << popindent <<
-            "}\n" << popindent <<
-            "}\n"
-            "\n";
+        out << fmt::format(FMT_COMPILE("static void apply_events(arb_mechanism_ppack* pp, arb_deliverable_event_stream* stream_ptr) {{\n"
+                                       "    PPACK_IFACE_BLOCK;\n"
+                                       "    auto ncell = stream_ptr->n_streams;\n"
+                                       "    for (arb_size_type c = 0; c<ncell; ++c) {{\n"
+                                       "        auto begin  = stream_ptr->events + stream_ptr->begin[c];\n"
+                                       "        auto end    = stream_ptr->events + stream_ptr->end[c];\n"
+                                       "        for (auto p = begin; p<end; ++p) {{\n"
+                                       "            auto i_     = p->mech_index;\n"
+                                       "            auto {1} = p->weight;\n"
+                                       "            if (p->mech_id=={0}mechanism_id) {{\n"),
+                           pp_var_pfx,
+                           net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name());
+        out << indent << indent << indent << indent;
+        emit_api_body(out, net_receive_api, false, false);
+        out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n\n";
+    } else {
+        out << "static void apply_events(arb_mechanism_ppack*, arb_deliverable_event_stream*) {}\n\n";
     }
 
     if(post_event_api) {
         const std::string time_arg = post_event_api->args().empty() ? "time" : post_event_api->args().front()->is_argument()->name();
-        out <<
-            "void post_event(" << ppack_name << "* pp) {\n" << indent <<
-            "int n_ = pp->width_;\n"
-            "for (int i_ = 0; i_ < n_; ++i_) {\n" << indent <<
-            "auto node_index_i_ = pp->node_index_[i_];\n"
-            "auto cid_ = pp->vec_ci_[node_index_i_];\n"
-            "auto offset_ = pp->n_detectors_ * cid_;\n"
-            "for (::arb::fvm_index_type c = 0; c < pp->n_detectors_; c++) {\n" << indent <<
-            "auto " << time_arg << " = pp->time_since_spike_[offset_ + c];\n"
-            "if (" <<  time_arg << " >= 0) {\n" << indent;
-            emit_api_body(out, post_event_api, false);
-            out << popindent <<
-            "}\n" << popindent <<
-            "}\n" << popindent <<
-            "}\n" << popindent <<
-            "}\n\n";
+        out << fmt::format(FMT_COMPILE("static void post_event(arb_mechanism_ppack* pp) {{\n"
+                                       "    PPACK_IFACE_BLOCK;\n"
+                                       "    for (arb_size_type i_ = 0; i_ < {0}width; ++i_) {{\n"
+                                       "        auto node_index_i_ = {0}node_index[i_];\n"
+                                       "        auto cid_          = {0}vec_ci[node_index_i_];\n"
+                                       "        auto offset_       = {0}n_detectors * cid_;\n"
+                                       "        for (auto c = 0; c < {0}n_detectors; c++) {{\n"
+                                       "            auto {1} = {0}time_since_spike[offset_ + c];\n"
+                                       "            if ({1} >= 0) {{\n"),
+                           pp_var_pfx,
+                           time_arg);
+        out << indent << indent << indent << indent;
+        emit_api_body(out, post_event_api, false, false);
+        out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n";
+    } else {
+        out << "static void post_event(arb_mechanism_ppack*) {}\n";
     }
 
-
-    out << "// Procedure definitions\n";
+    out << "\n// Procedure definitions\n";
     for (auto proc: normal_procedures(module_)) {
         if (with_simd) {
             emit_simd_procedure_proto(out, proc, ppack_name);
             auto simd_print = simdprint(proc->body(), vars.scalars);
-            out << " {\n" << indent << simd_print << popindent <<  "}\n\n";
+            out << " {\n"
+                << indent
+                << "PPACK_IFACE_BLOCK;\n"
+                << simd_print
+                << popindent
+                << "}\n\n";
 
             emit_masked_simd_procedure_proto(out, proc, ppack_name);
             auto masked_print = simdprint(proc->body(), vars.scalars);
             masked_print.set_masked();
-            out << " {\n" << indent << masked_print << popindent << "}\n\n";
+            out << " {\n"
+                << indent
+                << "PPACK_IFACE_BLOCK;\n"
+                << masked_print
+                << popindent
+                << "}\n\n";
         } else {
             emit_procedure_proto(out, proc, ppack_name);
-            out <<
-                " {\n" << indent <<
-                cprint(proc->body()) << popindent <<
-                "}\n\n";
-        }
-    }
-
-    out << popindent << "}\n\n"; // close kernel namespace
-
-    out <<
-        "class " << class_name << ": public base {\n"
-        "public:\n" << indent <<
-        "const ::arb::mechanism_fingerprint& fingerprint() const override {\n" << indent <<
-        "static ::arb::mechanism_fingerprint hash = " << quote(fingerprint) << ";\n"
-        "return hash;\n" << popindent <<
-        "}\n"
-        "std::string internal_name() const override { return " << quote(name) << "; }\n"
-        "::arb::mechanismKind kind() const override { return " << module_kind_str(module_) << "; }\n"
-        "::arb::mechanism_ptr clone() const override { return ::arb::mechanism_ptr(new " << class_name << "()); }\n"
-        "\n"
-        "void init() override { " << namespace_name << "::init(&pp_); }\n"
-        "void advance_state() override { " << namespace_name << "::advance_state(&pp_); }\n"
-        "void compute_currents() override { " << namespace_name << "::compute_currents(&pp_); }\n"
-        "void write_ions() override{ " << namespace_name << "::write_ions(&pp_); }\n";
-
-    net_receive_api &&
-        out << "void apply_events(deliverable_event_stream::state events) override { " << namespace_name << "::apply_events(&pp_, mechanism_id_, events); }\n";
-
-    post_event_api &&
-        out << "void post_event() override { " << namespace_name <<  "::post_event(&pp_); };\n";
-
-    with_simd &&
-        out << "unsigned simd_width() const override { return simd_width_; }\n";
-
-    out <<
-        "\n" << popindent <<
-        "protected:\n" << indent <<
-        "std::size_t object_sizeof() const override { return sizeof(*this); }\n" <<
-        "virtual ::arb::mechanism_ppack* ppack_ptr() override { return &pp_; }\n";
-
-    io::separator sep("\n", ",\n");
-    if (!vars.scalars.empty()) {
-        out <<
-            "mechanism_global_table global_table() override {\n" << indent <<
-            "return {" << indent;
-
-        for (const auto& scalar: vars.scalars) {
-            auto memb = scalar->name();
-            out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
+            out << " {\n" << indent
+                << "PPACK_IFACE_BLOCK;\n"
+                << cprint(proc->body())
+                << popindent << "}\n";
         }
-        out << popindent << "\n};\n" << popindent << "}\n";
     }
 
-    if (!vars.arrays.empty()) {
-        out <<
-            "mechanism_field_table field_table() override {\n" << indent <<
-            "return {" << indent;
+    out << popindent
+        << "#undef PPACK_IFACE_BLOCK\n"
+        << "} // namespace kernel_" << name
+        << "\n"
+        << namespace_declaration_close(ns_components)
+        << "\n";
+
+    std::stringstream ss;
+    for (const auto& c: ns_components) ss << c << "::";
+    ss << namespace_name << "::";
+
+    out << fmt::format(FMT_COMPILE("extern \"C\" {{\n"
+                                   "  arb_mechanism_interface* make_{0}_{1}_interface_multicore() {{\n"
+                                   "    static arb_mechanism_interface result;\n"
+                                   "    result.partition_width = {3}simd_width_;\n"
+                                   "    result.backend = {2};\n"
+                                   "    result.alignment = {3}min_align_;\n"
+                                   "    result.init_mechanism = {3}init;\n"
+                                   "    result.compute_currents = {3}compute_currents;\n"
+                                   "    result.apply_events = {3}apply_events;\n"
+                                   "    result.advance_state = {3}advance_state;\n"
+                                   "    result.write_ions = {3}write_ions;\n"
+                                   "    result.post_event = {3}post_event;\n"
+                                   "    return &result;\n"
+                                   "  }}"
+                                   "}}\n\n"),
+                       std::regex_replace(opt.cpp_namespace, std::regex{"::"}, "_"),
+                       name,
+                       "arb_backend_kind_cpu",
+                       ss.str());
 
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        out <<
-            "mechanism_field_default_table field_default_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            auto dflt = array->value();
-            if (!std::isnan(dflt)) {
-                out << sep << "{" << quote(memb) << ", " << as_c_double(dflt) << "}";
-            }
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        out <<
-            "mechanism_state_table state_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            if(array->is_state()) {
-                out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
-            }
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-    }
-
-    if (!ion_deps.empty()) {
-        out <<
-            "mechanism_ion_state_table ion_state_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& dep: ion_deps) {
-            out << sep << "{\"" << dep.name << "\", &pp_." << ion_state_field(dep.name) << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        sep.reset();
-        out << "mechanism_ion_index_table ion_index_table() override {\n" << indent << "return {" << indent;
-        for (const auto& dep: ion_deps) {
-            out << sep << "{\"" << dep.name << "\", &pp_." << ion_state_index(dep.name) << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-    }
-
-    out << popindent << "\n"
-        "private:\n" << indent;
-    out << ppack_name << " pp_;\n";
-
-    out << popindent <<
-        "};\n\n"
-        "template <typename B> ::arb::concrete_mech_ptr<B> make_mechanism_" <<name << "();\n"
-        "template <> ::arb::concrete_mech_ptr<backend> make_mechanism_" << name << "<backend>() {\n" << indent <<
-        "return ::arb::concrete_mech_ptr<backend>(new " << class_name << "());\n" << popindent <<
-        "}\n\n";
-
-    out << namespace_declaration_close(ns_components);
     EXIT(out);
     return out.str();
 }
@@ -495,9 +471,10 @@ void CPrinter::visit(LocalVariable* sym) {
 }
 
 void CPrinter::visit(VariableExpression *sym) {
-    out_ << "pp->" << sym->name() << (sym->is_range()? "[i_]": "");
+    out_ << fmt::format("{}{}{}", pp_var_pfx, sym->name(), sym->is_range() ? "[i_]": "");
 }
 
+
 void CPrinter::visit(CallExpression* e) {
     out_ << e->name() << "(pp, i_";
     for (auto& arg: e->args()) {
@@ -513,7 +490,7 @@ void CPrinter::visit(BlockExpression* block) {
     if (!block->is_nested()) {
         auto locals = pure_locals(block->scope());
         if (!locals.empty()) {
-            out_ << "::arb::fvm_value_type ";
+            out_ << "arb_value_type ";
             io::separator sep(", ");
             for (auto local: locals) {
                 out_ << sep << local->name();
@@ -536,19 +513,19 @@ static std::string index_i_name(const std::string& index_var) {
 }
 
 void emit_procedure_proto(std::ostream& out, ProcedureExpression* e, const std::string& ppack_name, const std::string& qualified) {
-    out << "void " << qualified << (qualified.empty()? "": "::") << e->name() << "(" << ppack_name << "* pp, int i_";
+    out << "[[maybe_unused]] static void " << qualified << (qualified.empty()? "": "::") << e->name() << "(" << ppack_name << "* pp, int i_";
     for (auto& arg: e->args()) {
-        out << ", ::arb::fvm_value_type " << arg->is_argument()->name();
+        out << ", arb_value_type " << arg->is_argument()->name();
     }
     out << ")";
 }
 
 namespace {
     // Access through ppack
-    std::string data_via_ppack(const indexed_variable_info& i) { return "pp->" + i.data_var; }
+    std::string data_via_ppack(const indexed_variable_info& i) { return pp_var_pfx + i.data_var; }
     std::string node_index_i_name(const indexed_variable_info& i) { return i.node_index_var + "i_"; }
     std::string source_index_i_name(const index_prop& i) { return i.source_var + "i_"; }
-    std::string source_var(const index_prop& i) { return "pp->" + i.source_var; }
+    std::string source_var(const index_prop& i) { return pp_var_pfx + i.source_var; }
 
     // Convenience I/O wrapper for emitting indexed access to an external variable.
 
@@ -559,7 +536,7 @@ namespace {
         friend std::ostream& operator<<(std::ostream& o, const deref& wrap) {
             auto index_var = wrap.d.cell_index_var.empty() ? wrap.d.node_index_var : wrap.d.cell_index_var;
             auto i_name    = index_i_name(index_var);
-            index_var = "pp->" + index_var;
+            index_var = pp_var_pfx + index_var;
             return o << data_via_ppack(wrap.d) << '[' << (wrap.d.scalar() ? "0": i_name) << ']';
         }
     };
@@ -581,18 +558,15 @@ std::list<index_prop> gather_indexed_vars(const std::vector<LocalVariable*>& ind
         }
     }
     return indices;
-};
+}
 
 void emit_state_read(std::ostream& out, LocalVariable* local) {
     ENTER(out);
-    out << "::arb::fvm_value_type " << cprint(local) << " = ";
+    out << "arb_value_type " << cprint(local) << " = ";
 
     if (local->is_read()) {
         auto d = decode_indexed_variable(local->external_variable());
-        if (d.scale != 1) {
-            out << as_c_double(d.scale) << "*";
-        }
-        out << deref(d) << ";\n";
+        out << scaled(d.scale) << deref(d) << ";\n";
     }
     else {
         out << "0;\n";
@@ -611,33 +585,26 @@ void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* externa
     }
 
     if (d.accumulate) {
-        out << deref(d) << " = fma(";
-        if (coeff != 1) {
-            out << as_c_double(coeff) << '*';
-        }
-        out << "pp->weight_[i_], " << from->name() << ", " << deref(d) << ");\n";
+        out << deref(d) << " = fma("
+            << scaled(coeff) << pp_var_pfx << "weight[i_], "
+            << from->name() << ", " << deref(d) << ");\n";
     }
     else {
-        out << deref(d) << " = ";
-        if (coeff != 1) {
-            out << as_c_double(coeff) << '*';
-        }
-        out << from->name() << ";\n";
+        out << deref(d) << " = " << scaled(coeff) << from->name() << ";\n";
     }
     EXIT(out);
 }
 
-void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop) {
+void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppack_iface) {
     ENTER(out);
     auto body = method->body();
     auto indexed_vars = indexed_locals(method->scope());
 
     std::list<index_prop> indices = gather_indexed_vars(indexed_vars, "i_");
     if (!body->statements().empty()) {
-        cv_loop && out <<
-            "int n_ = pp->width_;\n"
-            "for (int i_ = 0; i_ < n_; ++i_) {\n" << indent;
-
+        ppack_iface && out << "PPACK_IFACE_BLOCK;\n";
+        cv_loop && out << fmt::format("for (arb_size_type i_ = 0; i_ < {}width; ++i_) {{\n", pp_var_pfx)
+                        << indent;
         for (auto index: indices) {
             out << "auto " << source_index_i_name(index) << " = " << source_var(index) << "[" << index.index_name << "];\n";
         }
@@ -673,10 +640,10 @@ void SimdPrinter::visit(VariableExpression *sym) {
     ENTERM(out_, "variable");
     if (sym->is_range()) {
         auto index = is_indirect_? "index_": "i_";
-        out_ << "simd_cast<simd_value>(indirect(pp->" << sym->name() << "+" << index << ", simd_width_))";
+        out_ << "simd_cast<simd_value>(indirect(" << pp_var_pfx << sym->name() << "+" << index << ", simd_width_))";
     }
     else {
-        out_ << "pp->" << sym->name();
+        out_ << pp_var_pfx << sym->name();
     }
     EXITM(out_, "variable");
 }
@@ -697,7 +664,7 @@ void SimdPrinter::visit(AssignmentExpression* e) {
     if (scalars_.count(e->lhs()->is_identifier()->name()))  cast = false;
 
     if (lhs->is_variable() && lhs->is_variable()->is_range()) {
-        std::string pfx = lhs->is_local_variable() ? "" : "pp->";
+        std::string pfx = lhs->is_local_variable() ? "" : pp_var_pfx;
         if(is_indirect_)
             out_ << "indirect(" << pfx << lhs->name() << "+index_, simd_width_) = ";
         else
@@ -713,14 +680,14 @@ void SimdPrinter::visit(AssignmentExpression* e) {
         if (!input_mask_.empty())
             out_ << ")";
     } else {
-        std::string pfx = lhs->is_local_variable() ? "" : "pp->";
+        std::string pfx = lhs->is_local_variable() ? "" : pp_var_pfx;
         out_ << "assign(" << pfx << lhs->name() << ", ";
         if (auto rhs = e->rhs()->is_identifier()) {
             if (auto sym = rhs->symbol()) {
                 // We shouldn't call the rhs visitor in this case because it automatically casts indirect expressions
                 if (sym->is_variable() && sym->is_variable()->is_range()) {
                     auto index = is_indirect_ ? "index_" : "i_";
-                    out_ << "indirect(pp->" << rhs->name() << "+" << index << ", simd_width_))";
+                    out_ << "indirect(" << pp_var_pfx << rhs->name() << "+" << index << ", simd_width_))";
                     return;
                 }
             }
@@ -773,7 +740,7 @@ void SimdPrinter::visit(BlockExpression* block) {
 
 void emit_simd_procedure_proto(std::ostream& out, ProcedureExpression* e, const std::string& ppack_name, const std::string& qualified) {
     ENTER(out);
-    out << "void " << qualified << (qualified.empty()? "": "::") << e->name() << "(" << ppack_name << "* pp, ::arb::fvm_index_type i_";
+    out << "[[maybe_unused]] static void " << qualified << (qualified.empty()? "": "::") << e->name() << "(arb_mechanism_ppack* pp, arb_index_type i_";
     for (auto& arg: e->args()) {
         out << ", const simd_value& " << arg->is_argument()->name();
     }
@@ -783,8 +750,8 @@ void emit_simd_procedure_proto(std::ostream& out, ProcedureExpression* e, const
 
 void emit_masked_simd_procedure_proto(std::ostream& out, ProcedureExpression* e, const std::string& ppack_name, const std::string& qualified) {
     ENTER(out);
-    out << "void " << qualified << (qualified.empty()? "": "::") << e->name()
-    << "(" << ppack_name << "* pp, ::arb::fvm_index_type i_, simd_mask mask_input_";
+    out << "[[maybe_unused]] static void " << qualified << (qualified.empty()? "": "::") << e->name()
+    << "(arb_mechanism_ppack* pp, arb_index_type i_, simd_mask mask_input_";
     for (auto& arg: e->args()) {
         out << ", const simd_value& " << arg->is_argument()->name();
     }
@@ -799,7 +766,7 @@ void emit_simd_state_read(std::ostream& out, LocalVariable* local, simd_expr_con
     if (local->is_read()) {
         auto d = decode_indexed_variable(local->external_variable());
         if (d.scalar()) {
-            out << " = simd_cast<simd_value>(pp->" << d.data_var
+            out << " = simd_cast<simd_value>(" << pp_var_pfx << d.data_var
                 << "[0]);\n";
         }
         else {
@@ -992,15 +959,16 @@ void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
                                   const simd_expr_constraint& constraint,
                                   std::string underlying_constraint_name) {
     ENTER(out);
-    out << "constraint_category_ = index_constraint::"<< underlying_constraint_name << ";\n";
-    out << "for (unsigned i_ = 0; i_ < pp->index_constraints_." << underlying_constraint_name
-        << ".size(); i_++) {\n"
+    out << fmt::format("constraint_category_ = index_constraint::{1};\n"
+                       "for (auto i_ = 0ul; i_ < {0}index_constraints.n_{1}; i_++) {{\n"
+                       "    arb_index_type index_ = {0}index_constraints.{1}[i_];\n",
+                       pp_var_pfx,
+                       underlying_constraint_name)
         << indent;
-
-    out << "::arb::fvm_index_type index_ = pp->index_constraints_." << underlying_constraint_name << "[i_];\n";
     if (requires_weight) {
-        out << "simd_value w_;\n"
-            << "assign(w_, indirect((pp->weight_+index_), simd_width_));\n";
+        out << fmt::format("simd_value w_;\n"
+                           "assign(w_, indirect(({}weight+index_), simd_width_));\n",
+                           pp_var_pfx);
     }
 
     emit_simd_body_for_loop(out, body, indexed_vars, scalars, indices, constraint);
@@ -1015,7 +983,6 @@ void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<
     bool requires_weight = false;
 
     ENTER(out);
-
     for (auto& s: body->is_block()->statements()) {
         if (s->is_assignment()) {
             for (auto& v: indexed_vars) {
@@ -1037,6 +1004,7 @@ void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<
         }
     }
     if (!body->statements().empty()) {
+        out << "PPACK_IFACE_BLOCK;\n";
         out << "assert(simd_width_ <= (unsigned)S::width(simd_cast<simd_value>(0)));\n";
         if (!indices.empty()) {
             out << "index_constraint constraint_category_;\n\n";
@@ -1072,10 +1040,12 @@ void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<
                 emit_simd_state_read(out, sym, simd_expr_constraint::other);
             }
 
-            out <<
-                "unsigned n_ = pp->width_;\n\n"
-                "for (unsigned i_ = 0; i_ < n_; i_ += simd_width_) {\n" << indent <<
-                simdprint(body, scalars) << popindent <<
+            out << fmt::format("for (arb_size_type i_ = 0; i_ < {}width; i_ += simd_width_) {{\n",
+                               pp_var_pfx)
+                << indent
+                << simdprint(body, scalars)
+                << popindent
+                <<
                 "}\n";
         }
     }
diff --git a/modcc/printer/gpuprinter.cpp b/modcc/printer/gpuprinter.cpp
index 3b5953b0..00dd9e7f 100644
--- a/modcc/printer/gpuprinter.cpp
+++ b/modcc/printer/gpuprinter.cpp
@@ -2,6 +2,12 @@
 #include <iostream>
 #include <string>
 #include <set>
+#include <regex>
+
+#define FMT_HEADER_ONLY YES
+#include <fmt/core.h>
+#include <fmt/format.h>
+#include <fmt/compile.h>
 
 #include "gpuprinter.hpp"
 #include "expression.hpp"
@@ -14,12 +20,11 @@ using io::indent;
 using io::popindent;
 using io::quote;
 
-void emit_common_defs(std::ostream&, const Module& module_);
-void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true);
+void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true, bool ppack=true);
 void emit_procedure_body_cu(std::ostream& out, ProcedureExpression* proc);
 void emit_state_read_cu(std::ostream& out, LocalVariable* local);
-void emit_state_update_cu(std::ostream& out, Symbol* from,
-                          IndexedVariable* external, bool is_point_proc);
+void emit_state_update_cu(std::ostream& out, Symbol* from, IndexedVariable* external, bool is_point_proc);
+
 const char* index_id(Symbol *s);
 
 struct cuprint {
@@ -32,246 +37,157 @@ struct cuprint {
     }
 };
 
-std::string make_class_name(const std::string& module_name) {
-    return "mechanism_gpu_"+module_name;
-}
+static std::string make_class_name(const std::string& n) { return "mechanism_" + n + "_gpu";}
+static std::string make_ppack_name(const std::string& module_name) { return make_class_name(module_name)+"_pp_"; }
+static std::string ion_field(const IonDep& ion) { return fmt::format("ion_{}",       ion.name); }
+static std::string ion_index(const IonDep& ion) { return fmt::format("ion_{}_index", ion.name); }
 
-std::string make_ppack_name(const std::string& module_name) {
-    return make_class_name(module_name)+"_pp_";
-}
-
-static std::string ion_state_field(const std::string& ion_name) {
-    return "ion_"+ion_name+"_";
-}
-
-static std::string ion_state_index(const std::string& ion_name) {
-    return "ion_"+ion_name+"_index_";
-}
 
 std::string emit_gpu_cpp_source(const Module& module_, const printer_options& opt) {
-    std::string name = module_.module_name();
+    std::string name       = module_.module_name();
     std::string class_name = make_class_name(name);
     std::string ppack_name = make_ppack_name(name);
-    auto ns_components = namespace_components(opt.cpp_namespace);
-
-    NetReceiveExpression* net_receive = find_net_receive(module_);
-    PostEventExpression*  post_event  = find_post_event(module_);
-
+    auto ns_components     = namespace_components(opt.cpp_namespace);
     auto vars = local_module_variables(module_);
     auto ion_deps = module_.ion_deps();
 
-    std::string fingerprint = "<placeholder>";
-
     io::pfxstringstream out;
 
-    net_receive && out <<
-        "#include <" << arb_private_header_prefix() << "backends/event.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "backends/multi_event_stream_state.hpp>\n";
-
-    out << "#include <" << arb_private_header_prefix() << "backends/gpu/mechanism.hpp>\n"
-        << "#include <arbor/mechanism_ppack.hpp>\n";
-
-    out << "\n" << namespace_declaration_open(ns_components) << "\n";
-
-    emit_common_defs(out, module_);
-
-    out <<
-        "void " << class_name << "_init_(" << ppack_name << "&);\n"
-        "void " << class_name << "_advance_state_(" << ppack_name << "&);\n"
-        "void " << class_name << "_compute_currents_(" << ppack_name << "&);\n"
-        "void " << class_name << "_write_ions_(" << ppack_name << "&);\n";
-
-    net_receive && out <<
-        "void " << class_name << "_apply_events_(int mech_id, "
-        << ppack_name << "&, deliverable_event_stream_state events);\n";
-
-    post_event && out <<
-        "void " << class_name << "_post_event_(" << ppack_name << "&);\n";
-
-
-    out <<
-        "\n"
-        "class " << class_name << ": public ::arb::gpu::mechanism {\n"
-        "public:\n" << indent <<
-        "const ::arb::mechanism_fingerprint& fingerprint() const override {\n" << indent <<
-        "static ::arb::mechanism_fingerprint hash = " << quote(fingerprint) << ";\n"
-        "return hash;\n" << popindent <<
-        "}\n\n"
-        "std::string internal_name() const override { return " << quote(name) << "; }\n"
-        "::arb::mechanismKind kind() const override { return " << module_kind_str(module_) << "; }\n"
-        "::arb::mechanism_ptr clone() const override { return ::arb::mechanism_ptr(new " << class_name << "()); }\n"
-        "\n"
-        "void init() override {\n" << indent <<
-        class_name << "_init_(pp_);\n" << popindent <<
-        "}\n\n"
-        "void advance_state() override {\n" << indent <<
-        class_name << "_advance_state_(pp_);\n" << popindent <<
-        "}\n\n"
-        "void compute_currents() override {\n" << indent <<
-        class_name << "_compute_currents_(pp_);\n" << popindent <<
-        "}\n\n"
-        "void write_ions() override {\n" << indent <<
-        class_name << "_write_ions_(pp_);\n" << popindent <<
-        "}\n\n";
-
-    net_receive && out <<
-        "void apply_events(deliverable_event_stream_state events) override {\n" << indent <<
-        class_name << "_apply_events_(mechanism_id_, pp_, events);\n" << popindent <<
-        "}\n\n";
-
-    post_event && out <<
-        "void post_event() override {\n" << indent <<
-        class_name << "_post_event_(pp_);\n" << popindent <<
-        "}\n\n";
-
-    out << popindent <<
-        "protected:\n" << indent <<
-        "std::size_t object_sizeof() const override { return sizeof(*this); }\n"
-        "::arb::mechanism_ppack* ppack_ptr() override { return &pp_; }\n\n";
-
-    io::separator sep("\n", ",\n");
-    if (!vars.scalars.empty()) {
-        out <<
-            "mechanism_global_table global_table() override {\n" << indent <<
-            "return {" << indent;
-
-        for (const auto& scalar: vars.scalars) {
-            auto memb = scalar->name();
-            out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
-        }
-        out << popindent << "\n};\n" << popindent << "}\n";
-    }
-
-    if (!vars.arrays.empty()) {
-        out <<
-            "mechanism_field_table field_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        out <<
-            "mechanism_field_default_table field_default_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            auto dflt = array->value();
-            if (!std::isnan(dflt)) {
-                out << sep << "{" << quote(memb) << ", " << as_c_double(dflt) << "}";
-            }
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        out <<
-            "mechanism_state_table state_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& array: vars.arrays) {
-            auto memb = array->name();
-            if(array->is_state()) {
-                out << sep << "{" << quote(memb) << ", &pp_." << memb << "}";
-            }
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-
-    }
-
-    if (!ion_deps.empty()) {
-        out <<
-            "mechanism_ion_state_table ion_state_table() override {\n" << indent <<
-            "return {" << indent;
-
-        sep.reset();
-        for (const auto& dep: ion_deps) {
-            out << sep << "{\"" << dep.name << "\", &pp_." << ion_state_field(dep.name) << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-
-        sep.reset();
-        out << "mechanism_ion_index_table ion_index_table() override {\n" << indent << "return {" << indent;
-        for (const auto& dep: ion_deps) {
-            out << sep << "{\"" << dep.name << "\", &pp_." << ion_state_index(dep.name) << "}";
-        }
-        out << popindent << "\n};" << popindent << "\n}\n";
-    }
-
-    out << popindent << "\n"
-        "private:\n" << indent <<
-        make_ppack_name(name) << " pp_;\n" << popindent <<
-        "};\n\n"
-        "template <typename B> ::arb::concrete_mech_ptr<B> make_mechanism_" << name << "();\n"
-        "template <> ::arb::concrete_mech_ptr<::arb::gpu::backend> make_mechanism_" << name << "<::arb::gpu::backend>() {\n" << indent <<
-        "return ::arb::concrete_mech_ptr<::arb::gpu::backend>(new " << class_name << "());\n" << popindent <<
-        "}\n\n";
-
-    out << namespace_declaration_close(ns_components);
+    out << "#include <arbor/mechanism_abi.h>\n"
+        << "#include <cmath>\n\n"
+        << namespace_declaration_open(ns_components)
+        << fmt::format("void {0}_init_(arb_mechanism_ppack*);\n"
+                       "void {0}_advance_state_(arb_mechanism_ppack*);\n"
+                       "void {0}_compute_currents_(arb_mechanism_ppack*);\n"
+                       "void {0}_write_ions_(arb_mechanism_ppack*);\n"
+                       "void {0}_apply_events_(arb_mechanism_ppack*, arb_deliverable_event_stream*);\n"
+                       "void {0}_post_event_(arb_mechanism_ppack*);\n\n",
+                       class_name)
+        << namespace_declaration_close(ns_components)
+        << "\n";
+
+    std::stringstream ss;
+    for (const auto& c: ns_components) ss << c <<"::";
+
+    out << fmt::format(FMT_COMPILE("extern \"C\" {{\n"
+                                   "  arb_mechanism_interface* make_{4}_{1}_interface_gpu() {{\n"
+                                   "    static arb_mechanism_interface result;\n"
+                                   "    result.backend={2};\n"
+                                   "    result.partition_width=1;\n"
+                                   "    result.alignment=1;\n"
+                                   "    result.init_mechanism={3}{0}_init_;\n"
+                                   "    result.compute_currents={3}{0}_compute_currents_;\n"
+                                   "    result.apply_events={3}{0}_apply_events_;\n"
+                                   "    result.advance_state={3}{0}_advance_state_;\n"
+                                   "    result.write_ions={3}{0}_write_ions_;\n"
+                                   "    result.post_event={3}{0}_post_event_;\n"
+                                   "    return &result;\n"
+                                   "  }}\n"
+                                   "}};\n\n"),
+                       class_name,
+                       name,
+                       "arb_backend_kind_gpu",
+                       ss.str(),
+                       std::regex_replace(opt.cpp_namespace, std::regex{"::"}, "_"));
+    EXIT(out);
     return out.str();
 }
 
 std::string emit_gpu_cu_source(const Module& module_, const printer_options& opt) {
     std::string name = module_.module_name();
     std::string class_name = make_class_name(name);
-    std::string ppack_name = make_ppack_name(name);
+
     auto ns_components = namespace_components(opt.cpp_namespace);
+
     const bool is_point_proc = module_.kind() == moduleKind::point;
 
     APIMethod* net_receive_api = find_api_method(module_, "net_rec_api");
-    APIMethod* post_event_api = find_api_method(module_, "post_event_api");
-    APIMethod* init_api = find_api_method(module_, "init");
-    APIMethod* state_api = find_api_method(module_, "advance_state");
-    APIMethod* current_api = find_api_method(module_, "compute_currents");
-    APIMethod* write_ions_api = find_api_method(module_, "write_ions");
-
-    assert_has_scope(init_api, "init");
-    assert_has_scope(state_api, "advance_state");
+    APIMethod* post_event_api  = find_api_method(module_, "post_event_api");
+    APIMethod* init_api        = find_api_method(module_, "init");
+    APIMethod* state_api       = find_api_method(module_, "advance_state");
+    APIMethod* current_api     = find_api_method(module_, "compute_currents");
+    APIMethod* write_ions_api  = find_api_method(module_, "write_ions");
+
+    assert_has_scope(init_api,    "init");
+    assert_has_scope(state_api,   "advance_state");
     assert_has_scope(current_api, "compute_currents");
 
     io::pfxstringstream out;
 
-    out <<
-        "#include <iostream>\n"
-        "#include <" << arb_private_header_prefix() << "backends/event.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "backends/multi_event_stream_state.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "backends/gpu/gpu_common.hpp>\n"
-        "#include <" << arb_private_header_prefix() << "backends/gpu/math_cu.hpp>\n"
-        "#include <arbor/mechanism.hpp>\n" <<
-        "#include <arbor/mechanism_ppack.hpp>\n";
+    auto vars = local_module_variables(module_);
 
-    is_point_proc && out <<
-        "#include <" << arb_private_header_prefix() << "backends/gpu/reduce_by_key.hpp>\n";
+    out << "#include <arbor/gpu/gpu_common.hpp>\n"
+           "#include <arbor/gpu/math_cu.hpp>\n"
+           "#include <arbor/gpu/reduce_by_key.hpp>\n"
+           "#include <arbor/mechanism_abi.h>\n";
 
     out << "\n" << namespace_declaration_open(ns_components) << "\n";
 
-    emit_common_defs(out, module_);
+    out << fmt::format(FMT_COMPILE("#define PPACK_IFACE_BLOCK \\\n"
+                                   "auto  {0}width             __attribute__((unused)) = params_.width;\\\n"
+                                   "auto  {0}n_detectors       __attribute__((unused)) = params_.n_detectors;\\\n"
+                                   "auto* {0}vec_ci            __attribute__((unused)) = params_.vec_ci;\\\n"
+                                   "auto* {0}vec_di            __attribute__((unused)) = params_.vec_di;\\\n"
+                                   "auto* {0}vec_t             __attribute__((unused)) = params_.vec_t;\\\n"
+                                   "auto* {0}vec_dt            __attribute__((unused)) = params_.vec_dt;\\\n"
+                                   "auto* {0}vec_v             __attribute__((unused)) = params_.vec_v;\\\n"
+                                   "auto* {0}vec_i             __attribute__((unused)) = params_.vec_i;\\\n"
+                                   "auto* {0}vec_g             __attribute__((unused)) = params_.vec_g;\\\n"
+                                   "auto* {0}temperature_degC  __attribute__((unused)) = params_.temperature_degC;\\\n"
+                                   "auto* {0}diam_um           __attribute__((unused)) = params_.diam_um;\\\n"
+                                   "auto* {0}time_since_spike  __attribute__((unused)) = params_.time_since_spike;\\\n"
+                                   "auto* {0}node_index        __attribute__((unused)) = params_.node_index;\\\n"
+                                   "auto* {0}multiplicity      __attribute__((unused)) = params_.multiplicity;\\\n"
+                                   "auto* {0}state_vars        __attribute__((unused)) = params_.state_vars;\\\n"
+                                   "auto* {0}weight            __attribute__((unused)) = params_.weight;\\\n"
+                                   "auto& {0}events            __attribute__((unused)) = params_.events;\\\n"
+                                   "auto& {0}mechanism_id      __attribute__((unused)) = params_.mechanism_id;\\\n"
+                                   "auto& {0}index_constraints __attribute__((unused)) = params_.index_constraints;\\\n"),
+                       pp_var_pfx);
+    auto global = 0;
+    for (const auto& scalar: vars.scalars) {
+        out << fmt::format("auto {}{} __attribute__((unused)) = params_.globals[{}];\\\n", pp_var_pfx, scalar->name(), global);
+        global++;
+    }
+    auto param = 0, state = 0;
+    for (const auto& array: vars.arrays) {
+        if (array->is_state()) {
+            out << fmt::format("auto* {}{} __attribute__((unused)) = params_.state_vars[{}];\\\n", pp_var_pfx, array->name(), state);
+            state++;
+        }
+    }
+    for (const auto& array: vars.arrays) {
+        if (!array->is_state()) {
+            out << fmt::format("auto* {}{} __attribute__((unused)) = params_.parameters[{}];\\\n", pp_var_pfx, array->name(), param);
+            param++;
+        }
+    }
+    auto idx = 0;
+    for (const auto& ion: module_.ion_deps()) {
+        out << fmt::format("auto& {}{} __attribute__((unused)) = params_.ion_states[{}];\\\n",       pp_var_pfx, ion_field(ion), idx);
+        out << fmt::format("auto* {}{} __attribute__((unused)) = params_.ion_states[{}].index;\\\n", pp_var_pfx, ion_index(ion), idx);
+        idx++;
+    }
+    out << "//End of IFACEBLOCK\n\n";
 
     // Print the CUDA code and kernels:
     //  - first __device__ functions that implement NMODL PROCEDUREs.
     //  - then __global__ kernels that implement API methods and call the procedures.
 
-    out << "namespace {\n\n"; // place inside an anonymous namespace
-
-    out << "using ::arb::gpu::exprelr;\n";
-    out << "using ::arb::gpu::safeinv;\n";
-    out << "using ::arb::gpu::min;\n";
-    out << "using ::arb::gpu::max;\n\n";
+    out << "namespace {\n\n" // place inside an anonymous namespace
+        << "using ::arb::gpu::exprelr;\n"
+        << "using ::arb::gpu::safeinv;\n"
+        << "using ::arb::gpu::min;\n"
+        << "using ::arb::gpu::max;\n\n";
 
     // Procedures as __device__ functions.
     auto emit_procedure_kernel = [&] (ProcedureExpression* e) {
-        out << "__device__\n"
-            << "void " << e->name()
-            << "(" << ppack_name << " params_, int tid_";
-        for(auto& arg: e->args()) {
-            out << ", ::arb::fvm_value_type " << arg->is_argument()->name();
-        }
+        out << fmt::format("__device__\n"
+                           "void {}(arb_mechanism_ppack params_, int tid_",
+                           e->name());
+        for(auto& arg: e->args()) out << ", arb_value_type " << arg->is_argument()->name();
         out << ") {\n" << indent
+            << "PPACK_IFACE_BLOCK;\n"
             << cuprint(e->body())
             << popindent << "}\n\n";
     };
@@ -285,8 +201,8 @@ std::string emit_gpu_cu_source(const Module& module_, const printer_options& opt
         // Only print the kernel if the method is not empty.
         if (!e->body()->statements().empty()) {
             out << "__global__\n"
-                << "void " << e->name() << "(" << ppack_name << " params_) {\n" << indent
-                << "int n_ = params_.width_;\n"
+                << "void " << e->name() << "(arb_mechanism_ppack params_) {\n" << indent
+                << "int n_ = params_.width;\n"
                 << "int tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n";
             emit_api_body_cu(out, e, is_point_proc);
             out << popindent << "}\n\n";
@@ -294,131 +210,140 @@ std::string emit_gpu_cu_source(const Module& module_, const printer_options& opt
     };
 
     emit_api_kernel(init_api);
+    if (init_api && !init_api->body()->statements().empty()) {
+        out << fmt::format(FMT_COMPILE("__global__\n"
+                                       "void multiply(arb_mechanism_ppack params_) {{\n"
+                                       "    PPACK_IFACE_BLOCK;\n"
+                                       "    auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n"
+                                       "    auto idx_ = blockIdx.y;"
+                                       "    if(tid_<{0}width) {{\n"
+                                       "        {0}state_vars[idx_][tid_] *= {0}multiplicity[tid_];\n"
+                                       "    }}\n"
+                                       "}}\n\n"),
+                           pp_var_pfx);
+    }
     emit_api_kernel(state_api);
     emit_api_kernel(current_api);
     emit_api_kernel(write_ions_api);
 
     // event delivery
     if (net_receive_api) {
-        const std::string weight_arg = net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name();
-        out << "__global__\n"
-            << "void apply_events(int mech_id_, " <<  ppack_name << " params_, "
-            << "deliverable_event_stream_state events) {\n" << indent
-            << "auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n"
-            << "auto const ncell_ = events.n;\n\n"
-
-            << "if(tid_<ncell_) {\n" << indent
-            << "auto begin = events.ev_data+events.begin_offset[tid_];\n"
-            << "auto end = events.ev_data+events.end_offset[tid_];\n"
-            << "for (auto p = begin; p<end; ++p) {\n" << indent
-            << "if (p->mech_id==mech_id_) {\n" << indent
-            << "auto tid_ = p->mech_index;\n"
-            << "auto " << weight_arg << " = p->weight;\n";
-            emit_api_body_cu(out, net_receive_api, is_point_proc, false);
-            out << popindent << "}\n"
-            << popindent << "}\n"
-            << popindent << "}\n"
-            << popindent << "}\n";
+        out << fmt::format(FMT_COMPILE("__global__\n"
+                                       "void apply_events(arb_mechanism_ppack params_, arb_deliverable_event_stream stream) {{\n"
+                                       "    PPACK_IFACE_BLOCK;\n"
+                                       "    auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n"
+                                       "    if(tid_<stream.n_streams) {{\n"
+                                       "        auto begin = stream.events + stream.begin[tid_];\n"
+                                       "        auto end   = stream.events + stream.end[tid_];\n"
+                                       "        for (auto p = begin; p<end; ++p) {{\n"
+                                       "            if (p->mech_id=={1}mechanism_id) {{\n"
+                                       "                auto tid_ = p->mech_index;\n"
+                                       "                auto {0} = p->weight;\n"),
+                           net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name(),
+                           pp_var_pfx);
+        out << indent << indent << indent << indent;
+        emit_api_body_cu(out, net_receive_api, is_point_proc, false, false);
+        out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n";
     }
 
     // event delivery
     if (post_event_api) {
         const std::string time_arg = post_event_api->args().empty() ? "time" : post_event_api->args().front()->is_argument()->name();
-        out << "__global__\n"
-            << "void post_event(" <<  ppack_name << " params_) {\n" << indent
-            << "int n_ = params_.width_;\n"
-            << "auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n"
-            << "if (tid_<n_) {\n" << indent
-            << "auto node_index_i_ = params_.node_index_[tid_];\n"
-            << "auto cid_ = params_.vec_ci_[node_index_i_];\n"
-            << "auto offset_ = params_.n_detectors_ * cid_;\n"
-            << "for (unsigned c = 0; c < params_.n_detectors_; c++) {\n" << indent
-            << "auto " << time_arg << " = params_.time_since_spike_[offset_ + c];\n"
-            << "if (" <<  time_arg << " >= 0) {\n" << indent;
-            emit_api_body_cu(out, post_event_api, is_point_proc, false);
-            out << popindent << "}\n"
-            << popindent << "}\n"
-            << popindent << "}\n"
-            << popindent << "}\n";
+        out << fmt::format(FMT_COMPILE("__global__\n"
+                                       "void post_event(arb_mechanism_ppack params_) {{\n"
+                                       "    PPACK_IFACE_BLOCK;\n"
+                                       "    auto tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n"
+                                       "    if (tid_<{1}width) {{\n"
+                                       "        auto node_index_i_ = {1}node_index[tid_];\n"
+                                       "        auto cid_ = {1}vec_ci[node_index_i_];\n"
+                                       "        auto offset_ = {1}n_detectors * cid_;\n"
+                                       "        for (unsigned c = 0; c < {1}n_detectors; c++) {{\n"
+                                       "            auto {0} = {1}time_since_spike[offset_ + c];\n"
+                                       "            if ({0} >= 0) {{\n"),
+                           time_arg,
+                           pp_var_pfx);
+        out << indent << indent << indent << indent;
+        emit_api_body_cu(out, post_event_api, is_point_proc, false, false);
+        out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n";
     }
 
-    out << "} // namspace\n\n"; // close anonymous namespace
+    out << "} // namespace\n\n"; // close anonymous namespace
 
     // Write wrappers.
-    auto emit_api_wrapper = [&] (APIMethod* e) {
-        out << "void " << class_name << "_" << e->name() << "_(" << ppack_name << "& p) {";
-
-        // Only call the kernel if the kernel is required.
-        !e->body()->statements().empty() && out
-            << "\n" << indent
-            << "auto n = p.width_;\n"
-            << "unsigned block_dim = 128;\n"
-            << "unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
-            << e->name() << "<<<grid_dim, block_dim>>>(p);\n"
-            << popindent;
-
+    auto emit_api_wrapper = [&] (APIMethod* e, const auto& width, std::string_view name="") {
+        auto api_name = name.empty() ? e->name() : name;
+        out << fmt::format(FMT_COMPILE("void {}_{}_(arb_mechanism_ppack* p) {{"), class_name, api_name);
+        if(!e->body()->statements().empty()) {
+            out << fmt::format(FMT_COMPILE("\n"
+                                           "    auto n = p->{};\n"
+                                           "    unsigned block_dim = 128;\n"
+                                           "    unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
+                                           "    {}<<<grid_dim, block_dim>>>(*p);\n"),
+                               width,
+                               api_name);
+        }
         out << "}\n\n";
     };
-    emit_api_wrapper(init_api);
-    emit_api_wrapper(current_api);
-    emit_api_wrapper(state_api);
-    emit_api_wrapper(write_ions_api);
-
-    net_receive_api && out
-        << "void " << class_name << "_apply_events_("
-        << "int mech_id, "
-        << ppack_name << "& p, deliverable_event_stream_state events) {\n" << indent
-        << "auto n = events.n;\n"
-        << "unsigned block_dim = 128;\n"
-        << "unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
-        << "apply_events<<<grid_dim, block_dim>>>(mech_id, p, events);\n"
-        << popindent << "}\n\n";
-
-    post_event_api && out
-        << "void " << class_name << "_post_event_("
-        << ppack_name << "& p) {\n" << indent
-        << "auto n = p.width_;\n"
-        << "unsigned block_dim = 128;\n"
-        << "unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
-        << "post_event<<<grid_dim, block_dim>>>(p);\n"
-        << popindent << "}\n\n";
-
-    out << namespace_declaration_close(ns_components);
-    return out.str();
-}
-
-void emit_common_defs(std::ostream& out, const Module& module_) {
-    std::string class_name = make_class_name(module_.module_name());
-    std::string ppack_name = make_ppack_name(module_.module_name());
-
-    auto vars = local_module_variables(module_);
-    auto ion_deps = module_.ion_deps();
 
-    find_net_receive(module_) && out <<
-        "using deliverable_event_stream_state =\n"
-        "    ::arb::multi_event_stream_state<::arb::deliverable_event_data>;\n\n";
+    auto emit_empty_wrapper = [&] (std::string_view name) {
+        out << fmt::format(FMT_COMPILE("void {}_{}_(arb_mechanism_ppack* p) {{}}\n"), class_name, name);
+    };
 
-    out << "struct " << ppack_name << ": ::arb::mechanism_ppack {\n" << indent;
 
-    for (const auto& scalar: vars.scalars) {
-        out << "::arb::fvm_value_type " << scalar->name() <<  " = " << as_c_double(scalar->value()) << ";\n";
+    {
+        auto api_name = init_api->name();
+        auto n = std::count_if(vars.arrays.begin(), vars.arrays.end(),
+                               [] (const auto& v) { return v->is_state(); });
+
+        out << fmt::format(FMT_COMPILE("void {}_{}_(arb_mechanism_ppack* p) {{"), class_name, api_name);
+        if(!init_api->body()->statements().empty()) {
+            out << fmt::format(FMT_COMPILE("\n"
+                                           "    auto n = p->{0};\n"
+                                           "    unsigned block_dim = 128;\n"
+                                           "    unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
+                                           "    {1}<<<grid_dim, block_dim>>>(*p);\n"
+                                           "    if (!p->multiplicity) return;\n"
+                                           "    multiply<<<{{grid_dim, {2}}}, block_dim>>>(*p);\n"),
+                               "width",
+                               api_name,
+                               n);
+        }
+        out << "}\n\n";
     }
-    for (const auto& array: vars.arrays) {
-        out << "::arb::fvm_value_type* " << array->name() << ";\n";
+
+    emit_api_wrapper(current_api,     "width");
+    emit_api_wrapper(state_api,       "width");
+    emit_api_wrapper(write_ions_api,  "width");
+    if (post_event_api) {
+        emit_api_wrapper(post_event_api, "width", "post_event");
+    } else {
+        emit_empty_wrapper("post_event");
     }
-    for (const auto& dep: ion_deps) {
-        out << "::arb::ion_state_view " << ion_state_field(dep.name) << ";\n";
-        out << "::arb::fvm_index_type* " << ion_state_index(dep.name) << ";\n";
+    if (net_receive_api) {
+        auto api_name = "apply_events";
+        out << fmt::format(FMT_COMPILE("void {}_{}_(arb_mechanism_ppack* p, arb_deliverable_event_stream* stream_ptr) {{"), class_name, api_name);
+        if(!net_receive_api->body()->statements().empty()) {
+            out << fmt::format(FMT_COMPILE("\n"
+                                           "    auto n = stream_ptr->n_streams;\n"
+                                           "    unsigned block_dim = 128;\n"
+                                           "    unsigned grid_dim = ::arb::gpu::impl::block_count(n, block_dim);\n"
+                                           "    {}<<<grid_dim, block_dim>>>(*p, *stream_ptr);\n"),
+                               api_name);
+        }
+        out << "}\n\n";
+    } else {
+        auto api_name = "apply_events";
+        out << fmt::format(FMT_COMPILE("void {}_{}_(arb_mechanism_ppack* p, arb_deliverable_event_stream* events) {{}}\n\n"), class_name, api_name);
     }
-
-    out << popindent << "};\n\n";
+    out << namespace_declaration_close(ns_components);
+    return out.str();
 }
 
 static std::string index_i_name(const std::string& index_var) {
     return index_var+"i_";
 }
 
-void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop) {
+void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop, bool ppack) {
     auto body = e->body();
     auto indexed_vars = indexed_locals(e->scope());
 
@@ -456,12 +381,12 @@ void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool
                 out << "unsigned lane_mask_ = arb::gpu::ballot(0xffffffff, tid_<n_);\n";
             }
         }
-
+        ppack && out << "PPACK_IFACE_BLOCK;\n";
         cv_loop && out << "if (tid_<n_) {\n" << indent;
 
         for (auto& index: indices) {
             out << "auto " << index_i_name(index.source_var)
-                << " = params_." << index.source_var << "[" << index.index_name << "];\n";
+                << " = " << pp_var_pfx << index.source_var << "[" << index.index_name << "];\n";
         }
 
         for (auto& sym: indexed_vars) {
@@ -490,14 +415,14 @@ namespace {
         deref(indexed_variable_info v): v(v) {}
         friend std::ostream& operator<<(std::ostream& o, const deref& wrap) {
             auto index_var = wrap.v.cell_index_var.empty() ? wrap.v.node_index_var : wrap.v.cell_index_var;
-            return o << "params_." << wrap.v.data_var << '['
+            return o << pp_var_pfx << wrap.v.data_var << '['
                      << (wrap.v.scalar()? "0": index_i_name(index_var)) << ']';
         }
     };
 }
 
 void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
-    out << "::arb::fvm_value_type " << cuprint(local) << " = ";
+    out << "arb_value_type " << cuprint(local) << " = ";
 
     if (local->is_read()) {
         auto d = decode_indexed_variable(local->external_variable());
@@ -513,8 +438,7 @@ void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
 
 
 void emit_state_update_cu(std::ostream& out, Symbol* from,
-                          IndexedVariable* external, bool is_point_proc)
-{
+                          IndexedVariable* external, bool is_point_proc) {
     if (!external->is_write()) return;
 
     auto d = decode_indexed_variable(external);
@@ -528,16 +452,16 @@ void emit_state_update_cu(std::ostream& out, Symbol* from,
         out << "::arb::gpu::reduce_by_key(";
         if (coeff != 1) out << as_c_double(coeff) << '*';
 
-        out << "params_.weight_[tid_]*" << from->name() << ',';
+        out << pp_var_pfx << "weight[tid_]*" << from->name() << ',';
 
         auto index_var = d.cell_index_var.empty() ? d.node_index_var : d.cell_index_var;
-        out << "params_." << d.data_var << ", " << index_i_name(index_var) << ", lane_mask_);\n";
+        out << pp_var_pfx << d.data_var << ", " << index_i_name(index_var) << ", lane_mask_);\n";
     }
     else if (d.accumulate) {
         out << deref(d) << " = fma(";
         if (coeff != 1) out << as_c_double(coeff) << '*';
 
-        out << "params_.weight_[tid_], " << from->name() << ", " << deref(d) << ");\n";
+        out << pp_var_pfx << "weight[tid_], " << from->name() << ", " << deref(d) << ");\n";
     }
     else {
         out << deref(d) << " = ";
@@ -550,7 +474,7 @@ void emit_state_update_cu(std::ostream& out, Symbol* from,
 // CUDA Printer visitors
 
 void GpuPrinter::visit(VariableExpression *sym) {
-    out_ << "params_." << sym->name() << (sym->is_range()? "[tid_]": "");
+    out_ << pp_var_pfx << sym->name() << (sym->is_range()? "[tid_]": "");
 }
 
 void GpuPrinter::visit(CallExpression* e) {
diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp
index 7b4ea892..ce84fa01 100644
--- a/modcc/printer/infoprinter.cpp
+++ b/modcc/printer/infoprinter.cpp
@@ -1,6 +1,12 @@
 #include <ostream>
 #include <set>
 #include <string>
+#include <regex>
+
+#define FMT_HEADER_ONLY YES
+#include <fmt/core.h>
+#include <fmt/format.h>
+#include <fmt/compile.h>
 
 #include "blocks.hpp"
 #include "infoprinter.hpp"
@@ -12,134 +18,149 @@
 
 using io::quote;
 
-struct id_field_info {
-    id_field_info(const Id& id, const char* kind):
-        id(id),
-        kind(kind) {}
-
-    const Id& id;
-    const char* kind;
-};
-
-std::ostream& operator<<(std::ostream& out, const id_field_info& wrap) {
-    const Id& id = wrap.id;
-
-    out << "{" << quote(id.name()) << ", "
-        << "{spec::" << wrap.kind << ", " << quote(id.unit_string()) << ", "
-        << (id.has_value() ? id.value : "0");
-
-    if (id.has_range()) {
-        out << ", " << id.range.first << "," << id.range.second;
-    }
-
-    out << "}}";
-    return out;
-}
-
-struct ion_dep_info {
-    ion_dep_info(const IonDep& ion):
-        ion(ion) {}
-
-    const IonDep& ion;
-};
-
-std::ostream& operator<<(std::ostream& out, const ion_dep_info& wrap) {
-    const char* boolalpha[2] = {"false", "true"};
-    const IonDep& ion = wrap.ion;
-
-    return out << "{\"" << ion.name << "\", {"
-               << boolalpha[ion.writes_concentration_int()] << ", "
-               << boolalpha[ion.writes_concentration_ext()] << ", "
-               << boolalpha[ion.uses_rev_potential()] << ", "
-               << boolalpha[ion.writes_rev_potential()] << ", "
-               << boolalpha[ion.uses_valence()] << ", "
-               << boolalpha[ion.verifies_valence()] << ", "
-               << ion.expected_valence << "}}";
-}
-
-std::string build_info_header(const Module& m, const printer_options& opt) {
+std::string build_info_header(const Module& m, const printer_options& opt, bool cpu, bool gpu) {
     using io::indent;
     using io::popindent;
 
     std::string name = m.module_name();
-    auto ids = public_variable_ids(m);
-    auto ns_components = namespace_components(opt.cpp_namespace);
-
-    bool any_fields =
-        !ids.global_parameter_ids.empty() ||
-        !ids.range_parameter_ids.empty() ||
-        !ids.state_ids.empty();
 
     io::pfxstringstream out;
 
-    out << "#pragma once\n"
-           "#include <memory>\n"
-           "\n"
-           "#include <"
-        << arb_header_prefix() << "mechanism.hpp>\n"
-                                  "#include <"
-        << arb_header_prefix() << "mechinfo.hpp>\n"
-                                  "\n"
-        << namespace_declaration_open(ns_components) << "\n"
-                                                        "template <typename Backend>\n"
-                                                        "::arb::concrete_mech_ptr<Backend> make_mechanism_"
-        << name << "();\n"
-                   "\n"
-                   "inline const ::arb::mechanism_info& mechanism_"
-        << name << "_info() {\n"
-        << indent;
-
-    any_fields&& out << "using spec = ::arb::mechanism_field_spec;\n";
-
-    out << "static ::arb::mechanism_info info = {\n"
-        << indent << "// globals\n"
-                     "{\n"
-        << indent;
-
-    io::separator sep(",\n");
-    for (const auto& id: ids.global_parameter_ids) {
-        out << sep << id_field_info(id, "global");
-    }
-
-    out << popindent << "\n},\n// parameters\n{\n"
-        << indent;
+    std::string fingerprint = "<placeholder>";
 
-    sep.reset();
-    for (const auto& id: ids.range_parameter_ids) {
-        out << sep << id_field_info(id, "parameter");
+    out << fmt::format("#pragma once\n\n"
+                       "#include <cmath>\n"
+                       "#include <{}mechanism_abi.h>\n\n",
+                       arb_header_prefix());
+
+    auto vars = local_module_variables(m);
+    auto ion_deps = m.ion_deps();
+
+
+    std::unordered_map<std::string, Id> name2id;
+    for (const auto& id: m.parameter_block().parameters) name2id[id.name()] = id;
+    for (const auto& id: m.state_block().state_variables) name2id[id.name()] = id;
+
+    auto fmt_var = [&](const auto& v) {
+        auto kv = name2id.find(v->name());
+        auto lo = std::numeric_limits<double>::lowest();
+        auto hi = std::numeric_limits<double>::max();
+        std::string unit = "";
+        if (kv != name2id.end()) {
+            auto id = kv->second;
+            unit = id.unit_string();
+            if (id.has_range()) {
+                auto lo = id.range.first;
+                auto hi = id.range.second;
+            }
+        }
+        return fmt::format("{{ \"{}\", \"{}\", {}, {}, {} }}",
+                           v->name(),
+                           unit,
+                           std::isnan(v->value()) ? "NAN" : std::to_string(v->value()),
+                           lo, hi);
+    };
+
+    auto fmt_ion = [](const auto& i) {
+        return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {} }}"),
+                           i.name,
+                           i.writes_concentration_int(),
+                           i.writes_concentration_ext(),
+                           i.writes_rev_potential(),
+                           i.uses_rev_potential(),
+                           i.uses_valence(),
+                           i.verifies_valence(),
+                           i.expected_valence);
+    };
+
+
+    out << fmt::format("extern \"C\" {{\n"
+                       "  arb_mechanism_type make_{0}_{1}() {{\n",
+                       std::regex_replace(opt.cpp_namespace, std::regex{"::"}, "_"),
+                       name);
+
+    out << "    // Tables\n";
+    {
+        auto n = 0ul;
+        io::separator sep("", ",\n                                        ");
+        out << "    static arb_field_info globals[] = { ";
+        for (const auto& var: vars.scalars) {
+            out << sep << fmt_var(var);
+            ++n;
+        }
+        out << " };\n"
+            << "    static arb_size_type n_globals = " << n << ";\n";
     }
 
-    out << popindent << "\n},\n// state variables\n{\n"
-        << indent;
-
-    sep.reset();
-    for (const auto& id: ids.state_ids) {
-        out << sep << id_field_info(id, "state");
+    {
+        auto n = 0ul;
+        io::separator sep("", ",\n                                           ");
+        out << "    static arb_field_info state_vars[] = { ";
+        for (const auto& var: vars.arrays) {
+            if(var->is_state()) {
+                out << sep << fmt_var(var);
+                ++n;
+            }
+        }
+        out << " };\n"
+            << "    static arb_size_type n_state_vars = " << n << ";\n";
     }
-
-    out << popindent << "\n},\n// ion dependencies\n{\n"
-        << indent;
-
-    sep.reset();
-    for (const auto& ion: m.ion_deps()) {
-        out << sep << ion_dep_info(ion);
+    {
+        auto n = 0ul;
+        io::separator sep("", ",\n                                           ");
+        out << "    static arb_field_info parameters[] = { ";
+        for (const auto& var: vars.arrays) {
+            if(!var->is_state()) {
+                out << sep << fmt_var(var);
+                ++n;
+            }
+        }
+        out << " };\n"
+            << "    static arb_size_type n_parameters = " << n << ";\n";
     }
 
-    std::string fingerprint = "<placeholder>";
-    out << popindent << "\n"
-                        "},\n"
-                        "// fingerprint\n"
-        << quote(fingerprint) << ",\n"
-                                 "// linear, homogeneous mechanism\n"
-        << m.is_linear() << ",\n"
-                             "// post_events enabled mechanism\n"
-        << m.has_post_events() << "\n"
-        << popindent << "};\n"
-                        "\n"
-                        "return info;\n"
-        << popindent << "}\n"
-                        "\n"
-        << namespace_declaration_close(ns_components);
+    {
+        io::separator sep("", ",\n");
+        out << "    static arb_ion_info ions[] = { ";
+        auto n = 0ul;
+        for (const auto& var: ion_deps) {
+            out << sep << fmt_ion(var);
+            ++n;
+        }
+        out << " };\n"
+            << "    static arb_size_type n_ions = " << n << ";\n";
+    }
 
+    out << fmt::format(FMT_COMPILE("\n"
+                                   "    arb_mechanism_type result;\n"
+                                   "    result.abi_version=ARB_MECH_ABI_VERSION;\n"
+                                   "    result.fingerprint=\"{1}\";\n"
+                                   "    result.name=\"{0}\";\n"
+                                   "    result.kind={2};\n"
+                                   "    result.is_linear={3};\n"
+                                   "    result.has_post_events={4};\n"
+                                   "    result.globals=globals;\n"
+                                   "    result.n_globals=n_globals;\n"
+                                   "    result.ions=ions;\n"
+                                   "    result.n_ions=n_ions;\n"
+                                   "    result.state_vars=state_vars;\n"
+                                   "    result.n_state_vars=n_state_vars;\n"
+                                   "    result.parameters=parameters;\n"
+                                   "    result.n_parameters=n_parameters;\n"
+                                   "    return result;\n"
+                                   "  }}\n"
+                                   "\n"),
+                       name,
+                       fingerprint,
+                       module_kind_str(m),
+                       m.is_linear(),
+                       m.has_post_events())
+        << fmt::format("  arb_mechanism_interface* make_{0}_{1}_interface_multicore(){2}\n"
+                       "  arb_mechanism_interface* make_{0}_{1}_interface_gpu(){3}\n"
+                       "}}\n",
+                       std::regex_replace(opt.cpp_namespace, std::regex{"::"}, "_"),
+                       name,
+                       cpu ? ";" : " { return nullptr; }",
+                       gpu ? ";" : " { return nullptr; }");
     return out.str();
 }
diff --git a/modcc/printer/infoprinter.hpp b/modcc/printer/infoprinter.hpp
index 8f6e4e48..ae43c899 100644
--- a/modcc/printer/infoprinter.hpp
+++ b/modcc/printer/infoprinter.hpp
@@ -8,5 +8,4 @@
 // Build header file comprising mechanism metadata
 // and declarations of backend-specific mechanism implementations.
 
-std::string build_info_header(const Module& m, const printer_options& opt);
-
+std::string build_info_header(const Module& m, const printer_options& opt, bool cpu=false, bool gpu=false);
diff --git a/modcc/printer/printerutil.cpp b/modcc/printer/printerutil.cpp
index 5309e528..d36e9e6d 100644
--- a/modcc/printer/printerutil.cpp
+++ b/modcc/printer/printerutil.cpp
@@ -121,49 +121,49 @@ PostEventExpression* find_post_event(const Module& m) {
 
 indexed_variable_info decode_indexed_variable(IndexedVariable* sym) {
     indexed_variable_info v;
-    v.node_index_var = "node_index_";
+    v.node_index_var = "node_index";
     v.scale = 1;
     v.accumulate = true;
     v.readonly = true;
 
     std::string ion_pfx;
     if (sym->is_ion()) {
-        ion_pfx = "ion_"+sym->ion_channel()+"_";
-        v.node_index_var = ion_pfx+"index_";
+        ion_pfx = "ion_"+sym->ion_channel();
+        v.node_index_var = ion_pfx+"_index";
     }
 
     switch (sym->data_source()) {
     case sourceKind::voltage:
-        v.data_var="vec_v_";
+        v.data_var="vec_v";
         v.readonly = true;
         break;
     case sourceKind::current_density:
-        v.data_var = "vec_i_";
+        v.data_var = "vec_i";
         v.readonly = false;
         v.scale = 0.1;
         break;
     case sourceKind::current:
         // unit scale; sourceKind for point processes updating current variable.
-        v.data_var = "vec_i_";
+        v.data_var = "vec_i";
         v.readonly = false;
         break;
     case sourceKind::conductivity:
-        v.data_var = "vec_g_";
+        v.data_var = "vec_g";
         v.readonly = false;
         v.scale = 0.1;
         break;
     case sourceKind::conductance:
         // unit scale; sourceKind for point processes updating conductivity.
-        v.data_var = "vec_g_";
+        v.data_var = "vec_g";
         v.readonly = false;
         break;
     case sourceKind::dt:
-        v.data_var = "vec_dt_";
+        v.data_var = "vec_dt";
         v.readonly = true;
         break;
     case sourceKind::time:
-        v.data_var = "vec_t_";
-        v.cell_index_var = "vec_di_";
+        v.data_var = "vec_t";
+        v.cell_index_var = "vec_di";
         v.readonly = true;
         break;
     case sourceKind::ion_current_density:
@@ -195,11 +195,11 @@ indexed_variable_info decode_indexed_variable(IndexedVariable* sym) {
         v.readonly = true;
         break;
     case sourceKind::temperature:
-        v.data_var = "temperature_degC_";
+        v.data_var = "temperature_degC";
         v.readonly = true;
         break;
     case sourceKind::diameter:
-        v.data_var = "diam_um_";
+        v.data_var = "diam_um";
         v.readonly = true;
         break;
     default:
diff --git a/modcc/printer/printerutil.hpp b/modcc/printer/printerutil.hpp
index 902290a1..053c7fbc 100644
--- a/modcc/printer/printerutil.hpp
+++ b/modcc/printer/printerutil.hpp
@@ -6,6 +6,7 @@
 #include <string>
 #include <vector>
 
+#include "io/ostream_wrappers.hpp"
 #include "blocks.hpp"
 #include "error.hpp"
 #include "expression.hpp"
@@ -13,6 +14,9 @@
 
 std::vector<std::string> namespace_components(const std::string& qualified_namespace);
 
+// Can use this in a namespace. No __ allowed anywhere, neither _[A-Z], and in _global namespace_ _ followed by anything is verboten.
+const static std::string pp_var_pfx = "_pp_var_";
+
 inline const char* arb_header_prefix() {
     static const char* prefix = "arbor/";
     return prefix;
@@ -53,9 +57,12 @@ struct namespace_declaration_close {
 // Enum representation:
 
 inline const char* module_kind_str(const Module& m) {
-    return m.kind()==moduleKind::density?
-        "::arb::mechanismKind::density":
-        "::arb::mechanismKind::point";
+    switch (m.kind()) {
+    case moduleKind::density: return "arb_mechanism_kind_density";            break;
+    case moduleKind::point:   return "arb_mechanism_kind_point";              break;
+    case moduleKind::revpot:  return "arb_mechanism_kind_reversal_potential"; break;
+    default: throw compiler_exception("Unknown module kind " + std::to_string((int)m.kind()));
+    }
 }
 
 // Check expression non-null and scoped, or else throw.
@@ -131,3 +138,16 @@ struct indexed_variable_info {
 };
 
 indexed_variable_info decode_indexed_variable(IndexedVariable* sym);
+
+template<typename C>
+size_t emit_array(std::ostream& out, const C& vars) {
+    auto n = 0ul;
+    io::separator sep("", ", ");
+    out << "{ ";
+    for (const auto& var: vars) {
+        out << sep << var;
+        ++n;
+    }
+    out << " }";
+    return n;
+}
diff --git a/python/example/single_cell_model.py b/python/example/single_cell_model.py
index 51431cc6..e8e461c4 100755
--- a/python/example/single_cell_model.py
+++ b/python/example/single_cell_model.py
@@ -4,6 +4,8 @@
 import arbor
 import pandas, seaborn # You may have to pip install these.
 
+print(arbor.__config__)
+
 # (1) Create a morphology with a single (cylindrical) segment of length=diameter=6 Î¼m
 tree = arbor.segment_tree()
 tree.append(arbor.mnpos, arbor.mpoint(-3, 0, 0, 3), arbor.mpoint(3, 0, 0, 3), tag=1)
diff --git a/scripts/build-catalogue b/scripts/build-catalogue.in
similarity index 87%
rename from scripts/build-catalogue
rename to scripts/build-catalogue.in
index 6351b800..70c7fdfb 100755
--- a/scripts/build-catalogue
+++ b/scripts/build-catalogue.in
@@ -9,7 +9,7 @@ import shutil
 import stat
 import string
 import argparse
-
+import re
 def parse_arguments():
     def append_slash(s):
         return s+'/' if s and not s.endswith('/') else s
@@ -39,12 +39,6 @@ def parse_arguments():
                         type=str,
                         help='Catalogue name.')
 
-    parser.add_argument('-s', '--source',
-                        metavar='source',
-                        type=str,
-                        default=Path(__file__).parents[1].resolve(),
-                        help='Path to arbor sources; defaults to parent of this script\'s directory.')
-
     parser.add_argument('modpfx',
                         metavar='modpfx',
                         type=str,
@@ -66,12 +60,14 @@ def parse_arguments():
 
 args    = parse_arguments()
 pwd     = Path.cwd()
-name    = args['name']
+name    = re.sub(r'_+', r'_',
+                 re.sub(r'[^a-zA-Z0-9_]', r'_',
+                        args['name']))
+
 mod_dir = pwd / Path(args['modpfx'])
 mods    = [ f[:-4] for f in os.listdir(mod_dir) if f.endswith('.mod') ]
 verbose = args['verbose'] and not args['quiet']
 quiet   = args['quiet']
-arb     = args['source']
 
 cmake = f"""
 cmake_minimum_required(VERSION 3.9)
@@ -92,8 +88,8 @@ make_catalogue(
   SOURCES "${{CMAKE_CURRENT_SOURCE_DIR}}/mod"
   OUTPUT "CAT_{name.upper()}_SOURCES"
   MECHS {' '.join(mods)}
+  PREFIX @ARB_INSTALL_DATADIR@
   CXX_FLAGS_TARGET ${{ARB_CXX_FLAGS_TARGET}}
-  ARBOR {arb}
   STANDALONE ON
   VERBOSE {"ON" if verbose else "OFF"})
 """
@@ -110,8 +106,8 @@ with TemporaryDirectory() as tmp:
     os.chdir(tmp / 'build')
     with open(tmp / 'CMakeLists.txt', 'w') as fd:
         fd.write(cmake)
-    shutil.copy2(f'{arb}/mechanisms/BuildModules.cmake', tmp)
-    shutil.copy2(f'{arb}/mechanisms/generate_catalogue', tmp)
+    shutil.copy2(f'@ARB_INSTALL_DATADIR@/BuildModules.cmake', tmp)
+    shutil.copy2(f'@ARB_INSTALL_DATADIR@/generate_catalogue', tmp)
     sp.run('cmake ..', shell=True, check=True, capture_output=not verbose)
     sp.run('make',     shell=True, check=True, capture_output=not verbose)
     shutil.copy2(f'{name}-catalogue.so', pwd)
diff --git a/test/unit-modcc/test_printers.cpp b/test/unit-modcc/test_printers.cpp
index b9643896..a188edc0 100644
--- a/test/unit-modcc/test_printers.cpp
+++ b/test/unit-modcc/test_printers.cpp
@@ -124,11 +124,11 @@ TEST(CPrinter, proc_body) {
             "    htau = 1500\n"
             "}"
             ,
-            "::arb::fvm_value_type k;\n"
-            "pp->minf[i_] = 1.0-1.0/(1.0+exp((v-k)/k));\n"
-            "pp->hinf[i_] = 1.0/(1.0+exp((v-k)/k));\n"
-            "pp->mtau[i_] = 0.5;\n"
-            "pp->htau[i_] = 1500.0;\n"
+            "arb_value_type k;\n"
+            "_pp_var_minf[i_] = 1.0-1.0/(1.0+exp((v-k)/k));\n"
+            "_pp_var_hinf[i_] = 1.0/(1.0+exp((v-k)/k));\n"
+            "_pp_var_mtau[i_] = 0.5;\n"
+            "_pp_var_htau[i_] = 1500.0;\n"
         }
     };
 
@@ -167,7 +167,7 @@ TEST(CPrinter, proc_body_const) {
                     "    mtau = 0.5 - t0 + t1\n"
                     "}"
                     ,
-                    "pp->mtau[i_] = 0.5 - -0.5 + 1.2;\n"
+                    "_pp_var_mtau[i_] = 0.5 - -0.5 + 1.2;\n"
             }
     };
 
@@ -204,27 +204,27 @@ TEST(CPrinter, proc_body_inlined) {
         "r_6_ = 0.;\n"
         "r_7_ = 0.;\n"
         "r_8_ = 0.;\n"
-        "r_9_=pp->s2[i_]*0.33333333333333331;\n"
-        "r_8_=pp->s1[i_]+2.0;\n"
-        "if(pp->s1[i_]==3.0){\n"
+        "r_9_=_pp_var_s2[i_]*0.33333333333333331;\n"
+        "r_8_=_pp_var_s1[i_]+2.0;\n"
+        "if(_pp_var_s1[i_]==3.0){\n"
         "   r_7_=2.0*r_8_;\n"
         "}\n"
         "else{\n"
-        "   if(pp->s1[i_]==4.0){\n"
+        "   if(_pp_var_s1[i_]==4.0){\n"
         "       r_11_ = 0.;\n"
         "       r_12_ = 0.;\n"
-        "       r_12_=6.0+pp->s1[i_];\n"
+        "       r_12_=6.0+_pp_var_s1[i_];\n"
         "       r_11_=r_12_;\n"
         "       r_7_=r_8_*r_11_;\n"
         "   }\n"
         "   else{\n"
         "       r_10_=exp(r_8_);\n"
-        "       r_7_=r_10_*pp->s1[i_];\n"
+        "       r_7_=r_10_*_pp_var_s1[i_];\n"
         "   }\n"
         "}\n"
         "r_13_=0.;\n"
         "r_14_=0.;\n"
-        "r_14_=r_9_/pp->s2[i_];\n"
+        "r_14_=r_9_/_pp_var_s2[i_];\n"
         "r_15_=log(r_14_);\n"
         "r_13_=42.0*r_15_;\n"
         "r_6_=r_9_*r_13_;\n"
@@ -247,7 +247,7 @@ TEST(CPrinter, proc_body_inlined) {
         "       t2=r_16_*ll0_;\n"
         "   }\n"
         "}\n"
-        "pp->s2[i_]=t2+4.0;\n";
+        "_pp_var_s2[i_]=t2+4.0;\n";
 
     Module m(io::read_all(DATADIR "/mod_files/test6.mod"), "test6.mod");
     Parser p(m, false);
@@ -280,22 +280,22 @@ TEST(SimdPrinter, simd_if_else) {
             "simd_mask mask_0_ = S::cmp_gt(i, (double)2.0);\n"
             "S::where(mask_0_,u) = (double)7.0;\n"
             "S::where(S::logical_not(mask_0_),u) = (double)5.0;\n"
-            "indirect(pp->s+i_, simd_width_) = S::where(S::logical_not(mask_0_),simd_cast<simd_value>((double)42.0));\n"
-            "indirect(pp->s+i_, simd_width_) = u;"
+            "indirect(_pp_var_s+i_, simd_width_) = S::where(S::logical_not(mask_0_),simd_cast<simd_value>((double)42.0));\n"
+            "indirect(_pp_var_s+i_, simd_width_) = u;"
             ,
             "simd_value u;\n"
             "simd_mask mask_1_ = S::cmp_gt(i, (double)2.0);\n"
             "S::where(mask_1_,u) = (double)7.0;\n"
             "S::where(S::logical_not(mask_1_),u) = (double)5.0;\n"
-            "indirect(pp->s+i_, simd_width_) = S::where(S::logical_and(S::logical_not(mask_1_), mask_input_),simd_cast<simd_value>((double)42.0));\n"
-            "indirect(pp->s+i_, simd_width_) = S::where(mask_input_, u);"
+            "indirect(_pp_var_s+i_, simd_width_) = S::where(S::logical_and(S::logical_not(mask_1_), mask_input_),simd_cast<simd_value>((double)42.0));\n"
+            "indirect(_pp_var_s+i_, simd_width_) = S::where(mask_input_, u);"
             ,
-            "simd_mask mask_2_ = S::cmp_gt(simd_cast<simd_value>(indirect(pp->g+i_, simd_width_)), (double)2.0);\n"
-            "simd_mask mask_3_ = S::cmp_gt(simd_cast<simd_value>(indirect(pp->g+i_, simd_width_)), (double)3.0);\n"
+            "simd_mask mask_2_ = S::cmp_gt(simd_cast<simd_value>(indirect(_pp_var_g+i_, simd_width_)), (double)2.0);\n"
+            "simd_mask mask_3_ = S::cmp_gt(simd_cast<simd_value>(indirect(_pp_var_g+i_, simd_width_)), (double)3.0);\n"
             "S::where(S::logical_and(mask_2_,mask_3_),i) = (double)0.;\n"
             "S::where(S::logical_and(mask_2_,S::logical_not(mask_3_)),i) = (double)1.0;\n"
-            "simd_mask mask_4_ = S::cmp_lt(simd_cast<simd_value>(indirect(pp->g+i_, simd_width_)), (double)1.0);\n"
-            "indirect(pp->s+i_, simd_width_) = S::where(S::logical_and(S::logical_not(mask_2_),mask_4_),simd_cast<simd_value>((double)2.0));\n"
+            "simd_mask mask_4_ = S::cmp_lt(simd_cast<simd_value>(indirect(_pp_var_g+i_, simd_width_)), (double)1.0);\n"
+            "indirect(_pp_var_s+i_, simd_width_) = S::where(S::logical_and(S::logical_not(mask_2_),mask_4_),simd_cast<simd_value>((double)2.0));\n"
             "rates(i_, S::logical_and(S::logical_not(mask_2_),S::logical_not(mask_4_)), i);"
     };
 
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index b2a363df..c52c27e6 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -85,6 +85,7 @@ endforeach()
 
 set(unit_sources
     ../common_cells.cpp
+    test_abi.cpp
     test_asc.cpp
     test_any_cast.cpp
     test_any_ptr.cpp
@@ -215,11 +216,23 @@ if(${CMAKE_POSITION_INDEPENDENT_CODE})
     SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/dummy"
     OUTPUT "CAT_DUMMY_SOURCES"
     MECHS dummy
+    PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
     CXX_FLAGS_TARGET ${ARB_CXX_FLAGS_TARGET_FULL}
-    ARBOR "${PROJECT_SOURCE_DIR}"
     STANDALONE ON
-    VERBOSE OFF)
+    VERBOSE ON)
   target_compile_definitions(unit PRIVATE USE_DYNAMIC_CATALOGUES)
+  if(ARB_WITH_NVCC)
+    target_compile_options(dummy-catalogue PRIVATE -DARB_CUDA)
+  endif()
+  if(ARB_WITH_CUDA_CLANG)
+    set(clang_options_ -DARB_CUDA -xcuda --cuda-gpu-arch=sm_60 --cuda-path=${CUDA_TOOLKIT_ROOT_DIR})
+    target_compile_options(unit PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${clang_options_}>)
+  endif()
+
+  if(ARB_WITH_HIP_CLANG)
+    set(clang_options_ -DARB_HIP -xhip --amdgpu-target=gfx906 --amdgpu-target=gfx900)
+    target_compile_options(unit PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${clang_options_}>)
+  endif()
   add_dependencies(unit dummy-catalogue)
 endif()
 
diff --git a/test/unit/gpu_vector.hpp b/test/unit/gpu_vector.hpp
index 02f08181..afc7d65b 100644
--- a/test/unit/gpu_vector.hpp
+++ b/test/unit/gpu_vector.hpp
@@ -2,7 +2,7 @@
 
 #include <vector>
 
-#include "backends/gpu/gpu_api.hpp"
+#include <arbor/gpu/gpu_api.hpp>
 
 /*
  * Helpers for using GPU memory in unit tests.
diff --git a/test/unit/mech_private_field_access.cpp b/test/unit/mech_private_field_access.cpp
index a0af0953..a7f46e3c 100644
--- a/test/unit/mech_private_field_access.cpp
+++ b/test/unit/mech_private_field_access.cpp
@@ -1,12 +1,11 @@
+#include <cstddef>
+
 #include <arbor/version.hpp>
+#include <arbor/mechanism.hpp>
 
 #include "backends/multicore/fvm.hpp"
-#include "backends/multicore/mechanism.hpp"
-#include "util/maputil.hpp"
-
 #ifdef ARB_GPU_ENABLED
 #include "backends/gpu/fvm.hpp"
-#include "backends/gpu/mechanism.hpp"
 #include "memory/gpu_wrappers.hpp"
 #endif
 
@@ -14,50 +13,147 @@
 #include "mech_private_field_access.hpp"
 
 using namespace arb;
-using field_table_type = std::vector<std::pair<const char*, fvm_value_type**>>;
+
+namespace {
+arb_value_type** field_lookup(const mechanism* m, const std::string& key) {
+    for (arb_size_type i = 0; i<m->mech_.n_parameters; ++i) {
+        if (key==m->mech_.parameters[i].name) return m->ppack_.parameters+i;
+    }
+    for (arb_size_type i = 0; i<m->mech_.n_state_vars; ++i) {
+        if (key==m->mech_.state_vars[i].name) return m->ppack_.state_vars+i;
+    }
+    throw std::logic_error("internal error: no such field in mechanism");
+}
+
+arb_value_type* global_lookup(const mechanism* m, const std::string& key) {
+    for (arb_size_type i = 0; i<m->mech_.n_globals; ++i) {
+        if (key==m->mech_.globals[i].name) return m->ppack_.globals+i;
+    }
+    throw std::logic_error("internal error: no such field in mechanism");
+}
+
+arb_ion_state* ion_lookup(const mechanism* m, const std::string& ion) {
+    for (arb_size_type i = 0; i<m->mech_.n_ions; ++i) {
+        if (ion==m->mech_.ions[i].name) return m->ppack_.ion_states+i;
+    }
+    throw std::logic_error("internal error: no such field in mechanism");
+}
 
 // Multicore mechanisms:
+std::vector<arb_value_type> mc_mechanism_field(const mechanism* m, const std::string& key) {
+    auto p = *field_lookup(m, key);
+    return std::vector<arb_value_type>(p, p+m->ppack_.width);
+}
 
-ACCESS_BIND(field_table_type (concrete_mechanism<multicore::backend>::*)(), multicore_field_table_ptr, &concrete_mechanism<multicore::backend>::field_table)
+void mc_write_mechanism_field(const arb::mechanism* m, const std::string& key, const std::vector<arb::arb_value_type>& values) {
+    auto p = *field_lookup(m, key);
+    std::size_t n = std::min(values.size(), std::size_t(m->ppack_.width));
+    std::copy_n(values.data(), n, p);
+}
 
-std::vector<fvm_value_type> mechanism_field(multicore::mechanism* m, const std::string& key) {
-    auto opt_ptr = util::value_by_key((m->*multicore_field_table_ptr)(), key);
-    if (!opt_ptr) throw std::logic_error("internal error: no such field in mechanism");
+std::vector<arb_index_type> mc_mechanism_ion_index(const mechanism* m, const std::string& ion) {
+    auto istate = *ion_lookup(m, ion);
+    return std::vector<arb_index_type>(istate.index, istate.index+m->ppack_.width);
+}
 
-    const fvm_value_type* field_data = *opt_ptr.value();
-    return std::vector<fvm_value_type>(field_data, field_data+m->size());
+arb_value_type mc_mechanism_global(const mechanism* m, const std::string& key) {
+    return *global_lookup(m, key);
 }
 
 // GPU mechanisms:
-
 #ifdef ARB_GPU_ENABLED
-ACCESS_BIND(field_table_type (concrete_mechanism<gpu::backend>::*)(), gpu_field_table_ptr, &concrete_mechanism<gpu::backend>::field_table)
+std::vector<arb_value_type> gpu_mechanism_field(const mechanism* m, const std::string& key) {
+    auto p_ptr = field_lookup(m, key);
+    arb_value_type* p;
+    memory::gpu_memcpy_d2h(&p, p_ptr, sizeof(p));
+
+    std::size_t n = m->ppack_.width;
+    std::vector<arb_value_type> values(n);
+    memory::gpu_memcpy_d2h(values.data(), p, sizeof(arb_value_type)*n);
+    return values;
+}
 
-std::vector<fvm_value_type> mechanism_field(gpu::mechanism* m, const std::string& key) {
-    auto opt_ptr = util::value_by_key((m->*gpu_field_table_ptr)(), key);
-    if (!opt_ptr) throw std::logic_error("internal error: no such field in mechanism");
+void gpu_write_mechanism_field(const arb::mechanism* m, const std::string& key, const std::vector<arb::arb_value_type>& values) {
+    auto p_ptr = field_lookup(m, key);
+    arb_value_type* p;
+    memory::gpu_memcpy_d2h(&p, p_ptr, sizeof(p));
 
-    const fvm_value_type* field_data = *opt_ptr.value();
-    std::vector<fvm_value_type> values(m->size());
+    std::size_t n = std::min(values.size(), std::size_t(m->ppack_.width));
+    memory::gpu_memcpy_h2d(const_cast<arb_value_type*>(values.data()), p, sizeof(arb_value_type)*n);
+}
 
-    memory::gpu_memcpy_d2h(values.data(), field_data, sizeof(fvm_value_type)*m->size());
-    return values;
+std::vector<arb_index_type> gpu_mechanism_ion_index(const mechanism* m, const std::string& ion) {
+    auto istate_ptr = ion_lookup(m, ion);
+    arb_ion_state istate;
+    memory::gpu_memcpy_d2h(&istate, istate_ptr, sizeof(istate));
+    std::vector<arb_index_type> vec(m->ppack_.width);
+    memory::gpu_memcpy_d2h(vec.data(), istate.index, sizeof(arb_index_type)*m->ppack_.width);
+    return vec;
+}
+
+arb_value_type gpu_mechanism_global(const mechanism* m, const std::string& key) {
+    auto p = global_lookup(m, key);
+    arb_value_type v;
+    memory::gpu_memcpy_d2h(p, &v, sizeof(v));
+    return v;
 }
 #endif
+} // anonymous namespace
 
 // Generic access:
 
-std::vector<fvm_value_type> mechanism_field(mechanism* m, const std::string& key) {
-    if (auto p = dynamic_cast<multicore::mechanism*>(m)) {
-        return mechanism_field(p, key);
+std::vector<arb_value_type> mechanism_field(const mechanism* m, const std::string& key) {
+    if (m->iface_.backend == arb_backend_kind_cpu) {
+        return mc_mechanism_field(m, key);
+    }
+
+#ifdef ARB_GPU_ENABLED
+    if (m->iface_.backend == arb_backend_kind_gpu) {
+        return gpu_mechanism_field(m, key);
+    }
+#endif
+
+    throw std::logic_error("internal error: mechanism instantiated on unknown backend");
+}
+
+void write_mechanism_field(const arb::mechanism* m, const std::string& key, const std::vector<arb::arb_value_type>& values) {
+    if (m->iface_.backend == arb_backend_kind_cpu) {
+        return mc_write_mechanism_field(m, key, values);
+    }
+
+#ifdef ARB_GPU_ENABLED
+    if (m->iface_.backend == arb_backend_kind_gpu) {
+        return gpu_write_mechanism_field(m, key, values);
+    }
+#endif
+
+    throw std::logic_error("internal error: mechanism instantiated on unknown backend");
+}
+
+std::vector<arb_index_type> mechanism_ion_index(const mechanism* m, const std::string& ion) {
+    if (m->iface_.backend == arb_backend_kind_cpu) {
+        return mc_mechanism_ion_index(m, ion);
     }
 
 #ifdef ARB_GPU_ENABLED
-    if (auto p = dynamic_cast<gpu::mechanism*>(m)) {
-        return mechanism_field(p, key);
+    if (m->iface_.backend == arb_backend_kind_gpu) {
+        return gpu_mechanism_ion_index(m, ion);
     }
 #endif
 
     throw std::logic_error("internal error: mechanism instantiated on unknown backend");
 }
 
+arb_value_type mechanism_global(const mechanism* m, const std::string& key) {
+    if (m->iface_.backend == arb_backend_kind_cpu) {
+        return mc_mechanism_global(m, key);
+    }
+
+#ifdef ARB_GPU_ENABLED
+    if (m->iface_.backend == arb_backend_kind_gpu) {
+        return gpu_mechanism_global(m, key);
+    }
+#endif
+
+    throw std::logic_error("internal error: mechanism instantiated on unknown backend");
+}
diff --git a/test/unit/mech_private_field_access.hpp b/test/unit/mech_private_field_access.hpp
index 50c7b26b..af2878f8 100644
--- a/test/unit/mech_private_field_access.hpp
+++ b/test/unit/mech_private_field_access.hpp
@@ -4,13 +4,25 @@
 #include <string>
 #include <vector>
 
+#include <arbor/arb_types.hpp>
 #include <arbor/mechanism.hpp>
 
-// Get a copy of the data within a mechanisms's (private) named field.
+// Get a copy of the data within a mechanisms's named field.
 
-std::vector<arb::fvm_value_type> mechanism_field(arb::mechanism* m, const std::string& key);
+std::vector<arb::arb_value_type> mechanism_field(const arb::mechanism* m, const std::string& key);
+void write_mechanism_field(const arb::mechanism* m, const std::string& key, const std::vector<arb::arb_value_type>& values);
+std::vector<arb_index_type> mechanism_ion_index(const arb::mechanism* m, const std::string& ion);
+arb::arb_value_type mechanism_global(const arb::mechanism* m, const std::string& key);
 
-template <typename DerivedMechPtr>
-inline std::vector<arb::fvm_value_type> mechanism_field(const std::unique_ptr<DerivedMechPtr>& m, const std::string& key) {
-    return mechanism_field(static_cast<arb::mechanism*>(m.get()), key);
+inline std::vector<arb::arb_value_type> mechanism_field(const std::unique_ptr<arb::mechanism>& m, const std::string& key) {
+    return mechanism_field(m.get(), key);
+}
+inline void write_mechanism_field(const std::unique_ptr<arb::mechanism>& m, const std::string& key, const std::vector<arb::arb_value_type>& values) {
+    write_mechanism_field(m.get(), key, values);
+}
+inline std::vector<arb::arb_index_type> mechanism_ion_index(const std::unique_ptr<arb::mechanism>& m, const std::string& ion) {
+    return mechanism_ion_index(m.get(), ion);
+}
+inline arb::arb_value_type mechanism_global(const std::unique_ptr<arb::mechanism>& m, const std::string& key) {
+    return mechanism_global(m.get(), key);
 }
diff --git a/test/unit/test_abi.cpp b/test/unit/test_abi.cpp
new file mode 100644
index 00000000..5db2510b
--- /dev/null
+++ b/test/unit/test_abi.cpp
@@ -0,0 +1,190 @@
+#include <vector>
+#include <string>
+
+#include "../test/gtest.h"
+
+#include <arbor/mechanism_abi.h>
+#include <arbor/mechanism.hpp>
+#include <arbor/version.hpp>
+
+#include "backends/multicore/shared_state.hpp"
+#ifdef ARB_GPU_ENABLED
+#include "backends/gpu/shared_state.hpp"
+#include "memory/gpu_wrappers.hpp"
+#endif
+
+using namespace std::string_literals;
+
+TEST(abi, multicore_initialisation) {
+    std::vector<arb_field_info> globals = {{ "G0", "kg",  123.0,     0.0, 2000.0},
+                                           { "G1", "lb",  456.0,     0.0, 2000.0},
+                                           { "G2", "gr",  789.0,     0.0, 2000.0}};
+    std::vector<arb_field_info> states  = {{ "S0", "nA",      0.123, 0.0, 2000.0},
+                                           { "S1", "mV",      0.456, 0.0, 2000.0}};
+    std::vector<arb_field_info> params  = {{ "P0", "lm", -123.0,     0.0, 2000.0}};
+
+    arb_mechanism_type type{};
+    type.abi_version = ARB_MECH_ABI_VERSION;
+    type.globals    = globals.data(); type.n_globals    = globals.size();
+    type.parameters = params.data();  type.n_parameters = params.size();
+    type.state_vars = states.data();  type.n_state_vars = states.size();
+
+    arb_mechanism_interface iface { arb_backend_kind_cpu,
+                                    1,
+                                    1,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr };
+
+    auto mech = arb::mechanism(type, iface);
+
+    arb_size_type ncell = 1;
+    arb_size_type ncv = 1;
+    std::vector<arb_index_type> cv_to_intdom(ncv, 0);
+    std::vector<arb_value_type> temp(ncv, 23);
+    std::vector<arb_value_type> diam(ncv, 1.);
+    std::vector<arb_value_type> vinit(ncv, -65);
+    std::vector<arb::fvm_gap_junction> gj = {};
+    std::vector<arb_index_type> src_to_spike = {};
+
+    arb::multicore::shared_state shared_state(ncell, ncell, 0,
+                                              cv_to_intdom, cv_to_intdom,
+                                              gj, vinit, temp, diam, src_to_spike,
+                                              mech.data_alignment());
+
+    arb::mechanism_layout layout;
+    layout.weight.assign(ncv, 1.);
+    for (arb_size_type i = 0; i<ncv; ++i) layout.cv.push_back(i);
+
+    shared_state.instantiate(mech, 42, {}, layout);
+
+    {
+        ASSERT_EQ(globals.size(), mech.mech_.n_globals);
+        for (auto i = 0ul; i < globals.size(); ++i) {
+            EXPECT_EQ(globals[i].default_value, mech.ppack_.globals[i]);
+        }
+    }
+
+    {
+        ASSERT_EQ(states.size(), mech.mech_.n_state_vars);
+        for (auto i = 0ul; i < states.size(); ++i) {
+            const auto* var_data = mech.ppack_.state_vars[i];
+
+            std::vector<arb_value_type> expected(ncv, states[i].default_value);
+            std::vector<arb_value_type> values(var_data, var_data+ncv);
+
+            EXPECT_EQ(expected, values);
+        }
+    }
+
+    {
+        ASSERT_EQ(params.size(), mech.mech_.n_parameters);
+        for (auto i = 0ul; i < params.size(); ++i) {
+            const auto* param_data = mech.ppack_.parameters[i];
+
+            std::vector<arb_value_type> expected(ncv, params[i].default_value);
+            std::vector<arb_value_type> values(param_data, param_data+ncv);
+
+            EXPECT_EQ(expected, values);
+        }
+    }
+}
+
+#ifdef ARB_GPU_ENABLED
+
+namespace {
+template <typename T>
+T deref(const T* device_ptr) {
+    T r;
+    arb::memory::gpu_memcpy_d2h(&r, device_ptr, sizeof(T));
+    return r;
+}
+
+template <typename T>
+std::vector<T> vec_n(const T* device_ptr, std::size_t n) {
+    std::vector<T> r(n);
+    arb::memory::gpu_memcpy_d2h(r.data(), device_ptr, n*sizeof(T));
+    return r;
+}
+}
+
+TEST(abi, gpu_initialisation) {
+    std::vector<arb_field_info> globals = {{ "G0", "kg",  123.0,     0.0, 2000.0},
+                                           { "G1", "lb",  456.0,     0.0, 2000.0},
+                                           { "G2", "gr",  789.0,     0.0, 2000.0}};
+    std::vector<arb_field_info> states  = {{ "S0", "nA",      0.123, 0.0, 2000.0},
+                                           { "S1", "mV",      0.456, 0.0, 2000.0}};
+    std::vector<arb_field_info> params  = {{ "P0", "lm", -123.0,     0.0, 2000.0}};
+
+    arb_mechanism_type type{};
+    type.abi_version = ARB_MECH_ABI_VERSION;
+    type.globals    = globals.data(); type.n_globals    = globals.size();
+    type.parameters = params.data();  type.n_parameters = params.size();
+    type.state_vars = states.data();  type.n_state_vars = states.size();
+
+    arb_mechanism_interface iface { arb_backend_kind_gpu,
+                                    1,
+                                    1,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr,
+                                    nullptr };
+
+    auto mech = arb::mechanism(type, iface);
+
+    arb_size_type ncell = 1;
+    arb_size_type ncv = 1;
+    std::vector<arb_index_type> cv_to_intdom(ncv, 0);
+    std::vector<arb_value_type> temp(ncv, 23);
+    std::vector<arb_value_type> diam(ncv, 1.);
+    std::vector<arb_value_type> vinit(ncv, -65);
+    std::vector<arb::fvm_gap_junction> gj = {};
+    std::vector<arb_index_type> src_to_spike = {};
+
+    arb::gpu::shared_state shared_state(ncell, ncell, 0,
+                                        cv_to_intdom, cv_to_intdom,
+                                        gj, vinit, temp, diam, src_to_spike,
+                                        1);
+
+    arb::mechanism_layout layout;
+    layout.weight.assign(ncv, 1.);
+    for (arb_size_type i = 0; i<ncv; ++i) layout.cv.push_back(i);
+
+    shared_state.instantiate(mech, 42, {}, layout);
+
+    {
+        ASSERT_EQ(globals.size(), mech.mech_.n_globals);
+        for (auto i = 0ul; i < globals.size(); ++i) {
+            EXPECT_EQ(globals[i].default_value, deref(mech.ppack_.globals+i));
+        }
+    }
+
+    {
+        ASSERT_EQ(states.size(), mech.mech_.n_state_vars);
+        auto state_var_ptrs = vec_n(mech.ppack_.state_vars, states.size());
+
+        for (auto i = 0ul; i < states.size(); ++i) {
+            std::vector<arb_value_type> expected(ncv, states[i].default_value);
+            std::vector<arb_value_type> values = vec_n(state_var_ptrs[i], ncv);
+
+            EXPECT_EQ(expected, values);
+        }
+    }
+
+    {
+        ASSERT_EQ(params.size(), mech.mech_.n_parameters);
+        auto param_ptrs = vec_n(mech.ppack_.parameters, params.size());
+        for (auto i = 0ul; i < params.size(); ++i) {
+            std::vector<arb_value_type> expected(ncv, params[i].default_value);
+            std::vector<arb_value_type> values = vec_n(param_ptrs[i], ncv);
+
+            EXPECT_EQ(expected, values);
+        }
+    }
+}
+#endif
diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
index 1f58d34e..6d4e3a5a 100644
--- a/test/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -157,7 +157,7 @@ TEST(fvm_layout, mech_index) {
     // HH on somas of two cells, with CVs 0 and 5.
     // Proportional area contrib: soma area/CV area.
 
-    EXPECT_EQ(mechanismKind::density, hh_config.kind);
+    EXPECT_EQ((unsigned)arb_mechanism_kind_density, hh_config.kind);
     EXPECT_EQ(ivec({0,6}), hh_config.cv);
 
     // Three expsyn synapses, two 0.4 along branch 1, and one 0.4 along branch 5.
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index e26e6d90..fcaa3dd9 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -17,22 +17,23 @@
 #include <arbor/sampling.hpp>
 #include <arbor/simulation.hpp>
 #include <arbor/schedule.hpp>
+#include <arbor/mechanism.hpp>
 #include <arbor/util/any_ptr.hpp>
 
 #include <arborenv/concurrency.hpp>
 
 #include "backends/multicore/fvm.hpp"
-#include "backends/multicore/mechanism.hpp"
 #include "execution_context.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "fvm_lowered_cell_impl.hpp"
-#include "mech_private_field_access.hpp"
 #include "util/meta.hpp"
 #include "util/maputil.hpp"
 #include "util/rangeutil.hpp"
+#include "util/span.hpp"
 #include "util/transform.hpp"
 
 #include "common.hpp"
+#include "mech_private_field_access.hpp"
 #include "unit_test_catalogue.hpp"
 #include "../common_cells.hpp"
 #include "../simple_recipes.hpp"
@@ -67,27 +68,6 @@ arb::mechanism* find_mechanism(fvm_cell& fvcell, int index) {
     return index<(int)mechs.size()? mechs[index].get(): nullptr;
 }
 
-// Access to mechanism-internal data:
-
-using mechanism_global_table = std::vector<std::pair<const char*, arb::fvm_value_type*>>;
-using mechanism_field_table = std::vector<std::pair<const char*, arb::fvm_value_type**>>;
-using mechanism_ion_index_table = std::vector<std::pair<const char*, arb::fvm_index_type**>>;
-
-ACCESS_BIND(\
-    mechanism_global_table (arb::concrete_mechanism<arb::multicore::backend>::*)(), \
-    private_global_table_ptr,\
-    &arb::concrete_mechanism<arb::multicore::backend>::global_table)
-
-ACCESS_BIND(\
-    mechanism_field_table (arb::concrete_mechanism<arb::multicore::backend>::*)(),\
-    private_field_table_ptr,\
-    &arb::concrete_mechanism<arb::multicore::backend>::field_table)
-
-ACCESS_BIND(\
-    mechanism_ion_index_table (arb::concrete_mechanism<arb::multicore::backend>::*)(),\
-    private_ion_index_table_ptr,\
-    &arb::concrete_mechanism<arb::multicore::backend>::ion_index_table)
-
 using namespace arb;
 
 class gap_recipe_0: public recipe {
@@ -511,14 +491,10 @@ TEST(fvm_lowered, derived_mechs) {
         using fvec = std::vector<fvm_value_type>;
         fvec tau_values;
         for (auto& mech: fvcell.*private_mechanisms_ptr) {
+            ASSERT_TRUE(mech);
             EXPECT_EQ("test_kin1"s, mech->internal_name());
 
-            auto cmech = dynamic_cast<multicore::mechanism*>(mech.get());
-            ASSERT_TRUE(cmech);
-
-            auto opt_tau_ptr = util::value_by_key((cmech->*private_global_table_ptr)(), "tau"s);
-            ASSERT_TRUE(opt_tau_ptr);
-            tau_values.push_back(*opt_tau_ptr.value());
+            tau_values.push_back(mechanism_global(mech, "tau"));
         }
         util::sort(tau_values);
         EXPECT_EQ(fvec({10., 20.}), tau_values);
@@ -584,14 +560,11 @@ TEST(fvm_lowered, read_valence) {
         fvcell.initialize({0}, rec);
 
         // test_ca_read_valence initialization should write ca ion valence
-        // to state variable 'record_zca':
-
-        auto mech_ptr = dynamic_cast<multicore::mechanism*>(find_mechanism(fvcell, "test_ca_read_valence"));
-        auto opt_record_z_ptr = util::value_by_key((mech_ptr->*private_field_table_ptr)(), "record_z"s);
+        // to state variable 'record_z':
 
-        ASSERT_TRUE(opt_record_z_ptr);
-        auto& record_z = *opt_record_z_ptr.value();
-        ASSERT_EQ(2.0, record_z[0]);
+        auto mech_ptr = find_mechanism(fvcell, "test_ca_read_valence");
+        auto record_z = mechanism_field(mech_ptr, "record_z");
+        ASSERT_EQ(2.0, record_z.at(0));
     }
 
     {
@@ -611,12 +584,9 @@ TEST(fvm_lowered, read_valence) {
         fvm_cell fvcell(context);
         fvcell.initialize({0}, rec);
 
-        auto cr_mech_ptr = dynamic_cast<multicore::mechanism*>(find_mechanism(fvcell, 0));
-        auto cr_opt_record_z_ptr = util::value_by_key((cr_mech_ptr->*private_field_table_ptr)(), "record_z"s);
-
-        ASSERT_TRUE(cr_opt_record_z_ptr);
-        auto& cr_record_z = *cr_opt_record_z_ptr.value();
-        ASSERT_EQ(7.0, cr_record_z[0]);
+        auto cr_mech_ptr = find_mechanism(fvcell, 0);
+        auto cr_record_z = mechanism_field(cr_mech_ptr, "record_z");
+        ASSERT_EQ(7.0, cr_record_z.at(0));
     }
 }
 
@@ -649,8 +619,8 @@ TEST(fvm_lowered, ionic_concentrations) {
     ion_config.reset_econc.assign(ncv, 0.);
     ion_config.reset_iconc.assign(ncv, 2.3e-4);
 
-    auto read_cai  = cat.instance<backend>("read_cai_init");
-    auto write_cai = cat.instance<backend>("write_cai_breakpoint");
+    auto read_cai  = cat.instance(backend::kind, "read_cai_init");
+    auto write_cai = cat.instance(backend::kind, "write_cai_breakpoint");
 
     auto& read_cai_mech  = read_cai.mech;
     auto& write_cai_mech = write_cai.mech;
@@ -659,8 +629,8 @@ TEST(fvm_lowered, ionic_concentrations) {
             ncell, ncell, 0, cv_to_intdom, cv_to_intdom, gj, vinit, temp, diam, src_to_spike, read_cai_mech->data_alignment());
     shared_state->add_ion("ca", 2, ion_config);
 
-    read_cai_mech->instantiate(0, *shared_state, overrides, layout);
-    write_cai_mech->instantiate(1, *shared_state, overrides, layout);
+    shared_state->instantiate(*read_cai_mech, 0, overrides, layout);
+    shared_state->instantiate(*write_cai_mech, 1, overrides, layout);
 
     shared_state->reset();
 
@@ -854,25 +824,20 @@ TEST(fvm_lowered, weighted_write_ion) {
     std::vector<double> expected_init_iconc = {0.75*con_int, 1.*con_int, 0};
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected_init_iconc, ion_init_iconc));
 
-    auto test_ca = dynamic_cast<multicore::mechanism*>(find_mechanism(fvcell, "test_ca"));
-
-    auto opt_cai_ptr = util::value_by_key((test_ca->*private_field_table_ptr)(), "cai"s);
-    ASSERT_TRUE(opt_cai_ptr);
-    auto& test_ca_cai = *opt_cai_ptr.value();
-
-    auto opt_ca_index_ptr = util::value_by_key((test_ca->*private_ion_index_table_ptr)(), "ca"s);
-    ASSERT_TRUE(opt_ca_index_ptr);
-    auto& test_ca_ca_index = *opt_ca_index_ptr.value();
+    auto test_ca = find_mechanism(fvcell, "test_ca");
+    auto test_ca_ca_index = mechanism_ion_index(test_ca, "ca");
 
     double cai_contrib[3] = {200., 0., 300.};
     double test_ca_weight[3] = {0.25, 0., 1.};
 
-    for (int i = 0; i<2; ++i) {
-        test_ca_cai[i] = cai_contrib[test_ca_ca_index[i]];
+    std::vector<double> test_ca_cai;
+    for (auto i: util::count_along(test_ca_ca_index)) {
+        test_ca_cai.push_back(cai_contrib[test_ca_ca_index[i]]);
     }
+    write_mechanism_field(test_ca, "cai", test_ca_cai);
 
     std::vector<double> expected_iconc(3);
-    for (int i = 0; i<3; ++i) {
+    for (auto i: util::count_along(expected_iconc)) {
         expected_iconc[i] = test_ca_weight[i]*cai_contrib[i] + ion_init_iconc[i];
     }
 
@@ -1544,4 +1509,4 @@ TEST(fvm_lowered, label_data) {
         }
         EXPECT_EQ(actual_labeled_ranges, expected_labeled_ranges);
     }
-}
\ No newline at end of file
+}
diff --git a/test/unit/test_intrin.cu b/test/unit/test_intrin.cu
index b8d1da9c..4c8f4bd1 100644
--- a/test/unit/test_intrin.cu
+++ b/test/unit/test_intrin.cu
@@ -2,8 +2,8 @@
 
 #include <limits>
 
-#include "backends/gpu/gpu_api.hpp"
-#include "backends/gpu/math_cu.hpp"
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/math_cu.hpp>
 
 #include "gpu_vector.hpp"
 
diff --git a/test/unit/test_kinetic_linear.cpp b/test/unit/test_kinetic_linear.cpp
index b45731a2..fc2557dd 100644
--- a/test/unit/test_kinetic_linear.cpp
+++ b/test/unit/test_kinetic_linear.cpp
@@ -39,7 +39,7 @@ void run_test(std::string mech_name,
     std::vector<fvm_index_type> cv_to_intdom(ncv, 0);
 
     std::vector<fvm_gap_junction> gj = {};
-    auto instance = cat.instance<backend>(mech_name);
+    auto instance = cat.instance(backend::kind, mech_name);
     auto& test = instance.mech;
 
     std::vector<fvm_value_type> temp(ncv, 300.);
@@ -58,10 +58,10 @@ void run_test(std::string mech_name,
         layout.cv.push_back(i);
     }
 
-    test->instantiate(0, *shared_state, overrides, layout);
+    shared_state->instantiate(*test, 0, overrides, layout);
 
     for (auto a: assigned_variables) {
-        test->set_parameter(a.first, std::vector<fvm_value_type>(ncv,a.second));
+        shared_state->set_parameter(*test, a.first, std::vector<fvm_value_type>(ncv,a.second));
     }
 
     shared_state->reset();
@@ -90,7 +90,7 @@ void run_test(std::string mech_name,
     }
 }
 
-TEST(mech_kinetic, kintetic_linear_scaled) {
+TEST(mech_kinetic, kinetic_linear_scaled) {
     std::vector<std::string> state_variables = {"s", "h", "d"};
     std::vector<fvm_value_type> t0_values = {0.5, 0.2, 0.3};
     std::vector<fvm_value_type> t1_0_values = {0.373297, 0.591621, 0.0350817};
@@ -101,7 +101,7 @@ TEST(mech_kinetic, kintetic_linear_scaled) {
 
 }
 
-TEST(mech_kinetic, kintetic_linear_1_conserve) {
+TEST(mech_kinetic, kinetic_linear_1_conserve) {
     std::vector<std::string> state_variables = {"s", "h", "d"};
     std::vector<fvm_value_type> t0_values = {0.5, 0.2, 0.3};
     std::vector<fvm_value_type> t1_0_values = {0.380338, 0.446414, 0.173247};
@@ -112,7 +112,7 @@ TEST(mech_kinetic, kintetic_linear_1_conserve) {
     run_test<multicore::backend>("test0_kin_steadystate", state_variables, {}, t0_values, t1_1_values, 0.5);
 }
 
-TEST(mech_kinetic, kintetic_linear_2_conserve) {
+TEST(mech_kinetic, kinetic_linear_2_conserve) {
     std::vector<std::string> state_variables = {"a", "b", "x", "y"};
     std::vector<fvm_value_type> t0_values = {0.2, 0.8, 0.6, 0.4};
     std::vector<fvm_value_type> t1_0_values = {0.217391304, 0.782608696, 0.33333333, 0.66666666};
@@ -123,7 +123,7 @@ TEST(mech_kinetic, kintetic_linear_2_conserve) {
     run_test<multicore::backend>("test1_kin_steadystate", state_variables, {}, t0_values, t1_1_values, 0.5);
 }
 
-TEST(mech_kinetic, kintetic_nonlinear) {
+TEST(mech_kinetic, kinetic_nonlinear) {
     std::vector<std::string> state_variables = {"a", "b", "c"};
     std::vector<fvm_value_type> t0_values = {0.2, 0.3, 0.5};
     std::vector<fvm_value_type> t1_0_values = {0.222881, 0.31144, 0.48856};
@@ -134,7 +134,7 @@ TEST(mech_kinetic, kintetic_nonlinear) {
 
 }
 
-TEST(mech_kinetic, kintetic_nonlinear_scaled) {
+TEST(mech_kinetic, kinetic_nonlinear_scaled) {
     std::vector<std::string> state_variables = {"A", "B", "C", "d", "e"};
     std::vector<fvm_value_type> t0_values = {4.5, 6.6, 0.28, 2, 0};
     std::vector<fvm_value_type> t1_values = {4.087281958014442,
@@ -157,8 +157,8 @@ TEST(mech_linear, linear_system) {
 }
 
 #ifdef ARB_GPU_ENABLED
-TEST(mech_kinetic_gpu, kintetic_linear_scaled) {
-     std::vector<std::string> state_variables = {"s", "h", "d"};
+TEST(mech_kinetic_gpu, kinetic_linear_scaled) {
+    std::vector<std::string> state_variables = {"s", "h", "d"};
     std::vector<fvm_value_type> t0_values = {0.5, 0.2, 0.3};
     std::vector<fvm_value_type> t1_0_values = {0.373297, 0.591621, 0.0350817};
     std::vector<fvm_value_type> t1_1_values = {0.329897, 0.537371, 0.132732};
@@ -167,7 +167,7 @@ TEST(mech_kinetic_gpu, kintetic_linear_scaled) {
     run_test<gpu::backend>("test1_kin_compartment", state_variables, {}, t0_values, t1_1_values, 0.5);
 }
 
-TEST(mech_kinetic_gpu, kintetic_linear_1_conserve) {
+TEST(mech_kinetic_gpu, kinetic_linear_1_conserve) {
     std::vector<std::string> state_variables = {"s", "h", "d"};
     std::vector<fvm_value_type> t0_values = {0.5, 0.2, 0.3};
     std::vector<fvm_value_type> t1_0_values = {0.380338, 0.446414, 0.173247};
@@ -178,7 +178,7 @@ TEST(mech_kinetic_gpu, kintetic_linear_1_conserve) {
     run_test<gpu::backend>("test0_kin_steadystate", state_variables, {}, t0_values, t1_1_values, 0.5);
 }
 
-TEST(mech_kinetic_gpu, kintetic_linear_2_conserve) {
+TEST(mech_kinetic_gpu, kinetic_linear_2_conserve) {
     std::vector<std::string> state_variables = {"a", "b", "x", "y"};
     std::vector<fvm_value_type> t0_values = {0.2, 0.8, 0.6, 0.4};
     std::vector<fvm_value_type> t1_0_values = {0.217391304, 0.782608696, 0.33333333, 0.66666666};
@@ -189,7 +189,7 @@ TEST(mech_kinetic_gpu, kintetic_linear_2_conserve) {
     run_test<gpu::backend>("test1_kin_steadystate", state_variables, {}, t0_values, t1_1_values, 0.5);
 }
 
-TEST(mech_kinetic_gpu, kintetic_nonlinear) {
+TEST(mech_kinetic_gpu, kinetic_nonlinear) {
     std::vector<std::string> state_variables = {"a", "b", "c"};
     std::vector<fvm_value_type> t0_values = {0.2, 0.3, 0.5};
     std::vector<fvm_value_type> t1_0_values = {0.222881, 0.31144, 0.48856};
@@ -199,7 +199,7 @@ TEST(mech_kinetic_gpu, kintetic_nonlinear) {
     run_test<gpu::backend>("test3_kin_diff", state_variables, {}, t0_values, t1_1_values, 0.025);
 }
 
-TEST(mech_kinetic_gpu, kintetic_nonlinear_scaled) {
+TEST(mech_kinetic_gpu, kinetic_nonlinear_scaled) {
     std::vector<std::string> state_variables = {"A", "B", "C", "d", "e"};
     std::vector<fvm_value_type> t0_values = {4.5, 6.6, 0.28, 2, 0};
     std::vector<fvm_value_type> t1_values = {4.087281958014442,
diff --git a/test/unit/test_matrix_gpu.cpp b/test/unit/test_matrix_gpu.cpp
index a2f8e2fd..d015d2af 100644
--- a/test/unit/test_matrix_gpu.cpp
+++ b/test/unit/test_matrix_gpu.cpp
@@ -11,12 +11,12 @@
 #endif
 
 #include <arbor/math.hpp>
+#include <arbor/gpu/gpu_common.hpp>
 
 #include "matrix.hpp"
 #include "memory/memory.hpp"
 #include "util/span.hpp"
 
-#include "backends/gpu/gpu_common.hpp"
 #include "backends/gpu/matrix_state_flat.hpp"
 #include "backends/gpu/matrix_state_fine.hpp"
 
diff --git a/test/unit/test_mech_temp_diam.cpp b/test/unit/test_mech_temp_diam.cpp
index ba051960..cb872d84 100644
--- a/test/unit/test_mech_temp_diam.cpp
+++ b/test/unit/test_mech_temp_diam.cpp
@@ -25,7 +25,7 @@ void run_celsius_test() {
     std::vector<fvm_index_type> cv_to_intdom(ncv, 0);
 
     std::vector<fvm_gap_junction> gj = {};
-    auto instance = cat.instance<backend>("celsius_test");
+    auto instance = cat.instance(backend::kind, "celsius_test");
     auto& celsius_test = instance.mech;
 
     double temperature_K = 300.;
@@ -47,7 +47,7 @@ void run_celsius_test() {
         layout.cv.push_back(i);
     }
 
-    celsius_test->instantiate(0, *shared_state, overrides, layout);
+    shared_state->instantiate(*celsius_test, 0, overrides, layout);
     shared_state->reset();
 
     // expect 0 value in state 'c' after init:
@@ -76,7 +76,7 @@ void run_diam_test() {
     std::vector<fvm_index_type> cv_to_intdom(ncv, 0);
 
     std::vector<fvm_gap_junction> gj = {};
-    auto instance = cat.instance<backend>("diam_test");
+    auto instance = cat.instance(backend::kind, "diam_test");
     auto& celsius_test = instance.mech;
 
     std::vector<fvm_value_type> temp(ncv, 300.);
@@ -98,7 +98,7 @@ void run_diam_test() {
             ncell, ncell, 0, cv_to_intdom, cv_to_intdom, gj, vinit, temp, diam, src_to_spike, celsius_test->data_alignment());
 
 
-    celsius_test->instantiate(0, *shared_state, overrides, layout);
+    shared_state->instantiate(*celsius_test, 0, overrides, layout);
     shared_state->reset();
 
     // expect 0 value in state 'd' after init:
diff --git a/test/unit/test_mechcat.cpp b/test/unit/test_mechcat.cpp
index 79f0e5a4..488534d0 100644
--- a/test/unit/test_mechcat.cpp
+++ b/test/unit/test_mechcat.cpp
@@ -34,192 +34,155 @@ using namespace arb;
 
 using field_kind = mechanism_field_spec::field_kind;
 
-mechanism_info burble_info = {
-    {{"quux",  {field_kind::global, "nA", 2.3,   0, 10.}},
-     {"xyzzy", {field_kind::global, "mV", 5.1, -20, 20.}}},
-    {},
-    {},
-    {{"x", {}}},
-    "burbleprint"
-};
+mechanism_info mk_burble_info() {
+    mechanism_info info;
+    info.globals     = {{"quux",  {field_kind::global, "nA", 2.3,   0, 10.}},
+                        {"xyzzy", {field_kind::global, "mV", 5.1, -20, 20.}}};
+    info.ions        = {{"x", {}}};
+    info.fingerprint = "burbleprint";
+    return info;
+}
 
-mechanism_info fleeb_info = {
-    {{"plugh", {field_kind::global, "C",   2.3,  0, 10.}},
-     {"norf",  {field_kind::global, "mGy", 0.1,  0, 5000.}}},
-    {},
-    {},
-    {{"a", {}}, {"b", {}}, {"c", {}}, {"d", {}}},
-    "fleebprint"
-};
+mechanism_info mk_fleeb_info() {
+    mechanism_info info;
+    info.globals     = {{"plugh", {field_kind::global, "C",   2.3,  0, 10.}},
+                        {"norf",  {field_kind::global, "mGy", 0.1,  0, 5000.}}};
+    info.ions        = {{"a", {}}, {"b", {}}, {"c", {}}, {"d", {}}};
+    info.fingerprint = "fleebprint";
+    return info;
+}
 
 // Backend classes:
+struct test_backend {
+    using iarray = std::vector<fvm_index_type>;
+    using array  = std::vector<fvm_value_type>;
 
-template <typename B>
-struct common_impl: concrete_mechanism<B> {
-    void instantiate(fvm_size_type id, typename B::shared_state& state, const mechanism_overrides& o, const mechanism_layout& l) override {
-        this->width_ = l.cv.size();
-        // Write mechanism global values to shared state to test instatiation call and catalogue global
-        // variable overrides.
-        for (auto& kv: o.globals) {
-            state.overrides.insert(kv);
-        }
+    test_backend(const std::unordered_map<std::string, arb_ion_state>& ions_): shared_{ions_} {}
 
-        for (auto& ion: mech_ions) {
-            if (o.ion_rebind.count(ion)) {
-                ion_bindings_[ion] = state.ions.at(o.ion_rebind.at(ion));
-            } else {
-                ion_bindings_[ion] = state.ions.at(ion);
+    struct shared_state {
+        shared_state(const std::unordered_map<std::string, arb_ion_state>& ions_): ions{ions_} {}
+
+        void instantiate(mechanism& m, fvm_size_type id, const mechanism_overrides& o, const mechanism_layout& l) {
+            m.ppack_ = {0};
+            m.ppack_.width = l.cv.size();
+            m.ppack_.mechanism_id = id;
+
+            // Write mechanism global values to shared state to test instantiation call and catalogue global
+            // variable overrides.
+            for (auto& kv: o.globals) overrides.insert(kv);
+
+            ASSERT_EQ(storage.count(id), 0ul);
+            storage[id].resize(m.mech_.n_ions);
+            m.ppack_.ion_states = storage[id].data();
+            for (arb_size_type idx = 0; idx < m.mech_.n_ions; ++idx) {
+                auto ion = m.mech_.ions[idx].name;
+                if (o.ion_rebind.count(ion)) {
+                    m.ppack_.ion_states[idx].current_density = ions.at(o.ion_rebind.at(ion)).current_density;
+                } else {
+                    m.ppack_.ion_states[idx] = ions.at(ion);
+                }
             }
         }
-    }
-
-    std::size_t memory() const override { return 10u; }
-
-    void set_parameter(const std::string& key, const std::vector<fvm_value_type>& vs) override {}
-
-    fvm_value_type* field_data(const std::string& var) override { return nullptr; }
-    std::size_t object_sizeof() const override { return sizeof(*this); }
 
-    void initialize() override {}
-    void update_state() override {}
-    void update_current() override {}
-    void deliver_events() override {}
-    void update_ions() override {}
-
-    std::vector<std::string> mech_ions;
-
-    std::unordered_map<std::string, std::string> ion_bindings_;
-
-protected:
-    mechanism_ppack* ppack_ptr() override { return nullptr; }
-};
-
-template <typename B>
-std::string ion_binding(const std::unique_ptr<concrete_mechanism<B>>& mech, const char* ion) {
-    const common_impl<B>& impl = dynamic_cast<const common_impl<B>&>(*mech.get());
-    return impl.ion_bindings_.count(ion)? impl.ion_bindings_.at(ion): "";
-}
-
-struct foo_stream_state {};
+        std::unordered_map<std::string, fvm_value_type> overrides;
+        std::unordered_map<std::string, arb_ion_state> ions;
+        std::unordered_map<arb_size_type, std::vector<arb_ion_state>> storage;
+    };
 
-struct foo_stream {
-    using state = foo_stream_state;
-    state& marked_events() { return state_; }
-    state state_;
-};
+    shared_state shared_;
 
-struct foo_backend {
-    using iarray = std::vector<fvm_index_type>;
-    using array  = std::vector<fvm_value_type>;
-    using deliverable_event_stream = foo_stream;
-
-    struct shared_state {
-        std::unordered_map<std::string, fvm_value_type> overrides;
-        std::unordered_map<std::string, std::string> ions = {
-            { "a", "foo_ion_a" },
-            { "b", "foo_ion_b" },
-            { "c", "foo_ion_c" },
-            { "d", "foo_ion_d" },
-            { "e", "foo_ion_e" },
-            { "f", "foo_ion_f" }
+    struct deliverable_event_stream {
+        struct state {
+            void* ev_data;
+            int* begin_offset;
+            int* end_offset;
+            int n;
         };
+        state& marked_events() { return state_; }
+        state state_;
     };
 };
 
-using foo_mechanism = common_impl<foo_backend>;
-
-struct bar_stream_state {};
-
-struct bar_stream {
-    using state = bar_stream_state;
-    state& marked_events() { return state_; }
-    state state_;
+struct foo_backend: test_backend {
+    static constexpr arb_backend_kind kind = 42;
+    foo_backend(): test_backend{{{ "a", arb_ion_state{(arb_value_type*)0x1, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "b", arb_ion_state{(arb_value_type*)0x2, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "c", arb_ion_state{(arb_value_type*)0x3, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "d", arb_ion_state{(arb_value_type*)0x4, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "e", arb_ion_state{(arb_value_type*)0x5, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "f", arb_ion_state{(arb_value_type*)0x6, nullptr, nullptr, nullptr, nullptr, nullptr}}}} {}
 };
 
-struct bar_backend {
-    using iarray = std::vector<fvm_index_type>;
-    using array  = std::vector<fvm_value_type>;
-    using deliverable_event_stream = bar_stream;
-    struct shared_state {
-        std::unordered_map<std::string, fvm_value_type> overrides;
-        std::unordered_map<std::string, std::string> ions = {
-            { "a", "bar_ion_a" },
-            { "b", "bar_ion_b" },
-            { "c", "bar_ion_c" },
-            { "d", "bar_ion_d" },
-            { "e", "bar_ion_e" },
-            { "f", "bar_ion_f" }
-        };
-    };
+struct bar_backend: test_backend {
+    static constexpr arb_backend_kind kind = 23;
+    bar_backend(): test_backend{{{ "a", arb_ion_state{(arb_value_type*)0x7, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "b", arb_ion_state{(arb_value_type*)0x8, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "c", arb_ion_state{(arb_value_type*)0x8, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "d", arb_ion_state{(arb_value_type*)0x9, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "e", arb_ion_state{(arb_value_type*)0xa, nullptr, nullptr, nullptr, nullptr, nullptr}},
+                                 { "f", arb_ion_state{(arb_value_type*)0xb, nullptr, nullptr, nullptr, nullptr, nullptr}}}} {}
 };
 
-using bar_mechanism = common_impl<bar_backend>;
-
 // Fleeb implementations:
 
-struct fleeb_foo: foo_mechanism {
-    fleeb_foo() {
-        this->mech_ions = {"a", "b", "c", "d"};
-    }
+static arb_ion_info ion_list[] {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, {"f"}};
 
-    const mechanism_fingerprint& fingerprint() const override {
-        static mechanism_fingerprint hash = "fleebprint";
-        return hash;
-    }
+mechanism_ptr mk_fleeb_foo() {
+    arb_mechanism_type m = {ARB_MECH_ABI_VERSION};
+    m.fingerprint = "fleebprint";
+    m.name        = "fleeb";
+    m.kind        = arb_mechanism_kind_density;
+    m.n_ions      = 6;
+    m.ions        = ion_list;
 
-    std::string internal_name() const override { return "fleeb"; }
-    mechanismKind kind() const override { return mechanismKind::density; }
-    mechanism_ptr clone() const override { return mechanism_ptr(new fleeb_foo()); }
-};
+    arb_mechanism_interface i = {0};
+    i.backend    = foo_backend::kind;
 
-struct special_fleeb_foo: foo_mechanism {
-    special_fleeb_foo() {
-        this->mech_ions = {"a", "b", "c", "d"};
-    }
+    return std::make_unique<mechanism>(m, i);
+}
 
-    const mechanism_fingerprint& fingerprint() const override {
-        static mechanism_fingerprint hash = "fleebprint";
-        return hash;
-    }
+mechanism_ptr mk_special_fleeb_foo() {
+    arb_mechanism_type m = {ARB_MECH_ABI_VERSION};
+    m.fingerprint = "fleebprint";
+    m.name        = "special fleeb";
+    m.kind        = arb_mechanism_kind_density;
+    m.n_ions      = 6;
+    m.ions        = ion_list;
 
-    std::string internal_name() const override { return "special fleeb"; }
-    mechanismKind kind() const override { return mechanismKind::density; }
-    mechanism_ptr clone() const override { return mechanism_ptr(new special_fleeb_foo()); }
-};
+    arb_mechanism_interface i = {0};
+    i.backend    = foo_backend::kind;
 
-struct fleeb_bar: bar_mechanism {
-    fleeb_bar() {
-        this->mech_ions = {"a", "b", "c", "d"};
-    }
+    return std::make_unique<mechanism>(m, i);
+}
 
-    const mechanism_fingerprint& fingerprint() const override {
-        static mechanism_fingerprint hash = "fleebprint";
-        return hash;
-    }
+mechanism_ptr mk_fleeb_bar() {
+    arb_mechanism_type m = {ARB_MECH_ABI_VERSION};
+    m.fingerprint = "fleebprint";
+    m.name        = "fleeb";
+    m.kind        = arb_mechanism_kind_density;
+    m.n_ions      = 6;
+    m.ions        = ion_list;
 
-    std::string internal_name() const override { return "fleeb"; }
-    mechanismKind kind() const override { return mechanismKind::density; }
-    mechanism_ptr clone() const override { return mechanism_ptr(new fleeb_bar()); }
-};
+    arb_mechanism_interface i = {0};
+    i.backend    = bar_backend::kind;
 
-// Burble implementation:
+    return std::make_unique<mechanism>(m, i);
+}
 
-struct burble_bar: bar_mechanism {
-    const mechanism_fingerprint& fingerprint() const override {
-        static mechanism_fingerprint hash = "fnord";
-        return hash;
-    }
+// Burble implementation:
 
-    std::string internal_name() const override { return "burble"; }
-    mechanismKind kind() const override { return mechanismKind::density; }
-    mechanism_ptr clone() const override { return mechanism_ptr(new burble_bar()); }
-};
+mechanism_ptr mk_burble_bar() {
+    arb_mechanism_type m = {ARB_MECH_ABI_VERSION};
+    m.fingerprint = "fnord";
+    m.name        = "burble";
+    m.kind        = arb_mechanism_kind_density;
+    m.n_ions      = 6;
+    m.ions        = ion_list;
 
-// Implementation register helper:
+    arb_mechanism_interface i = {0};
+    i.backend    = bar_backend::kind;
 
-template <typename B, typename M>
-std::unique_ptr<concrete_mechanism<B>> make_mech() {
-    return std::unique_ptr<concrete_mechanism<B>>(new M());
+    return std::make_unique<mechanism>(m, i);
 }
 
 // Mechinfo equality test:
@@ -241,8 +204,8 @@ static bool operator==(const mechanism_info& a, const mechanism_info& b) {
 mechanism_catalogue build_fake_catalogue() {
     mechanism_catalogue cat;
 
-    cat.add("fleeb", fleeb_info);
-    cat.add("burble", burble_info);
+    cat.add("fleeb", mk_fleeb_info());
+    cat.add("burble", mk_burble_info());
 
     // Add derived versions with global overrides:
 
@@ -254,9 +217,9 @@ mechanism_catalogue build_fake_catalogue() {
 
     // Attach implementations:
 
-    cat.register_implementation<bar_backend>("fleeb", make_mech<bar_backend, fleeb_bar>());
-    cat.register_implementation<foo_backend>("fleeb", make_mech<foo_backend, fleeb_foo>());
-    cat.register_implementation<foo_backend>("special_fleeb", make_mech<foo_backend, special_fleeb_foo>());
+    cat.register_implementation("fleeb",         mk_fleeb_bar());
+    cat.register_implementation("fleeb",         mk_fleeb_foo());
+    cat.register_implementation("special_fleeb", mk_special_fleeb_foo());
 
     return cat;
 }
@@ -269,7 +232,7 @@ TEST(mechcat, fingerprint) {
     EXPECT_EQ("burbleprint", cat.fingerprint("burble"));
     EXPECT_EQ("burbleprint", cat.fingerprint("bleeble"));
 
-    EXPECT_THROW(cat.register_implementation<bar_backend>("burble", make_mech<bar_backend, burble_bar>()),
+    EXPECT_THROW(cat.register_implementation("burble", std::unique_ptr<mechanism>{mk_burble_bar()}),
         arb::fingerprint_mismatch);
 }
 
@@ -330,14 +293,14 @@ TEST(mechcat, loading) {
 TEST(mechcat, derived_info) {
     auto cat = build_fake_catalogue();
 
-    EXPECT_EQ(fleeb_info,  cat["fleeb"]);
-    EXPECT_EQ(burble_info, cat["burble"]);
+    EXPECT_EQ(mk_fleeb_info(),  cat["fleeb"]);
+    EXPECT_EQ(mk_burble_info(), cat["burble"]);
 
-    mechanism_info expected_special_fleeb = fleeb_info;
+    mechanism_info expected_special_fleeb = mk_fleeb_info();
     expected_special_fleeb.globals["plugh"].default_value = 2.0;
     EXPECT_EQ(expected_special_fleeb, cat["special_fleeb"]);
 
-    mechanism_info expected_fleeb2 = fleeb_info;
+    mechanism_info expected_fleeb2 = mk_fleeb_info();
     expected_fleeb2.globals["plugh"].default_value = 2.0;
     expected_fleeb2.globals["norf"].default_value = 11.0;
     EXPECT_EQ(expected_fleeb2, cat["fleeb2"]);
@@ -372,37 +335,63 @@ TEST(mechcat, remove) {
     EXPECT_FALSE(cat.has("fleeb2")); // fleeb2 derived from special_fleeb.
 }
 
+bool cmp_mechs(const mechanism& a, const mechanism& b) {
+    return
+        (a.iface_.backend == b.iface_.backend) &&
+        (a.iface_.partition_width == b.iface_.partition_width) &&
+        (a.iface_.alignment == b.iface_.alignment) &&
+        (a.iface_.init_mechanism == b.iface_.init_mechanism) &&
+        (a.iface_.compute_currents == b.iface_.compute_currents) &&
+        (a.iface_.apply_events == b.iface_.apply_events) &&
+        (a.iface_.advance_state == b.iface_.advance_state) &&
+        (a.iface_.write_ions == b.iface_.write_ions) &&
+        (a.iface_.post_event == b.iface_.post_event) &&
+        (a.mech_.abi_version == b.mech_.abi_version) &&
+        (std::string{a.mech_.fingerprint} == std::string{b.mech_.fingerprint}) &&
+        (std::string{a.mech_.name} == std::string{b.mech_.name}) &&
+        (a.mech_.kind == b.mech_.kind) &&
+        (a.mech_.is_linear == b.mech_.is_linear) &&
+        (a.mech_.has_post_events == b.mech_.has_post_events) &&
+        (a.mech_.globals == b.mech_.globals) && (a.mech_.n_globals == b.mech_.n_globals) &&
+        (a.mech_.state_vars == b.mech_.state_vars) && (a.mech_.n_state_vars == b.mech_.n_state_vars) &&
+        (a.mech_.parameters == b.mech_.parameters) && (a.mech_.n_parameters == b.mech_.n_parameters) &&
+        (a.mech_.ions == b.mech_.ions) && (a.mech_.n_ions == b.mech_.n_ions);
+}
+
 TEST(mechcat, instance) {
     auto cat = build_fake_catalogue();
 
-    EXPECT_THROW(cat.instance<bar_backend>("burble"), arb::no_such_implementation);
+    EXPECT_THROW(cat.instance(bar_backend::kind, "burble"), arb::no_such_implementation);
 
     // All fleebs on the bar backend have the same implementation:
 
-    auto fleeb_bar_inst = cat.instance<bar_backend>("fleeb");
-    auto fleeb1_bar_inst = cat.instance<bar_backend>("fleeb1");
-    auto special_fleeb_bar_inst = cat.instance<bar_backend>("special_fleeb");
-    auto fleeb2_bar_inst = cat.instance<bar_backend>("fleeb2");
+    auto fleeb_bar_inst = cat.instance(bar_backend::kind, "fleeb");
+    auto fleeb1_bar_inst = cat.instance(bar_backend::kind, "fleeb1");
+    auto special_fleeb_bar_inst = cat.instance(bar_backend::kind, "special_fleeb");
+    auto fleeb2_bar_inst = cat.instance(bar_backend::kind, "fleeb2");
 
-    EXPECT_EQ(typeid(fleeb_bar), typeid(*fleeb_bar_inst.mech.get()));
-    EXPECT_EQ(typeid(fleeb_bar), typeid(*fleeb1_bar_inst.mech.get()));
-    EXPECT_EQ(typeid(fleeb_bar), typeid(*special_fleeb_bar_inst.mech.get()));
-    EXPECT_EQ(typeid(fleeb_bar), typeid(*fleeb2_bar_inst.mech.get()));
+    auto fleeb_bar = mk_fleeb_bar();
+    EXPECT_TRUE(cmp_mechs(*fleeb_bar, *fleeb_bar_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*fleeb_bar, *fleeb1_bar_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*fleeb_bar, *special_fleeb_bar_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*fleeb_bar, *fleeb2_bar_inst.mech));
 
     EXPECT_EQ("fleeb"s, fleeb2_bar_inst.mech->internal_name());
 
     // special_fleeb and fleeb2 (deriving from special_fleeb) have a specialized
     // implementation:
 
-    auto fleeb_foo_inst = cat.instance<foo_backend>("fleeb");
-    auto fleeb1_foo_inst = cat.instance<foo_backend>("fleeb1");
-    auto special_fleeb_foo_inst = cat.instance<foo_backend>("special_fleeb");
-    auto fleeb2_foo_inst = cat.instance<foo_backend>("fleeb2");
+    auto fleeb_foo_inst = cat.instance(foo_backend::kind, "fleeb");
+    auto fleeb1_foo_inst = cat.instance(foo_backend::kind, "fleeb1");
+    auto special_fleeb_foo_inst = cat.instance(foo_backend::kind, "special_fleeb");
+    auto fleeb2_foo_inst = cat.instance(foo_backend::kind,"fleeb2");
 
-    EXPECT_EQ(typeid(fleeb_foo), typeid(*fleeb_foo_inst.mech.get()));
-    EXPECT_EQ(typeid(fleeb_foo), typeid(*fleeb1_foo_inst.mech.get()));
-    EXPECT_EQ(typeid(special_fleeb_foo), typeid(*special_fleeb_foo_inst.mech.get()));
-    EXPECT_EQ(typeid(special_fleeb_foo), typeid(*fleeb2_foo_inst.mech.get()));
+    auto fleeb_foo = mk_fleeb_foo();
+    auto special_fleeb_foo = mk_special_fleeb_foo();
+    EXPECT_TRUE(cmp_mechs(*fleeb_foo,  *fleeb_foo_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*fleeb_foo,  *fleeb1_foo_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*special_fleeb_foo, *special_fleeb_foo_inst.mech));
+    EXPECT_TRUE(cmp_mechs(*special_fleeb_foo, *fleeb2_foo_inst.mech));
 
     EXPECT_EQ("fleeb"s, fleeb1_foo_inst.mech->internal_name());
     EXPECT_EQ("special fleeb"s, fleeb2_foo_inst.mech->internal_name());
@@ -414,47 +403,65 @@ TEST(mechcat, instantiate) {
     // these tests for testing purposes.
 
     mechanism_layout layout = {{0u, 1u, 2u}, {1., 2., 1.}, {1u, 1u, 1u}};
-    bar_backend::shared_state bar_state;
+    bar_backend bar;
 
     auto cat = build_fake_catalogue();
 
-    auto fleeb = cat.instance<bar_backend>("fleeb");
-    fleeb.mech->instantiate(0, bar_state, fleeb.overrides, layout);
-    EXPECT_TRUE(bar_state.overrides.empty());
-
-    bar_state.overrides.clear();
-    auto fleeb2 = cat.instance<bar_backend>("fleeb2");
-    fleeb2.mech->instantiate(0, bar_state, fleeb2.overrides, layout);
-    EXPECT_EQ(2.0,  bar_state.overrides.at("plugh"));
-    EXPECT_EQ(11.0, bar_state.overrides.at("norf"));
-
     // Check ion rebinding:
     // fleeb1 should have ions 'a' and 'b' swapped;
-    // fleeb2 should swap 'b' and 'c' relative to fleeb1, so that
-    // 'b' maps to the state 'c' ion, 'c' maps to the state 'a' ion,
-    // and 'a' maps to the state 'b' ion.
+    auto fleeb = cat.instance(bar_backend::kind, "fleeb/a=b,b=a");
+    bar.shared_.instantiate(*fleeb.mech, 0, fleeb.overrides, layout);
+    EXPECT_TRUE(bar.shared_.overrides.empty());
 
-    EXPECT_EQ("bar_ion_a", ion_binding(fleeb.mech, "a"));
-    EXPECT_EQ("bar_ion_b", ion_binding(fleeb.mech, "b"));
-    EXPECT_EQ("bar_ion_c", ion_binding(fleeb.mech, "c"));
-    EXPECT_EQ("bar_ion_d", ion_binding(fleeb.mech, "d"));
 
-    auto fleeb3 = cat.instance<bar_backend>("fleeb3");
-    fleeb3.mech->instantiate(0, bar_state, fleeb3.overrides, layout);
+    EXPECT_EQ(bar.shared_.ions.at("b").current_density, fleeb.mech->ppack_.ion_states[0].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("a").current_density, fleeb.mech->ppack_.ion_states[1].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("c").current_density, fleeb.mech->ppack_.ion_states[2].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("d").current_density, fleeb.mech->ppack_.ion_states[3].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("e").current_density, fleeb.mech->ppack_.ion_states[4].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("f").current_density, fleeb.mech->ppack_.ion_states[5].current_density);
 
-    foo_backend::shared_state foo_state;
-    auto fleeb1 = cat.instance<foo_backend>("fleeb1");
-    fleeb1.mech->instantiate(0, foo_state, fleeb1.overrides, layout);
+    bar.shared_.overrides.clear();
 
-    EXPECT_EQ("foo_ion_b", ion_binding(fleeb1.mech, "a"));
-    EXPECT_EQ("foo_ion_a", ion_binding(fleeb1.mech, "b"));
-    EXPECT_EQ("foo_ion_c", ion_binding(fleeb1.mech, "c"));
-    EXPECT_EQ("foo_ion_d", ion_binding(fleeb1.mech, "d"));
+    // fleeb2 should swap 'b' and 'c' relative to fleeb1, so that
+    // 'b' maps to the state 'c' ion, 'c' maps to the state 'a' ion,
+    // and 'a' maps to the state 'b' ion.
 
-    EXPECT_EQ("bar_ion_c", ion_binding(fleeb3.mech, "a"));
-    EXPECT_EQ("bar_ion_a", ion_binding(fleeb3.mech, "b"));
-    EXPECT_EQ("bar_ion_b", ion_binding(fleeb3.mech, "c"));
-    EXPECT_EQ("bar_ion_d", ion_binding(fleeb3.mech, "d"));
+    auto fleeb2 = cat.instance(bar_backend::kind, "fleeb2/a=b,b=c,c=a");
+    bar.shared_.instantiate(*fleeb2.mech, 1, fleeb2.overrides, layout);
+
+    EXPECT_EQ(bar.shared_.ions.at("b").current_density, fleeb2.mech->ppack_.ion_states[0].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("c").current_density, fleeb2.mech->ppack_.ion_states[1].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("a").current_density, fleeb2.mech->ppack_.ion_states[2].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("d").current_density, fleeb2.mech->ppack_.ion_states[3].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("e").current_density, fleeb2.mech->ppack_.ion_states[4].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("f").current_density, fleeb2.mech->ppack_.ion_states[5].current_density);
+
+    EXPECT_EQ(2.0,  bar.shared_.overrides.at("plugh"));
+    EXPECT_EQ(11.0, bar.shared_.overrides.at("norf"));
+
+    // fleeb3 has a global ion binding
+    auto fleeb3 = cat.instance(bar_backend::kind, "fleeb3");
+    bar.shared_.instantiate(*fleeb3.mech, 3, fleeb3.overrides, layout);
+
+    EXPECT_EQ(bar.shared_.ions.at("c").current_density, fleeb3.mech->ppack_.ion_states[0].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("a").current_density, fleeb3.mech->ppack_.ion_states[1].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("b").current_density, fleeb3.mech->ppack_.ion_states[2].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("d").current_density, fleeb3.mech->ppack_.ion_states[3].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("e").current_density, fleeb3.mech->ppack_.ion_states[4].current_density);
+    EXPECT_EQ(bar.shared_.ions.at("f").current_density, fleeb3.mech->ppack_.ion_states[5].current_density);
+
+    foo_backend foo;
+    // fleeb1 has a global ion binding
+    auto fleeb1 = cat.instance(foo_backend::kind, "fleeb1");
+    foo.shared_.instantiate(*fleeb1.mech, 4, fleeb1.overrides, layout);
+
+    EXPECT_EQ(foo.shared_.ions.at("b").current_density, fleeb1.mech->ppack_.ion_states[0].current_density);
+    EXPECT_EQ(foo.shared_.ions.at("a").current_density, fleeb1.mech->ppack_.ion_states[1].current_density);
+    EXPECT_EQ(foo.shared_.ions.at("c").current_density, fleeb1.mech->ppack_.ion_states[2].current_density);
+    EXPECT_EQ(foo.shared_.ions.at("d").current_density, fleeb1.mech->ppack_.ion_states[3].current_density);
+    EXPECT_EQ(foo.shared_.ions.at("e").current_density, fleeb1.mech->ppack_.ion_states[4].current_density);
+    EXPECT_EQ(foo.shared_.ions.at("f").current_density, fleeb1.mech->ppack_.ion_states[5].current_density);
 }
 
 TEST(mechcat, bad_ion_rename) {
@@ -498,14 +505,14 @@ TEST(mechcat, implicit_deriv) {
     EXPECT_THROW(cat["fleeb/fish"], invalid_ion_remap);
 
     // Implicitly derived mechanisms should inherit implementations.
-    auto fleeb2 = cat.instance<foo_backend>("fleeb2");
-    auto fleeb2_derived = cat.instance<foo_backend>("fleeb2/plugh=4.5");
+    auto fleeb2 = cat.instance(foo_backend::kind, "fleeb2");
+    auto fleeb2_derived = cat.instance(foo_backend::kind, "fleeb2/plugh=4.5");
     EXPECT_EQ("special fleeb", fleeb2.mech->internal_name());
     EXPECT_EQ("special fleeb", fleeb2_derived.mech->internal_name());
     EXPECT_EQ(4.5, fleeb2_derived.overrides.globals.at("plugh"));
 
     // Requesting an implicitly derived instance with improper parameters should throw.
-    EXPECT_THROW(cat.instance<foo_backend>("fleeb2/fidget=7"), no_such_parameter);
+    EXPECT_THROW(cat.instance(foo_backend::kind, "fleeb2/fidget=7"), no_such_parameter);
 
     // Testing for implicit derivation though should not throw.
     EXPECT_TRUE(cat.has("fleeb2/plugh=7"));
@@ -520,8 +527,8 @@ TEST(mechcat, copy) {
 
     EXPECT_EQ(cat["fleeb2"], cat2["fleeb2"]);
 
-    auto fleeb2_inst = cat.instance<foo_backend>("fleeb2");
-    auto fleeb2_inst2 = cat2.instance<foo_backend>("fleeb2");
+    auto fleeb2_inst = cat.instance(foo_backend::kind, "fleeb2");
+    auto fleeb2_inst2 = cat2.instance(foo_backend::kind, "fleeb2");
 
     EXPECT_EQ(typeid(*fleeb2_inst.mech.get()), typeid(*fleeb2_inst2.mech.get()));
 }
@@ -539,8 +546,8 @@ TEST(mechcat, import) {
 
     EXPECT_EQ(cat["fleeb2"], cat2["fake_fleeb2"]);
 
-    auto fleeb2_inst  = cat.instance<foo_backend>("fleeb2");
-    auto fleeb2_inst2 = cat2.instance<foo_backend>("fake_fleeb2");
+    auto fleeb2_inst  = cat.instance(foo_backend::kind, "fleeb2");
+    auto fleeb2_inst2 = cat2.instance(foo_backend::kind, "fake_fleeb2");
 
     EXPECT_EQ(typeid(*fleeb2_inst.mech.get()), typeid(*fleeb2_inst2.mech.get()));
 }
@@ -567,10 +574,10 @@ TEST(mechcat, import_collisions) {
             auto cat = build_fake_catalogue();
 
             mechanism_catalogue other;
-            other.add("fleeb", burble_info); // Note different mechanism info!
+            other.add("fleeb", mk_burble_info()); // Note different mechanism info!
 
             EXPECT_THROW(cat.import(other, ""), arb::duplicate_mechanism);
-            ASSERT_EQ(cat["fleeb"], fleeb_info);
+            ASSERT_EQ(cat["fleeb"], mk_fleeb_info());
         }
 
         // Collision derived vs base.
@@ -578,7 +585,7 @@ TEST(mechcat, import_collisions) {
             auto cat = build_fake_catalogue();
 
             mechanism_catalogue other;
-            other.add("fleeb2", burble_info);
+            other.add("fleeb2", mk_burble_info());
 
             auto fleeb2_info = cat["fleeb2"];
             EXPECT_THROW(cat.import(other, ""), arb::duplicate_mechanism);
@@ -590,13 +597,13 @@ TEST(mechcat, import_collisions) {
             auto cat = build_fake_catalogue();
 
             mechanism_catalogue other;
-            other.add("zonkers", fleeb_info);
+            other.add("zonkers", mk_fleeb_info());
             other.derive("fleeb", "zonkers", {{"plugh", 8.}});
-            ASSERT_FALSE(other["fleeb"]==fleeb_info);
+            ASSERT_FALSE(other["fleeb"]==mk_fleeb_info());
 
             ASSERT_FALSE(cat.has("zonkers"));
             EXPECT_THROW(cat.import(other, ""), arb::duplicate_mechanism);
-            EXPECT_EQ(cat["fleeb"], fleeb_info);
+            EXPECT_EQ(cat["fleeb"], mk_fleeb_info());
             EXPECT_FALSE(cat.has("zonkers"));
         }
 
@@ -605,7 +612,7 @@ TEST(mechcat, import_collisions) {
             auto cat = build_fake_catalogue();
 
             mechanism_catalogue other;
-            other.add("zonkers", fleeb_info);
+            other.add("zonkers", mk_fleeb_info());
             other.derive("fleeb2", "zonkers", {{"plugh", 8.}});
 
             auto fleeb2_info = cat["fleeb2"];
diff --git a/test/unit/test_probe.cpp b/test/unit/test_probe.cpp
index 8c465da2..6ea8ad80 100644
--- a/test/unit/test_probe.cpp
+++ b/test/unit/test_probe.cpp
@@ -19,13 +19,12 @@
 #include <arbor/util/pp_util.hpp>
 #include <arbor/version.hpp>
 #include <arborenv/gpu_env.hpp>
+#include <arbor/mechanism.hpp>
 
 #include "backends/event.hpp"
 #include "backends/multicore/fvm.hpp"
-#include "backends/multicore/mechanism.hpp"
 #ifdef ARB_GPU_ENABLED
 #include "backends/gpu/fvm.hpp"
-#include "backends/gpu/mechanism.hpp"
 #endif
 #include "fvm_lowered_cell_impl.hpp"
 #include "memory/gpu_wrappers.hpp"
diff --git a/test/unit/test_reduce_by_key.cu b/test/unit/test_reduce_by_key.cu
index 30998cd1..386dda75 100644
--- a/test/unit/test_reduce_by_key.cu
+++ b/test/unit/test_reduce_by_key.cu
@@ -3,7 +3,7 @@
 #include <algorithm>
 #include <vector>
 
-#include <backends/gpu/reduce_by_key.hpp>
+#include <arbor/gpu/reduce_by_key.hpp>
 
 #include "gpu_vector.hpp"
 
diff --git a/test/unit/test_simd.cpp b/test/unit/test_simd.cpp
index 94bf37e8..69cad808 100644
--- a/test/unit/test_simd.cpp
+++ b/test/unit/test_simd.cpp
@@ -98,6 +98,17 @@ struct simd_value: public ::testing::Test {};
 
 TYPED_TEST_CASE_P(simd_value);
 
+// Test agreement between simd::width(), simd::min_align() and corresponding type attributes.
+TYPED_TEST_P(simd_value, meta) {
+    using simd = TypeParam;
+    using scalar = typename simd::scalar_type;
+
+    ASSERT_EQ((int)simd::width, ::arb::simd::width(simd{}));
+    ASSERT_EQ(simd::min_align, ::arb::simd::min_align(simd{}));
+
+    EXPECT_LE(alignof(scalar), simd::min_align);
+}
+
 // Initialization and element access.
 TYPED_TEST_P(simd_value, elements) {
     using simd = TypeParam;
@@ -577,7 +588,7 @@ TYPED_TEST_P(simd_value, simd_array_cast) {
     }
 }
 
-REGISTER_TYPED_TEST_CASE_P(simd_value, elements, element_lvalue, copy_to_from, copy_to_from_masked, construct_masked, arithmetic, compound_assignment, comparison, mask_elements, mask_element_lvalue, mask_copy_to_from, mask_unpack, maths, simd_array_cast, reductions);
+REGISTER_TYPED_TEST_CASE_P(simd_value, meta, elements, element_lvalue, copy_to_from, copy_to_from_masked, construct_masked, arithmetic, compound_assignment, comparison, mask_elements, mask_element_lvalue, mask_copy_to_from, mask_unpack, maths, simd_array_cast, reductions);
 
 typedef ::testing::Types<
 
diff --git a/test/unit/test_synapses.cpp b/test/unit/test_synapses.cpp
index 264eae23..2fc897ae 100644
--- a/test/unit/test_synapses.cpp
+++ b/test/unit/test_synapses.cpp
@@ -7,11 +7,9 @@
 #include <arbor/constants.hpp>
 #include <arbor/mechcat.hpp>
 #include <arbor/mechanism.hpp>
-#include <arbor/mechanism_ppack.hpp>
 #include <arbor/cable_cell.hpp>
 
 #include "backends/multicore/fvm.hpp"
-#include "backends/multicore/mechanism.hpp"
 #include "util/maputil.hpp"
 #include "util/range.hpp"
 
@@ -21,13 +19,12 @@
 
 using namespace arb;
 
-using backend = ::arb::multicore::backend;
+using backend = multicore::backend;
 using shared_state = backend::shared_state;
 using value_type = backend::value_type;
 using size_type = backend::size_type;
 
-// Access to more mechanism protected data:
-ACCESS_BIND(::arb::mechanism_ppack* (::arb::concrete_mechanism<backend>::*)(), pp_ptr, &::arb::concrete_mechanism<backend>::ppack_ptr);
+ACCESS_BIND(arb_mechanism_ppack mechanism::*, get_ppack, &mechanism::ppack_);
 
 TEST(synapses, add_to_cell) {
     using namespace arb;
@@ -82,10 +79,10 @@ TEST(synapses, syn_basic_state) {
 
     value_type temp_K = *neuron_parameter_defaults.temperature_K;
 
-    auto expsyn = unique_cast<multicore::mechanism>(global_default_catalogue().instance<backend>("expsyn").mech);
+    auto expsyn = unique_cast<mechanism>(global_default_catalogue().instance(backend::kind, "expsyn").mech);
     ASSERT_TRUE(expsyn);
 
-    auto exp2syn = unique_cast<multicore::mechanism>(global_default_catalogue().instance<backend>("exp2syn").mech);
+    auto exp2syn = unique_cast<mechanism>(global_default_catalogue().instance(backend::kind, "exp2syn").mech);
     ASSERT_TRUE(exp2syn);
 
     std::vector<fvm_gap_junction> gj = {};
@@ -112,8 +109,8 @@ TEST(synapses, syn_basic_state) {
     std::vector<index_type> syn_mult(num_syn, 1);
     std::vector<value_type> syn_weight(num_syn, 1.0);
 
-    expsyn->instantiate(0, state, {}, {syn_cv, syn_weight, syn_mult});
-    exp2syn->instantiate(1, state, {}, {syn_cv, syn_weight, syn_mult});
+    state.instantiate(*expsyn,  0, {}, {syn_cv, syn_weight, syn_mult});
+    state.instantiate(*exp2syn, 1, {}, {syn_cv, syn_weight, syn_mult});
 
     // Parameters initialized to default values?
 
@@ -128,19 +125,18 @@ TEST(synapses, syn_basic_state) {
     EXPECT_TRUE(all_equal_to(mechanism_field(exp2syn, "B"),    NAN));
 
     // Current and voltage views correctly hooked up?
-
     const value_type* v_ptr;
-    v_ptr = (expsyn.get()->*pp_ptr)()->vec_v_;
+    v_ptr = (expsyn.get()->*get_ppack).vec_v;
     EXPECT_TRUE(all_equal_to(util::make_range(v_ptr, v_ptr+num_comp), -65.));
 
-    v_ptr = (exp2syn.get()->*pp_ptr)()->vec_v_;
+    v_ptr = (exp2syn.get()->*get_ppack).vec_v;
     EXPECT_TRUE(all_equal_to(util::make_range(v_ptr, v_ptr+num_comp), -65.));
 
     const value_type* i_ptr;
-    i_ptr = (expsyn.get()->*pp_ptr)()->vec_i_;
+    i_ptr = (expsyn.get()->*get_ppack).vec_i;
     EXPECT_TRUE(all_equal_to(util::make_range(i_ptr, i_ptr+num_comp), 1.));
 
-    i_ptr = (exp2syn.get()->*pp_ptr)()->vec_i_;
+    i_ptr = (exp2syn.get()->*get_ppack).vec_i;
     EXPECT_TRUE(all_equal_to(util::make_range(i_ptr, i_ptr+num_comp), 1.));
 
     // Initialize state then check g, A, B have been set to zero.
@@ -164,8 +160,15 @@ TEST(synapses, syn_basic_state) {
     state.deliverable_events.init(events);
     state.deliverable_events.mark_until_after(state.time);
 
-    expsyn->deliver_events();
-    exp2syn->deliver_events();
+    auto marked = state.deliverable_events.marked_events();
+    arb_deliverable_event_stream evts;
+    evts.n_streams = marked.n;
+    evts.begin     = marked.begin_offset;
+    evts.end       = marked.end_offset;
+    evts.events    = (arb_deliverable_event_data*) marked.ev_data; // FIXME(TH): This relies on bit-castability
+
+    expsyn->deliver_events(evts);
+    exp2syn->deliver_events(evts);
 
     using fvec = std::vector<fvm_value_type>;
 
diff --git a/test/unit/unit_test_catalogue.cpp b/test/unit/unit_test_catalogue.cpp
index f137d4e9..aef54594 100644
--- a/test/unit/unit_test_catalogue.cpp
+++ b/test/unit/unit_test_catalogue.cpp
@@ -46,13 +46,13 @@
 
 #ifndef ARB_GPU_ENABLED
 #define ADD_MECH(c, x)\
-c.add(#x, testing::mechanism_##x##_info());\
-c.register_implementation(#x, testing::make_mechanism_##x<multicore::backend>());
+c.add(#x, make_testing_##x());   \
+c.register_implementation(#x, std::make_unique<arb::mechanism>(make_testing_##x(), *make_testing_##x##_interface_multicore()));
 #else
 #define ADD_MECH(c, x)\
-c.add(#x, testing::mechanism_##x##_info());\
-c.register_implementation(#x, testing::make_mechanism_##x<multicore::backend>());\
-c.register_implementation(#x, testing::make_mechanism_##x<gpu::backend>());
+c.add(#x, make_testing_##x());                                      \
+c.register_implementation(#x, std::make_unique<arb::mechanism>(make_testing_##x(), *make_testing_##x##_interface_multicore())); \
+c.register_implementation(#x, std::make_unique<arb::mechanism>(make_testing_##x(), *make_testing_##x##_interface_gpu()));
 #endif
 
 using namespace arb;
-- 
GitLab