From 4a305b4f946ee857245f674f7b1d9e56929aaf84 Mon Sep 17 00:00:00 2001
From: Thorsten Hater <24411438+thorstenhater@users.noreply.github.com>
Date: Tue, 21 Jun 2022 14:31:40 +0200
Subject: [PATCH] Ionic Diffusion along the Morphology
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# Introduction

Plasticity processes are mediated by signalling ions, eg Ca++, which are generated by
synapses upon reception of a spike. This adds a quantity `Xd` for any ion `X`
- initialised as `Xi`
- read from and written to by NMODL density and point mechanisms.
- propagates according to a diffusion law `âˆ‚_t Xd = âˆ‚_z c X âˆ‚_z Xd + iX/qi`.
- in contrast to `Xi` and `Xo` there's no buffering and the update in mechanisms
  occurs atomically (and at a different time)

More details can be found in the documentation.

For the future there are some low hanging optimisations
- per-ion conductivity for the matrix solver could be disabled if no diffusion is computed
- cable and diffusion solvers store duplicates of the solver state, could be merged

Closes #1651
---
 arbor/CMakeLists.txt                          |   2 +
 arbor/arbexcept.cpp                           |   6 +
 arbor/backends/gpu/diffusion.cu               | 274 +++++++++++
 arbor/backends/gpu/diffusion.hpp              |  40 ++
 arbor/backends/gpu/diffusion_state.hpp        | 462 ++++++++++++++++++
 arbor/backends/gpu/fine.cu                    |  65 +++
 arbor/backends/gpu/fine.hpp                   |  85 ++++
 arbor/backends/gpu/fvm.hpp                    |  10 +-
 arbor/backends/gpu/matrix_fine.cu             |  57 ---
 arbor/backends/gpu/matrix_fine.hpp            |  25 +-
 arbor/backends/gpu/matrix_state_fine.hpp      |  60 +--
 arbor/backends/gpu/shared_state.cpp           |  48 +-
 arbor/backends/gpu/shared_state.hpp           |  26 +-
 .../{matrix_state.hpp => cable_solver.hpp}    |  61 +--
 arbor/backends/multicore/diffusion_solver.hpp | 145 ++++++
 arbor/backends/multicore/fvm.hpp              |  16 +-
 arbor/backends/multicore/shared_state.cpp     |  52 +-
 arbor/backends/multicore/shared_state.hpp     |  30 +-
 arbor/cable_cell.cpp                          |   4 +
 arbor/cable_cell_param.cpp                    |  22 +-
 arbor/fvm_layout.cpp                          | 149 +++++-
 arbor/fvm_layout.hpp                          |  13 +
 arbor/fvm_lowered_cell_impl.hpp               |  57 ++-
 arbor/include/arbor/arbexcept.hpp             |   6 +
 arbor/include/arbor/cable_cell.hpp            |  20 +-
 arbor/include/arbor/cable_cell_param.hpp      |  20 +-
 arbor/include/arbor/mechanism_abi.h           |   7 +-
 arbor/include/arbor/mechinfo.hpp              |   2 +
 arbor/include/arbor/simd/avx.hpp              |   1 -
 arbor/include/arbor/simd/implbase.hpp         |   1 -
 arbor/include/arbor/simple_sampler.hpp        |   2 +-
 arbor/matrix.hpp                              |  78 +--
 arbor/mechinfo.cpp                            |   1 +
 arborio/cableio.cpp                           |   9 +-
 doc/concepts/decor.rst                        |  24 +
 doc/dev/axial-diff.tex                        | 203 ++++++++
 doc/fileformat/nmodl.rst                      |  19 +-
 example/CMakeLists.txt                        |   1 +
 example/diffusion/CMakeLists.txt              |   3 +
 example/diffusion/README.md                   |  18 +
 example/diffusion/diffusion.cpp               | 126 +++++
 mechanisms/CMakeLists.txt                     |   2 +-
 mechanisms/default/decay.mod                  |  18 +
 mechanisms/default/inject.mod                 |  20 +
 modcc/blocks.hpp                              |   3 +
 modcc/identifier.hpp                          |   4 +
 modcc/module.cpp                              |  32 +-
 modcc/printer/cprinter.cpp                    | 207 ++++----
 modcc/printer/gpuprinter.cpp                  |  68 +--
 modcc/printer/infoprinter.cpp                 |   3 +-
 modcc/printer/printerutil.cpp                 |  12 +
 modcc/printer/printerutil.hpp                 |   1 +
 python/cable_probes.cpp                       |  16 +
 python/cells.cpp                              |  14 +-
 python/example/diffusion.py                   |  68 +++
 test/unit/CMakeLists.txt                      |   1 +
 test/unit/test_diffusion.cpp                  | 288 +++++++++++
 test/unit/test_fvm_layout.cpp                 |   2 +-
 test/unit/test_fvm_lowered.cpp                |  14 +-
 test/unit/test_matrix.cpp                     |  44 +-
 test/unit/test_matrix_cpuvsgpu.cpp            |   4 +-
 test/unit/test_s_expr.cpp                     |   3 +
 62 files changed, 2572 insertions(+), 502 deletions(-)
 create mode 100644 arbor/backends/gpu/diffusion.cu
 create mode 100644 arbor/backends/gpu/diffusion.hpp
 create mode 100644 arbor/backends/gpu/diffusion_state.hpp
 create mode 100644 arbor/backends/gpu/fine.cu
 create mode 100644 arbor/backends/gpu/fine.hpp
 rename arbor/backends/multicore/{matrix_state.hpp => cable_solver.hpp} (73%)
 create mode 100644 arbor/backends/multicore/diffusion_solver.hpp
 create mode 100644 doc/dev/axial-diff.tex
 create mode 100644 example/diffusion/CMakeLists.txt
 create mode 100644 example/diffusion/README.md
 create mode 100644 example/diffusion/diffusion.cpp
 create mode 100644 mechanisms/default/decay.mod
 create mode 100644 mechanisms/default/inject.mod
 create mode 100644 python/example/diffusion.py
 create mode 100644 test/unit/test_diffusion.cpp

diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 7a1fb7a4..569a27ec 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -71,6 +71,8 @@ if(ARB_WITH_GPU)
         backends/gpu/threshold_watcher.cu
         backends/gpu/matrix_assemble.cu
         backends/gpu/matrix_fine.cu
+        backends/gpu/diffusion.cu
+        backends/gpu/fine.cu
         backends/gpu/matrix_solve.cu
         backends/gpu/multi_event_stream.cpp
         backends/gpu/multi_event_stream.cu
diff --git a/arbor/arbexcept.cpp b/arbor/arbexcept.cpp
index 37cb7a92..520883fe 100644
--- a/arbor/arbexcept.cpp
+++ b/arbor/arbexcept.cpp
@@ -88,6 +88,12 @@ no_such_parameter::no_such_parameter(const std::string& mech_name, const std::st
     param_name(param_name)
 {}
 
+illegal_diffusive_mechanism::illegal_diffusive_mechanism(const std::string& m, const std::string& i):
+        arbor_exception(pprintf("mechanism '{}' accesses diffusive value of ion '{}', but diffusivity is disabled for it.", m, i)),
+        mech{m},
+        ion{i}
+{}
+
 invalid_parameter_value::invalid_parameter_value(const std::string& mech_name, const std::string& param_name, double value):
     arbor_exception(pprintf("invalid parameter value for mechanism {} parameter {}: {}", mech_name, param_name, value)),
     mech_name(mech_name),
diff --git a/arbor/backends/gpu/diffusion.cu b/arbor/backends/gpu/diffusion.cu
new file mode 100644
index 00000000..a4f129bc
--- /dev/null
+++ b/arbor/backends/gpu/diffusion.cu
@@ -0,0 +1,274 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+
+#include "matrix_common.hpp"
+#include "diffusion.hpp"
+
+namespace arb {
+namespace gpu {
+namespace kernels {
+/// GPU implementation of Hines matrix assembly.
+/// Fine layout.
+/// For a given time step size dt:
+///     - use the precomputed alpha and alpha_d values to construct the diagonal
+///       and off diagonal of the symmetric Hines matrix.
+///     - compute the RHS of the linear system to solve.
+template <typename T, typename I>
+__global__
+void assemble_diffusion(
+        T* __restrict__ const d,
+        T* __restrict__ const rhs,
+        const T* __restrict__ const invariant_d,
+        const T* __restrict__ const concentration,
+        const T* __restrict__ const voltage,
+        const T* __restrict__ const current,
+        const T q,
+        const T* __restrict__ const conductivity,
+        const T* __restrict__ const area,
+        const I* __restrict__ const cv_to_intdom,
+        const T* __restrict__ const dt_intdom,
+        const I* __restrict__ const perm,
+        unsigned n) {
+    const unsigned tid = threadIdx.x + blockDim.x*blockIdx.x;
+    if (tid < n) {
+        const auto dt = dt_intdom[cv_to_intdom[tid]];
+        const auto p = dt > 0;
+        const auto pid = perm[tid];
+        auto u = voltage[tid];        // mV
+        auto g = conductivity[tid];   // ÂµS
+        auto J = current[tid];        // A/m^2
+        auto A = 1e-3*area[tid];      // 1e-9Â·mÂ²
+        auto X = concentration[tid];  // mM
+        // conversion from current density to concentration change
+        // using Faraday's constant
+        auto F = A/(q*96.485332);
+
+        d[pid]   = p ? (1e-3/dt   + F*g + invariant_d[tid]) : 0;
+        rhs[pid] = p ? (1e-3/dt*X + F*(u*g - J))            : concentration[tid];
+    }
+}
+
+/// GPU implementation of Hines Matrix solver.
+/// Fine-grained tree based solver.
+/// Each block solves a set of matricesb iterating over the levels of matrix
+/// and perfoming a backward and forward substitution. On each level one thread
+/// gets assigned to one branch on this level of a matrix and solves and
+/// performs the substitution. Afterwards all threads continue on the next
+/// level.
+/// To avoid idle threads, one should try that on each level, there is a similar
+/// number of branches.
+template <typename T>
+__global__
+void solve_diffusion(
+    T* __restrict__ const rhs,
+    T* __restrict__ const d,
+    const T* __restrict__ const u,
+    const level_metadata* __restrict__ const level_meta,
+    const fvm_index_type* __restrict__ const level_lengths,
+    const fvm_index_type* __restrict__ const level_parents,
+    const fvm_index_type* __restrict__ const block_index,
+    const fvm_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells
+{
+    const auto tid = threadIdx.x;
+    const auto bid = blockIdx.x;
+
+    const auto first_level = block_index[bid];
+    const auto num_levels  = block_index[bid + 1] - first_level;
+
+    const auto block_level_meta = &level_meta[first_level];
+
+    // backward substitution
+
+    for (unsigned l=0; l<num_levels-1; ++l) {
+        // Metadata for this level and the next level
+        const auto& lvl_meta = block_level_meta[l];
+        const auto& next_lvl_meta = block_level_meta[l+1];
+
+        // Addresses of the first elements of level_lengths and level_parents
+        // that belong to this level
+        const auto lvl_lengths = level_lengths + lvl_meta.level_data_index;
+        const auto lvl_parents = level_parents + lvl_meta.level_data_index;
+
+        const unsigned width = lvl_meta.num_branches;
+
+
+        // Perform backward substitution for each branch on this level.
+        // One thread per branch.
+        if (tid < width) {
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = lvl_meta.matrix_data_index + tid;
+
+            // Zero diagonal term implies dt==0; just leave rhs (for whole matrix)
+            // alone in that case.
+
+            // Each cell has a different `dt`, because we choose time step size
+            // according to when the next event is arriving at a cell. So, some
+            // cells require more time steps than others, but we have to solve
+            // all the matrices at the same time. When a cell finishes, we put a
+            // `0` on the diagonal to mark that it should not be solved for.
+            if (d[pos]!=0) {
+                // each branch perform substitution
+                for (unsigned i=0; i<len-1; ++i) {
+                    const unsigned next_pos = pos + width;
+                    const auto d_next = d[next_pos];
+                    const auto rhs_next = rhs[next_pos];
+                    const T factor = -u[pos]/d[pos];
+                    d[next_pos] = fma(factor, u[pos], d_next);
+                    rhs[next_pos] = fma(factor, rhs[pos], rhs_next);
+                    pos = next_pos;
+                }
+                // Update d and rhs at the parent node of this branch.
+                // A parent may have more than one contributing to it, so we use
+                // atomic updates to avoid races conditions.
+                const unsigned parent_index = next_lvl_meta.matrix_data_index;
+                const unsigned p = parent_index + lvl_parents[tid];
+
+                const T factor = -u[pos] / d[pos];
+                gpu_atomic_add(d + p, factor*u[pos]);
+                gpu_atomic_add(rhs + p, factor*rhs[pos]);
+            }
+        }
+        __syncthreads();
+    }
+
+    // Solve the root
+    {
+        // The levels are sorted such that the root is the last level
+        const auto& last_lvl_meta = block_level_meta[num_levels-1];
+        const auto lvl_lengths = level_lengths + last_lvl_meta.level_data_index;
+
+        const unsigned width = num_matrix[bid];
+
+        if (tid < width) {
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = last_lvl_meta.matrix_data_index + tid;
+
+            if (d[pos]!=0) {
+                // backward
+                for (unsigned i=0; i<len-1; ++i) {
+                    const unsigned next_pos = pos + width;
+                    const T factor = -u[pos] / d[pos];
+                    const auto rhs_next = rhs[next_pos];
+                    const auto d_next = d[next_pos];
+                    d[next_pos]   = fma(factor, u[pos], d_next);
+                    rhs[next_pos] = fma(factor, rhs[pos], rhs_next);
+                    pos = next_pos;
+                }
+
+                auto rhsp = rhs[pos] / d[pos];
+                rhs[pos] = rhsp;
+                pos -= width;
+
+                // forward
+                for (unsigned i=0; i<len-1; ++i) {
+                    rhsp = rhs[pos] - u[pos]*rhsp;
+                    rhsp /= d[pos];
+                    rhs[pos] = rhsp;
+                    pos -= width;
+                }
+            }
+        }
+    }
+
+    // forward substitution
+
+    // take great care with loop limits decrementing unsigned counter l
+    for (unsigned l=num_levels-1; l>0; --l) {
+        const auto& lvl_meta = block_level_meta[l-1];
+
+        // Addresses of the first elements of level_lengths and level_parents
+        // that belong to this level
+        const auto lvl_lengths = level_lengths + lvl_meta.level_data_index;
+        const auto lvl_parents = level_parents + lvl_meta.level_data_index;
+
+        const unsigned width = lvl_meta.num_branches;
+        const unsigned parent_index = block_level_meta[l].matrix_data_index;
+
+        __syncthreads();
+
+        // Perform forward-substitution for each branch on this level.
+        // One thread per branch.
+        if (tid < width) {
+            // Find the index of the first node in this branch.
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = lvl_meta.matrix_data_index + (len-1)*width + tid;
+
+            if (d[pos]!=0) {
+                // Load the rhs value for the parent node of this branch.
+                const unsigned p = parent_index + lvl_parents[tid];
+                T rhsp = rhs[p];
+                // each branch perform substitution
+                for (unsigned i=0; i<len; ++i) {
+                    rhsp = rhs[pos] - u[pos]*rhsp;
+                    rhsp /= d[pos];
+                    rhs[pos] = rhsp;
+                    pos -= width;
+                }
+            }
+        }
+    }
+}
+
+} // namespace kernels
+
+ARB_ARBOR_API void assemble_diffusion(
+    fvm_value_type* d,
+    fvm_value_type* rhs,
+    const fvm_value_type* invariant_d,
+    const fvm_value_type* concentration,
+    const fvm_value_type* voltage,
+    const fvm_value_type* current,
+    fvm_value_type q,
+    const fvm_value_type* conductivity,
+    const fvm_value_type* area,
+    const fvm_index_type* cv_to_intdom,
+    const fvm_value_type* dt_intdom,
+    const fvm_index_type* perm,
+    unsigned n)
+{
+    const unsigned block_dim = 128;
+    const unsigned num_blocks = impl::block_count(n, block_dim);
+
+    kernels::assemble_diffusion<<<num_blocks, block_dim>>>(
+        d, rhs, invariant_d, concentration, voltage, current, q, conductivity, area,
+        cv_to_intdom, dt_intdom, perm, n);
+}
+
+// Example:
+//
+//         block 0                  block 1              block 2
+// .~~~~~~~~~~~~~~~~~~.  .~~~~~~~~~~~~~~~~~~~~~~~~.  .~~~~~~~~~~~ ~ ~
+//
+//  L0 \  /                                           L5    \  /
+//      \/                                                   \/
+//  L1   \   /   \   /    L3 \   /   \ | /   \   /    L6 \   /  . . .
+//        \ /     \ /         \ /     \|/     \ /         \ /
+//  L2     |       |      L4   |       |       |      L7   |
+//         |       |           |       |       |           |
+//
+// levels       = [L0, L1, L2, L3, L4, L5, L6, L7, ... ]
+// block_index  = [0, 3, 5, 8, ...]
+// num_levels   = [3, 2, 3, ...]
+// num_cells    = [2, 3, ...]
+// num_blocks   = level_start.size() - 1 = num_levels.size() = num_cells.size()
+ARB_ARBOR_API void solve_diffusion(
+    fvm_value_type* rhs,
+    fvm_value_type* d,                     // diagonal values
+    const fvm_value_type* u,               // upper diagonal (and lower diagonal as the matrix is SPD)
+    const level_metadata* level_meta,      // information pertaining to each level
+    const fvm_index_type* level_lengths,   // lengths of branches of every level concatenated
+    const fvm_index_type* level_parents,   // parents of branches of every level concatenated
+    const fvm_index_type* block_index,     // start index into levels for each gpu block
+    fvm_index_type* num_cells,             // the number of cells packed into this single matrix
+    fvm_index_type* padded_size,           // length of rhs, d, u, including padding
+    unsigned num_blocks,                   // number of blocks
+    unsigned blocksize)                    // size of each block
+{
+    kernels::solve_diffusion<<<num_blocks, blocksize>>>(
+        rhs, d, u, level_meta, level_lengths, level_parents, block_index,
+        num_cells);
+}
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/diffusion.hpp b/arbor/backends/gpu/diffusion.hpp
new file mode 100644
index 00000000..f07c0539
--- /dev/null
+++ b/arbor/backends/gpu/diffusion.hpp
@@ -0,0 +1,40 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/export.hpp>
+
+#include <ostream>
+
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+
+ARB_ARBOR_API void assemble_diffusion(
+    fvm_value_type* d,
+    fvm_value_type* rhs,
+    const fvm_value_type* invariant_d,
+    const fvm_value_type* concentration,
+    const fvm_value_type* voltage,
+    const fvm_value_type* current,
+    const fvm_value_type q,
+    const fvm_value_type* conductivity,
+    const fvm_value_type* area,
+    const fvm_index_type* cv_to_intdom,
+    const fvm_value_type* dt_intdom,
+    const fvm_index_type* perm,
+    unsigned n);
+
+ARB_ARBOR_API void solve_diffusion(
+    fvm_value_type* rhs,
+    fvm_value_type* d,                     // diagonal values
+    const fvm_value_type* u,               // upper diagonal (and lower diagonal as the matrix is SPD)
+    const level_metadata* level_meta,      // information pertaining to each level
+    const fvm_index_type* level_lengths,   // lengths of branches of every level concatenated
+    const fvm_index_type* level_parents,   // parents of branches of every level concatenated
+    const fvm_index_type* block_index,     // start index (exclusive) into levels for each gpu block
+    fvm_index_type* num_cells,             // the number of cells packed into this single matrix
+    fvm_index_type* padded_size,           // length of rhs, d, u, including padding
+    unsigned num_blocks,                   // number of blocks
+    unsigned blocksize);                   // size of each block
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/diffusion_state.hpp b/arbor/backends/gpu/diffusion_state.hpp
new file mode 100644
index 00000000..cb18bcb9
--- /dev/null
+++ b/arbor/backends/gpu/diffusion_state.hpp
@@ -0,0 +1,462 @@
+#pragma once
+
+#include <cstring>
+
+#include <vector>
+#include <type_traits>
+
+#include <arbor/common_types.hpp>
+
+#include "memory/memory.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
+#include "tree.hpp"
+#include "diffusion.hpp"
+#include "forest.hpp"
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+
+template <typename T, typename I>
+struct diffusion_state {
+public:
+    using value_type = T;
+    using size_type = I;
+
+    using array      = memory::device_vector<value_type>;
+    using view       = typename array::view_type;
+    using const_view = typename array::const_view_type;
+    using iarray     = memory::device_vector<size_type>;
+
+    using metadata_array = memory::device_vector<level_metadata>;
+
+    // Maps control volume to integration domain
+    iarray cv_to_intdom;
+
+    array d;     // [Î¼S]
+    array u;     // [Î¼S]
+    array rhs;   // [nA]
+
+    // Required for matrix assembly
+    array cv_area;             // [Î¼m^2]
+
+    // Invariant part of the matrix diagonal
+    array invariant_d;         // [Î¼S]
+
+    // Solution in unpacked format
+    array solution_;
+
+    // Maximum number of branches in each level per block
+    unsigned max_branches_per_level;
+
+    // Number of rows in matrix
+    unsigned matrix_size;
+
+    // Number of cells
+    unsigned num_cells;
+    iarray num_cells_in_block;
+
+    // End of the data of each level
+    iarray data_partition;
+    std::size_t data_size;
+
+    // Metadata for each level
+    // Includes indices into d, u, rhs and level_lengths, level_parents
+    metadata_array level_meta;
+
+    // Stores the lengths (number of compartments) of the branches of each
+    // level sequentially in memory. Indexed by level_metadata::level_data_index
+    iarray level_lengths;
+
+    // Stores the indices of the parent of each of the branches in each
+    // level sequentially in memory. Indexed by level_metadata::level_data_index
+    iarray level_parents;
+
+    // Stores the indices to the first level belonging to each block
+    // block b owns { levels[block_index[b]], ..., levels[block_index[b+1] - 1] }
+    // there is an additional entry at the end of the vector to make the above
+    // computation safe
+    iarray block_index;
+
+    // Permutation from front end storage to packed storage
+    //      `solver_format[perm[i]] = external_format[i]`
+    iarray perm;
+
+
+    diffusion_state() = default;
+
+    // constructor for fine-grained matrix.
+    diffusion_state(const std::vector<size_type>& p,
+                    const std::vector<size_type>& cell_cv_divs,
+                    const std::vector<value_type>& face_diffusivity,
+                    const std::vector<value_type>& area,
+                    const std::vector<size_type>& cell_intdom) {
+        using util::make_span;
+        constexpr unsigned npos = unsigned(-1);
+
+        max_branches_per_level = 128;
+
+        num_cells = cell_cv_divs.size()-1;
+
+        forest trees(p, cell_cv_divs);
+        trees.optimize();
+
+        // Now distribute the cells into gpu blocks.
+        // While the total number of branches on each level of theses cells in a
+        // block are less than `max_branches_per_level` we add more cells. If
+        // one block is full, we start a new gpu block.
+
+        unsigned current_block = 0;
+        std::vector<unsigned> block_num_branches_per_depth;
+        std::vector<unsigned> block_ix(num_cells);
+
+        // Accumulate num cells in block in a temporary vector to be copied to the device
+        std::vector<size_type> temp_ncells_in_block;
+        temp_ncells_in_block.resize(1, 0);
+
+        // branch_map = branch_maps[block] is a branch map for each gpu block
+        // branch_map[depth] is list of branches is this level
+        // each branch branch_map[depth][i] has
+        // {id, parent_id, start_idx, parent_idx, length}
+        std::vector<std::vector<std::vector<branch>>> branch_maps;
+        branch_maps.resize(1);
+
+        unsigned num_branches = 0u;
+        for (auto c: make_span(0u, num_cells)) {
+            auto cell_start = cell_cv_divs[c];
+            auto cell_tree = trees.branch_tree(c);
+            auto fine_tree = trees.compartment_tree(c);
+            auto branch_starts  = trees.branch_offsets(c);
+            auto branch_lengths = trees.branch_lengths(c);
+
+            auto depths = depth_from_root(cell_tree);
+
+            // calculate the number of levels in this cell
+            auto cell_num_levels = util::max_value(depths)+1u;
+
+            auto num_cell_branches = cell_tree.num_segments();
+
+            // count number of branches per level
+            std::vector<unsigned> cell_num_branches_per_depth(cell_num_levels, 0u);
+            for (auto i: make_span(num_cell_branches)) {
+                cell_num_branches_per_depth[depths[i]] += 1;
+            }
+            // resize the block levels if neccessary
+            if (cell_num_levels > block_num_branches_per_depth.size()) {
+                block_num_branches_per_depth.resize(cell_num_levels, 0);
+            }
+
+
+            // check if we can fit the current cell into the last gpu block
+            bool fits_current_block = true;
+            for (auto i: make_span(cell_num_levels)) {
+                unsigned new_branches_per_depth =
+                    block_num_branches_per_depth[i]
+                    + cell_num_branches_per_depth[i];
+                if (new_branches_per_depth > max_branches_per_level) {
+                    fits_current_block = false;
+                }
+            }
+            if (fits_current_block) {
+                // put the cell into current block
+                block_ix[c] = current_block;
+                temp_ncells_in_block[block_ix[c]] += 1;
+                // and increment counter
+                for (auto i: make_span(cell_num_levels)) {
+                    block_num_branches_per_depth[i] += cell_num_branches_per_depth[i];
+                }
+            } else {
+                // otherwise start a new block
+                block_ix[c] = current_block + 1;
+                temp_ncells_in_block.push_back(1);
+                branch_maps.resize(branch_maps.size()+1);
+                current_block += 1;
+                // and reset counter
+                block_num_branches_per_depth = cell_num_branches_per_depth;
+                for (auto num: block_num_branches_per_depth) {
+                    if (num > max_branches_per_level) {
+                        throw std::runtime_error(
+                            "Could not fit " + std::to_string(num)
+                            + " branches in a block of size "
+                            + std::to_string(max_branches_per_level));
+                    }
+                }
+            }
+            num_cells_in_block = memory::make_const_view(temp_ncells_in_block);
+
+
+            // the branch map for the block in which we put the cell
+            // maps levels to a list of branches in that level
+            auto& branch_map = branch_maps[block_ix[c]];
+
+            // build branch_map:
+            // branch_map[i] is a list of branch meta-data for branches with depth i
+            if (cell_num_levels > branch_map.size()) {
+                branch_map.resize(cell_num_levels);
+            }
+            for (auto i: make_span(num_cell_branches)) {
+                branch b;
+                auto depth = depths[i];
+                // give the branch a unique id number
+                b.id = i + num_branches;
+                // take care to mark branches with no parents with npos
+                b.parent_id = cell_tree.parent(i)==cell_tree.no_parent ?
+                    npos : cell_tree.parent(i) + num_branches;
+                b.start_idx = branch_starts[i] + cell_start;
+                b.length = branch_lengths[i];
+                b.parent_idx = p[b.start_idx] + cell_start;
+                branch_map[depth].push_back(b);
+            }
+            // total number of branches of all cells
+            num_branches += num_cell_branches;
+        }
+
+        for (auto& branch_map: branch_maps) {
+            // reverse the levels
+            std::reverse(branch_map.begin(), branch_map.end());
+
+            // Sort all branches on each level in descending order of length.
+            // Later, branches will be partitioned over thread blocks, and we will
+            // take advantage of the fact that the first branch in a partition is
+            // the longest, to determine how to pack all the branches in a block.
+            for (auto& branches: branch_map) {
+                util::sort(branches);
+            }
+        }
+
+        // The branches generated above have been assigned contiguous ids.
+        // Now generate a vector of branch_loc, one for each branch, that
+        // allow for quick lookup by id of the level and index within a level
+        // of each branch.
+        // This information is only used in the generation of the levels below.
+
+        // Helper for recording location of a branch once packed.
+        struct branch_loc {
+            unsigned block; // the gpu block containing the cell to which the branch blongs to
+            unsigned level; // the level containing the branch
+            unsigned index; // the index of the branch on that level
+        };
+
+        // branch_locs will hold the location information for each branch.
+        std::vector<branch_loc> branch_locs(num_branches);
+        for (unsigned b: make_span(branch_maps.size())) {
+            const auto& branch_map = branch_maps[b];
+            for (unsigned l: make_span(branch_map.size())) {
+                const auto& branches = branch_map[l];
+
+                // Record the location information
+                for (auto i=0u; i<branches.size(); ++i) {
+                    const auto& branch = branches[i];
+                    branch_locs[branch.id] = {b, l, i};
+                }
+            }
+        }
+
+        // Construct description for the set of branches on each level for each
+        // block. This is later used to sort the branches in each block in each
+        // level into conineous chunks which are easier to read for the gpu
+        // kernel.
+
+        // Accumulate metadata about the levels, level lengths, level parents,
+        // data_partition and block indices in temporary vectors to be copied to the device
+        std::vector<level_metadata> temp_meta;
+        std::vector<size_type> temp_lengths, temp_parents, temp_data_part, temp_block_index;
+
+        temp_block_index.reserve(branch_maps.size() + 1);
+        temp_block_index.push_back(0);
+        temp_data_part.reserve(branch_maps.size());
+
+        // Offset into the packed data format, used to apply permutation on data
+        auto pos = 0u;
+        // Offset into the packed data format, used to access level_lengths and level_parents
+        auto data_start = 0u;
+        for (const auto& branch_map: branch_maps) {
+            for (const auto& lvl_branches: branch_map) {
+
+                level_metadata lvl_meta;
+                std::vector<size_type> lvl_lengths(lvl_branches.size()), lvl_parents(lvl_branches.size());
+
+                lvl_meta.num_branches = lvl_branches.size();
+                lvl_meta.matrix_data_index = pos;
+                lvl_meta.level_data_index = data_start;
+
+                // The length of the first branch is the upper bound on branch
+                // length as they are sorted in descending order of length.
+                lvl_meta.max_length = lvl_branches.front().length;
+
+                unsigned bi = 0u;
+                for (const auto& b: lvl_branches) {
+                    // Set the length of the branch.
+                    lvl_lengths[bi] = b.length;
+
+                    // Set the parent indexes. During the forward and backward
+                    // substitution phases each branch accesses the last node in
+                    // its parent branch.
+                    auto index = b.parent_id==npos? npos: branch_locs[b.parent_id].index;
+                    lvl_parents[bi] = index;
+                    ++bi;
+                }
+
+                data_start+= lvl_meta.num_branches;
+                pos += lvl_meta.max_length*lvl_meta.num_branches;
+
+                temp_meta.push_back(std::move(lvl_meta));
+                util::append(temp_lengths, lvl_lengths);
+                util::append(temp_parents, lvl_parents);
+            }
+            auto prev_end = temp_block_index.back();
+            temp_block_index.push_back(prev_end + branch_map.size());
+            temp_data_part.push_back(pos);
+        }
+        data_size = pos;
+
+        // set matrix state
+        matrix_size = p.size();
+
+        // form the permutation index used to reorder vectors to/from the
+        // ordering used by the fine grained matrix storage.
+        std::vector<size_type> perm_tmp(matrix_size);
+        for (auto block: make_span(branch_maps.size())) {
+            const auto& branch_map = branch_maps[block];
+            const auto first_level = temp_block_index[block];
+
+            for (auto i: make_span(temp_block_index[block + 1] - first_level)) {
+                const auto& l = temp_meta[first_level + i];
+                for (auto j: make_span(l.num_branches)) {
+                    const auto& b = branch_map[i][j];
+                    auto j_lvl_length = temp_lengths[l.level_data_index + j];
+                    auto to = l.matrix_data_index + j + l.num_branches*(j_lvl_length-1);
+                    auto from = b.start_idx;
+                    for (auto k: make_span(b.length)) {
+                        perm_tmp[from + k] = to - k*l.num_branches;
+                    }
+                }
+            }
+        }
+
+        level_meta     = memory::make_const_view(temp_meta);
+        level_lengths  = memory::make_const_view(temp_lengths);
+        level_parents  = memory::make_const_view(temp_parents);
+        data_partition = memory::make_const_view(temp_data_part);
+        block_index    = memory::make_const_view(temp_block_index);
+
+        auto perm_balancing = trees.permutation();
+
+        // apppy permutation form balancing
+        std::vector<size_type> perm_tmp2(matrix_size);
+        for (auto i: make_span(matrix_size)) {
+             // This is CORRECT! verified by using the ring benchmark with root=0 (where the permutation is actually not id)
+            perm_tmp2[perm_balancing[i]] = perm_tmp[i];
+        }
+        // copy permutation to device memory
+        perm = memory::make_const_view(perm_tmp2);
+
+
+        // Summary of fields and their storage format:
+        //
+        // face_conductance : not needed, don't store
+        // d, u, rhs        : packed
+        // invariant_d      : flat
+        // cv_to_cell       : flat
+        // area             : flat
+
+        // the invariant part of d is stored in in flat form
+        std::vector<value_type> invariant_d_tmp(matrix_size, 0);
+        std::vector<value_type> temp_u_shuffled(matrix_size, 0);
+        array u_shuffled;
+        for (auto i: make_span(1u, matrix_size)) {
+            auto gij = face_diffusivity[i];
+
+            temp_u_shuffled[i] = -gij;
+            invariant_d_tmp[i] += gij;
+            if (p[i]!=-1) {
+                invariant_d_tmp[p[i]] += gij;
+            }
+        }
+        u_shuffled = memory::make_const_view(temp_u_shuffled);
+
+        // the matrix components u, d and rhs are stored in packed form
+        auto nan = std::numeric_limits<double>::quiet_NaN();
+        d   = array(data_size, nan);
+        u   = array(data_size, nan);
+        rhs = array(data_size, nan);
+
+        // transform u_shuffled values into packed u vector.
+        flat_to_packed(u_shuffled, u);
+
+        // the invariant part of d and cv_area are in flat form
+        cv_area = memory::make_const_view(area);
+        invariant_d = memory::make_const_view(invariant_d_tmp);
+
+        // calculate the cv -> cell mappings
+        std::vector<size_type> cv_to_cell_tmp(matrix_size);
+        size_type ci = 0;
+        for (auto cv_span: util::partition_view(cell_cv_divs)) {
+            util::fill(util::subrange_view(cv_to_cell_tmp, cv_span), ci);
+            ++ci;
+        }
+        std::vector<size_type> cv_to_intdom_tmp(matrix_size);
+        for (auto i = 0ul; i < cv_to_cell_tmp.size(); ++i) {
+            cv_to_intdom_tmp[i] = cell_intdom[cv_to_cell_tmp[i]];
+        }
+        cv_to_intdom = memory::make_const_view(cv_to_intdom_tmp);
+    }
+
+    // Assemble the matrix
+    // Afterwards the diagonal and RHS will have been set given dt, voltage, current, and conductivity.
+    //   dt_intdom [ms] (per integration domain)
+    //   voltage [mV]
+    //   current density [A/mÂ²]
+    //   conductivity [kS/mÂ²]
+    void assemble(const_view dt_intdom, const_view concentration, const_view voltage, const_view current, const_view conductivity, fvm_value_type q) {
+        assemble_diffusion(d.data(),
+                           rhs.data(),
+                           invariant_d.data(),
+                           concentration.data(),
+                           voltage.data(),
+                           current.data(),
+                           q,
+                           conductivity.data(),
+                           cv_area.data(),
+                           cv_to_intdom.data(),
+                           dt_intdom.data(),
+                           perm.data(),
+                           size());
+    }
+
+    void solve(array& to) {
+        solve_diffusion(rhs.data(),
+                        d.data(),
+                        u.data(),
+                        level_meta.data(),
+                        level_lengths.data(),
+                        level_parents.data(),
+                        block_index.data(),
+                        num_cells_in_block.data(),
+                        data_partition.data(),
+                        num_cells_in_block.size(),
+                        max_branches_per_level);
+        // unpermute the solution
+        packed_to_flat(rhs, to);
+    }
+
+    std::size_t size() const { return matrix_size; }
+
+private:
+    void flat_to_packed(const array& from, array& to ) {
+        arb_assert(from.size()==matrix_size);
+        arb_assert(to.size()==data_size);
+        scatter(from.data(), to.data(), perm.data(), perm.size());
+    }
+
+    void packed_to_flat(const array& from, array& to ) {
+        arb_assert(from.size()==data_size);
+        arb_assert(to.size()==matrix_size);
+        gather(from.data(), to.data(), perm.data(), perm.size());
+    }
+};
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/fine.cu b/arbor/backends/gpu/fine.cu
new file mode 100644
index 00000000..c695ba53
--- /dev/null
+++ b/arbor/backends/gpu/fine.cu
@@ -0,0 +1,65 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+namespace kernels {
+// to[i] = from[p[i]]
+template <typename T, typename I>
+__global__
+void gather(const T* __restrict__ const from,
+            T* __restrict__ const to,
+            const I* __restrict__ const p,
+            unsigned n) {
+    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
+
+    if (i<n) {
+        to[i] = from[p[i]];
+    }
+}
+
+// to[p[i]] = from[i]
+template <typename T, typename I>
+__global__
+void scatter(const T* __restrict__ const from,
+             T* __restrict__ const to,
+             const I* __restrict__ const p,
+             unsigned n) {
+    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
+
+    if (i<n) {
+        to[p[i]] = from[i];
+    }
+}
+
+} // namespace kernels
+
+ARB_ARBOR_API void gather(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n)
+{
+    constexpr unsigned blockdim = 128;
+    const unsigned griddim = impl::block_count(n, blockdim);
+
+    kernels::gather<<<griddim, blockdim>>>(from, to, p, n);
+}
+
+ARB_ARBOR_API void scatter(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n)
+{
+    constexpr unsigned blockdim = 128;
+    const unsigned griddim = impl::block_count(n, blockdim);
+
+    kernels::scatter<<<griddim, blockdim>>>(from, to, p, n);
+}
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/fine.hpp b/arbor/backends/gpu/fine.hpp
new file mode 100644
index 00000000..540bcea8
--- /dev/null
+++ b/arbor/backends/gpu/fine.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <iostream>
+
+#include <arbor/export.hpp>
+#include <arbor/fvm_types.hpp>
+
+namespace arb {
+namespace gpu {
+
+
+// Helper type for branch meta data in setup phase of fine grained
+// matrix storage+solver.
+//
+//      leaf
+//      .
+//      .
+//      .
+//  -   *
+//      |
+//  l   *
+//  e   |
+//  n   *
+//  g   |
+//  t   *
+//  h   |
+//  -   start_idx
+//      |
+//      parent_idx
+//      |
+//      .
+//      .
+//      .
+//      root
+struct branch {
+    unsigned id;         // branch id
+    unsigned parent_id;  // parent branch id
+    unsigned parent_idx; //
+    unsigned start_idx;  // the index of the first node in the input parent index
+    unsigned length;     // the number of nodes in the branch
+};
+
+// order branches by:
+//  - descending length
+//  - ascending id
+inline
+bool operator<(const branch& lhs, const branch& rhs) {
+    if (lhs.length!=rhs.length) {
+        return lhs.length>rhs.length;
+    } else {
+        return lhs.id<rhs.id;
+    }
+}
+
+inline
+std::ostream& operator<<(std::ostream& o, branch b) {
+    return o << "[" << b.id
+        << ", len " << b.length
+        << ", pid " << b.parent_idx
+        << ", sta " << b.start_idx
+        << "]";
+}
+
+struct level_metadata {
+    unsigned num_branches = 0; // Number of branches in a level
+    unsigned max_length = 0;   // Length of the longest branch
+    unsigned matrix_data_index = 0;   // Index into data values (d, u, rhs) of the first branch
+    unsigned level_data_index  = 0;   // Index into data values (lengths, parents) of each level
+};
+
+// C wrappers around kernels
+ARB_ARBOR_API void gather(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n);
+
+ARB_ARBOR_API void scatter(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n);
+
+} // gpu
+} // arb
diff --git a/arbor/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp
index 712b2fc7..79a373e9 100644
--- a/arbor/backends/gpu/fvm.hpp
+++ b/arbor/backends/gpu/fvm.hpp
@@ -16,8 +16,6 @@
 
 #include "threshold_watcher.hpp"
 
-#include "matrix_state_fine.hpp"
-
 namespace arb {
 namespace gpu {
 
@@ -42,11 +40,11 @@ struct backend {
         return memory::on_host(v);
     }
 
-    using matrix_state = arb::gpu::matrix_state_fine<value_type, index_type>;
-    using threshold_watcher = arb::gpu::threshold_watcher;
-
+    using threshold_watcher        = arb::gpu::threshold_watcher;
+    using cable_solver             = arb::gpu::matrix_state_fine<fvm_value_type, fvm_index_type>;
+    using diffusion_solver         = arb::gpu::diffusion_state<fvm_value_type, fvm_index_type>;
     using deliverable_event_stream = arb::gpu::deliverable_event_stream;
-    using sample_event_stream = arb::gpu::sample_event_stream;
+    using sample_event_stream      = arb::gpu::sample_event_stream;
 
     using shared_state = arb::gpu::shared_state;
     using ion_state = arb::gpu::ion_state;
diff --git a/arbor/backends/gpu/matrix_fine.cu b/arbor/backends/gpu/matrix_fine.cu
index 675f6312..41156f2d 100644
--- a/arbor/backends/gpu/matrix_fine.cu
+++ b/arbor/backends/gpu/matrix_fine.cu
@@ -7,41 +7,8 @@
 
 namespace arb {
 namespace gpu {
-
 namespace kernels {
 
-//
-// gather and scatter kernels
-//
-
-// to[i] = from[p[i]]
-template <typename T, typename I>
-__global__
-void gather(const T* __restrict__ const from,
-            T* __restrict__ const to,
-            const I* __restrict__ const p,
-            unsigned n) {
-    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
-
-    if (i<n) {
-        to[i] = from[p[i]];
-    }
-}
-
-// to[p[i]] = from[i]
-template <typename T, typename I>
-__global__
-void scatter(const T* __restrict__ const from,
-             T* __restrict__ const to,
-             const I* __restrict__ const p,
-             unsigned n) {
-    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
-
-    if (i<n) {
-        to[p[i]] = from[i];
-    }
-}
-
 /// GPU implementation of Hines matrix assembly.
 /// Fine layout.
 /// For a given time step size dt:
@@ -245,30 +212,6 @@ void solve_matrix_fine(
 
 } // namespace kernels
 
-ARB_ARBOR_API void gather(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n)
-{
-    constexpr unsigned blockdim = 128;
-    const unsigned griddim = impl::block_count(n, blockdim);
-
-    kernels::gather<<<griddim, blockdim>>>(from, to, p, n);
-}
-
-ARB_ARBOR_API void scatter(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n)
-{
-    constexpr unsigned blockdim = 128;
-    const unsigned griddim = impl::block_count(n, blockdim);
-
-    kernels::scatter<<<griddim, blockdim>>>(from, to, p, n);
-}
-
 ARB_ARBOR_API void assemble_matrix_fine(
     fvm_value_type* d,
     fvm_value_type* rhs,
diff --git a/arbor/backends/gpu/matrix_fine.hpp b/arbor/backends/gpu/matrix_fine.hpp
index 02c54903..66997323 100644
--- a/arbor/backends/gpu/matrix_fine.hpp
+++ b/arbor/backends/gpu/matrix_fine.hpp
@@ -1,31 +1,12 @@
 #include <arbor/fvm_types.hpp>
-
 #include <arbor/export.hpp>
+
+#include "fine.hpp"
+
 #include <ostream>
 
 namespace arb {
 namespace gpu {
-
-struct level_metadata {
-    unsigned num_branches = 0; // Number of branches in a level
-    unsigned max_length = 0;   // Length of the longest branch
-    unsigned matrix_data_index = 0;   // Index into data values (d, u, rhs) of the first branch
-    unsigned level_data_index  = 0;   // Index into data values (lengths, parents) of each level
-};
-
-// C wrappers around kernels
-ARB_ARBOR_API void gather(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n);
-
-ARB_ARBOR_API void scatter(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n);
-
 ARB_ARBOR_API void assemble_matrix_fine(
     fvm_value_type* d,
     fvm_value_type* rhs,
diff --git a/arbor/backends/gpu/matrix_state_fine.hpp b/arbor/backends/gpu/matrix_state_fine.hpp
index d460597c..5959513d 100644
--- a/arbor/backends/gpu/matrix_state_fine.hpp
+++ b/arbor/backends/gpu/matrix_state_fine.hpp
@@ -12,65 +12,13 @@
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
 #include "tree.hpp"
-
+#include "fine.hpp"
 #include "matrix_fine.hpp"
 #include "forest.hpp"
 
 namespace arb {
 namespace gpu {
 
-// Helper type for branch meta data in setup phase of fine grained
-// matrix storage+solver.
-//
-//      leaf
-//      .
-//      .
-//      .
-//  -   *
-//      |
-//  l   *
-//  e   |
-//  n   *
-//  g   |
-//  t   *
-//  h   |
-//  -   start_idx
-//      |
-//      parent_idx
-//      |
-//      .
-//      .
-//      .
-//      root
-struct branch {
-    unsigned id;         // branch id
-    unsigned parent_id;  // parent branch id
-    unsigned parent_idx; //
-    unsigned start_idx;  // the index of the first node in the input parent index
-    unsigned length;     // the number of nodes in the branch
-};
-
-// order branches by:
-//  - descending length
-//  - ascending id
-inline
-bool operator<(const branch& lhs, const branch& rhs) {
-    if (lhs.length!=rhs.length) {
-        return lhs.length>rhs.length;
-    } else {
-        return lhs.id<rhs.id;
-    }
-}
-
-inline
-std::ostream& operator<<(std::ostream& o, branch b) {
-    return o << "[" << b.id
-        << ", len " << b.length
-        << ", pid " << b.parent_idx
-        << ", sta " << b.start_idx
-        << "]";
-}
-
 template <typename T, typename I>
 struct matrix_state_fine {
 public:
@@ -503,11 +451,9 @@ public:
         packed_to_flat(rhs, to);
     }
 
-private:
-    std::size_t size() const {
-        return matrix_size;
-    }
+    std::size_t size() const { return matrix_size; }
 
+private:
     void flat_to_packed(const array& from, array& to ) {
         arb_assert(from.size()==matrix_size);
         arb_assert(to.size()==data_size);
diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
index b01cd663..382151b1 100644
--- a/arbor/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -55,8 +55,8 @@ std::pair<fvm_value_type, fvm_value_type> minmax_value_impl(fvm_size_type n, con
 ion_state::ion_state(
     int charge,
     const fvm_ion_config& ion_data,
-    unsigned // alignment/padding ignored.
-):
+    unsigned, // alignment/padding ignored.
+    solver_ptr ptr):
     write_eX_(ion_data.revpot_written),
     write_Xo_(ion_data.econc_written),
     write_Xi_(ion_data.iconc_written),
@@ -64,30 +64,35 @@ ion_state::ion_state(
     iX_(ion_data.cv.size(), NAN),
     eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end()),
     Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end()),
+    Xd_(ion_data.cv.size(), NAN),
     Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end()),
+    gX_(ion_data.cv.size(), NAN),
     init_Xi_(make_const_view(ion_data.init_iconc)),
     init_Xo_(make_const_view(ion_data.init_econc)),
     reset_Xi_(make_const_view(ion_data.reset_iconc)),
     reset_Xo_(make_const_view(ion_data.reset_econc)),
     init_eX_(make_const_view(ion_data.init_revpot)),
-    charge(1u, charge)
-{
+    charge(1u, charge),
+    solver(std::move(ptr)) {
     arb_assert(node_index_.size()==init_Xi_.size());
     arb_assert(node_index_.size()==init_Xo_.size());
     arb_assert(node_index_.size()==init_eX_.size());
 }
 
 void ion_state::init_concentration() {
+    // NB. not resetting Xd here, it's controlled via the solver.
     if (write_Xi_) memory::copy(init_Xi_, Xi_);
     if (write_Xo_) memory::copy(init_Xo_, Xo_);
 }
 
 void ion_state::zero_current() {
+    memory::fill(gX_, 0);
     memory::fill(iX_, 0);
 }
 
 void ion_state::reset() {
     zero_current();
+    memory::copy(reset_Xi_, Xd_);
     if (write_Xi_) memory::copy(reset_Xi_, Xi_);
     if (write_Xo_) memory::copy(reset_Xo_, Xo_);
     if (write_eX_) memory::copy(init_eX_, eX_);
@@ -315,7 +320,15 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
         auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
         ion_state* oion = ptr_by_key(ion_data, ion_binding);
         if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
-        store.ion_states_[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data(), nullptr };
+        auto& ion_state = store.ion_states_[idx];
+        ion_state = {0};
+        ion_state.current_density         = oion->iX_.data();
+        ion_state.reversal_potential      = oion->eX_.data();
+        ion_state.internal_concentration  = oion->Xi_.data();
+        ion_state.external_concentration  = oion->Xo_.data();
+        ion_state.diffusive_concentration = oion->Xd_.data();
+        ion_state.ionic_charge            = oion->charge.data();
+        ion_state.conductivity            = oion->gX_.data();
     }
 
     // If there are no sites (is this ever meaningful?) there is nothing more to do.
@@ -394,14 +407,33 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
     m.ppack_.ion_states = store.ion_states_d_.data();
 }
 
+void shared_state::integrate_voltage() {
+    solver.assemble(dt_intdom, voltage, current_density, conductivity);
+    solver.solve(voltage);
+}
+
+void shared_state::integrate_diffusion() {
+    for (auto& [ion, data]: ion_data) {
+        if (data.solver) {
+            data.solver->assemble(dt_intdom,
+                                  data.Xd_,
+                                  voltage,
+                                  data.iX_,
+                                  data.gX_,
+                                  data.charge[0]);
+            data.solver->solve(data.Xd_);
+        }
+    }
+}
+
 void shared_state::add_ion(
     const std::string& ion_name,
     int charge,
-    const fvm_ion_config& ion_info)
-{
+    const fvm_ion_config& ion_info,
+    ion_state::solver_ptr ptr) {
     ion_data.emplace(std::piecewise_construct,
         std::forward_as_tuple(ion_name),
-        std::forward_as_tuple(charge, ion_info, 1u));
+                     std::forward_as_tuple(charge, ion_info, 1u, std::move(ptr)));
 }
 
 void shared_state::configure_stimulus(const fvm_stimulus_config& stims) {
diff --git a/arbor/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
index 85f1ea0c..20cef451 100644
--- a/arbor/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -12,6 +12,8 @@
 
 #include "backends/gpu/gpu_store_types.hpp"
 #include "backends/gpu/stimulus.hpp"
+#include "backends/gpu/diffusion_state.hpp"
+#include "backends/gpu/matrix_state_fine.hpp"
 
 namespace arb {
 namespace gpu {
@@ -27,7 +29,10 @@ namespace gpu {
  *     Xi_     cai              internal calcium concentration
  *     Xo_     cao              external calcium concentration
  */
- struct ARB_ARBOR_API ion_state {
+struct ARB_ARBOR_API ion_state {
+    using solver_type = arb::gpu::diffusion_state<fvm_value_type, fvm_index_type>;
+    using solver_ptr  = std::unique_ptr<solver_type>;
+
     bool write_eX_;          // is eX written?
     bool write_Xo_;          // is Xo written?
     bool write_Xi_;          // is Xi written?
@@ -36,7 +41,9 @@ namespace gpu {
     array iX_;          // (A/mÂ²) current density
     array eX_;          // (mV) reversal potential
     array Xi_;          // (mM) internal concentration
+    array Xd_;          // (mM) diffusive concentration
     array Xo_;          // (mM) external concentration
+    array gX_;             // (kS/mÂ²) per-species conductivity
 
     array init_Xi_;     // (mM) area-weighted initial internal concentration
     array init_Xo_;     // (mM) area-weighted initial external concentration
@@ -46,13 +53,15 @@ namespace gpu {
 
     array charge;       // charge of ionic species (global, length 1)
 
+    solver_ptr solver = nullptr;
+
     ion_state() = default;
 
     ion_state(
         int charge,
         const fvm_ion_config& ion_data,
-        unsigned align
-    );
+        unsigned align,
+        solver_ptr ptr);
 
     // Set ion concentrations to weighted proportion of default concentrations.
     void init_concentration();
@@ -103,6 +112,7 @@ struct ARB_ARBOR_API istim_state {
 };
 
 struct ARB_ARBOR_API shared_state {
+
     struct mech_storage {
         array data_;
         iarray indices_;
@@ -115,6 +125,9 @@ struct ARB_ARBOR_API shared_state {
         memory::device_vector<arb_ion_state>   ion_states_d_;
     };
 
+    using cable_solver = arb::gpu::matrix_state_fine<fvm_value_type, fvm_index_type>;
+    cable_solver solver;
+
     static constexpr std::size_t alignment = std::max(array::alignment(), iarray::alignment());
 
     fvm_size_type n_intdom = 0;   // Number of distinct integration domains.
@@ -171,7 +184,8 @@ struct ARB_ARBOR_API shared_state {
     void add_ion(
         const std::string& ion_name,
         int charge,
-        const fvm_ion_config& ion_data);
+        const fvm_ion_config& ion_data,
+        ion_state::solver_ptr solver=nullptr);
 
     void configure_stimulus(const fvm_stimulus_config&);
 
@@ -188,6 +202,10 @@ struct ARB_ARBOR_API shared_state {
     // Update stimulus state and add current contributions.
     void add_stimulus_current();
 
+    // Integrate by matrix solve.
+    void integrate_voltage();
+    void integrate_diffusion();
+
     // Return minimum and maximum time value [ms] across cells.
     std::pair<fvm_value_type, fvm_value_type> time_bounds() const;
 
diff --git a/arbor/backends/multicore/matrix_state.hpp b/arbor/backends/multicore/cable_solver.hpp
similarity index 73%
rename from arbor/backends/multicore/matrix_state.hpp
rename to arbor/backends/multicore/cable_solver.hpp
index 1831bfa0..53d6f789 100644
--- a/arbor/backends/multicore/matrix_state.hpp
+++ b/arbor/backends/multicore/cable_solver.hpp
@@ -10,16 +10,13 @@
 namespace arb {
 namespace multicore {
 
-template <typename T, typename I>
-struct matrix_state {
-public:
-    using value_type = T;
-    using index_type = I;
-
-    using array = padded_vector<value_type>;
+struct cable_solver {
+    using value_type = arb_value_type;
+    using index_type = arb_index_type;
+    using array      = padded_vector<value_type>;
     using const_view = const array&;
+    using iarray     = padded_vector<index_type>;
 
-    using iarray = padded_vector<index_type>;
     iarray parent_index;
     iarray cell_cv_divs;
 
@@ -36,9 +33,14 @@ public:
     // the invariant part of the matrix diagonal
     array invariant_d;         // [Î¼S]
 
-    matrix_state() = default;
+    cable_solver() = default;
+    cable_solver(const cable_solver&) = default;
+    cable_solver(cable_solver&&) = default;
+
+    cable_solver& operator=(const cable_solver&) = default;
+    cable_solver& operator=(cable_solver&&) = default;
 
-    matrix_state(const std::vector<index_type>& p,
+    cable_solver(const std::vector<index_type>& p,
                  const std::vector<index_type>& cell_cv_divs,
                  const std::vector<value_type>& cap,
                  const std::vector<value_type>& cond,
@@ -52,19 +54,20 @@ public:
         cv_area(area.begin(), area.end()),
         cell_to_intdom(cell_to_intdom.begin(), cell_to_intdom.end())
     {
+        // Sanity check
         arb_assert(cap.size() == size());
         arb_assert(cond.size() == size());
         arb_assert(cell_cv_divs.back() == (index_type)size());
 
-        auto n = size();
+        // Build invariant parts
+        const auto n = size();
         invariant_d = array(n, 0);
         if (n >= 1) { // skip empty matrix, ie cell with empty morphology
             for (auto i: util::make_span(1u, n)) {
-                auto gij = face_conductance[i];
-
+                const auto gij = face_conductance[i];
                 u[i] = -gij;
                 invariant_d[i] += gij;
-                if (p[i]!=-1) {
+                if (p[i]!=-1) { // root
                     invariant_d[p[i]] += gij;
                 }
             }
@@ -77,7 +80,6 @@ public:
         return rhs;
     }
 
-
     // Assemble the matrix
     // Afterwards the diagonal and RHS will have been set given dt, voltage and current.
     //   dt_intdom       [ms]      (per integration domain)
@@ -85,20 +87,16 @@ public:
     //   current density [A.m^-2]  (per control volume)
     //   conductivity    [kS.m^-2] (per control volume)
     void assemble(const_view dt_intdom, const_view voltage, const_view current, const_view conductivity) {
-        auto cell_cv_part = util::partition_view(cell_cv_divs);
+        const auto cell_cv_part = util::partition_view(cell_cv_divs);
         const index_type ncells = cell_cv_part.size();
-
         // loop over submatrices
         for (auto m: util::make_span(0, ncells)) {
-            auto dt = dt_intdom[cell_to_intdom[m]];
-
+            const auto dt = dt_intdom[cell_to_intdom[m]];
             if (dt>0) {
-                value_type oodt_factor = 1e-3/dt; // [1/Âµs]
+                const value_type oodt_factor = 1e-3/dt; // [1/Âµs]
                 for (auto i: util::make_span(cell_cv_part[m])) {
-                    auto area_factor = 1e-3*cv_area[i]; // [1e-9Â·mÂ²]
-
-                    auto gi = oodt_factor*cv_capacitance[i] + area_factor*conductivity[i]; // [Î¼S]
-
+                    const auto area_factor = 1e-3*cv_area[i]; // [1e-9Â·mÂ²]
+                    const auto gi = oodt_factor*cv_capacitance[i] + area_factor*conductivity[i]; // [Î¼S]
                     d[i] = gi + invariant_d[i];
                     // convert current to units nA
                     rhs[i] = gi*voltage[i] - area_factor*current[i];
@@ -115,19 +113,17 @@ public:
 
     void solve() {
         // loop over submatrices
-        for (auto cv_span: util::partition_view(cell_cv_divs)) {
-            auto first = cv_span.first;
-            auto last = cv_span.second; // one past the end
+        for (const auto& [first, last]: util::partition_view(cell_cv_divs)) {
             if (first >= last) continue; // skip cell with no CVs
             if (d[first]!=0) {
                 // backward sweep
                 for(auto i=last-1; i>first; --i) {
-                    auto factor = u[i] / d[i];
+                    const auto factor = u[i] / d[i];
                     d[parent_index[i]]   -= factor * u[i];
                     rhs[parent_index[i]] -= factor * rhs[i];
                 }
+                // solve root
                 rhs[first] /= d[first];
-
                 // forward sweep
                 for(auto i=first+1; i<last; ++i) {
                     rhs[i] -= u[i] * rhs[parent_index[i]];
@@ -143,11 +139,8 @@ public:
         memory::copy(rhs, to);
     }
 
-private:
-
-    std::size_t size() const {
-        return parent_index.size();
-    }
+    std::size_t num_cells() const { return cell_cv_divs.size() - 1; }
+    std::size_t size() const { return parent_index.size(); }
 };
 
 } // namespace multicore
diff --git a/arbor/backends/multicore/diffusion_solver.hpp b/arbor/backends/multicore/diffusion_solver.hpp
new file mode 100644
index 00000000..112c646c
--- /dev/null
+++ b/arbor/backends/multicore/diffusion_solver.hpp
@@ -0,0 +1,145 @@
+#pragma once
+
+#include <util/partition.hpp>
+#include <util/span.hpp>
+
+#include <memory/memory.hpp>
+
+#include "multicore_common.hpp"
+
+namespace arb {
+namespace multicore {
+
+struct diffusion_solver {
+    using value_type = arb_value_type;
+    using index_type = arb_index_type;
+    using array      = padded_vector<value_type>;
+    using const_view = const array&;
+    using iarray     = padded_vector<index_type>;
+
+    iarray parent_index;
+    iarray cell_cv_divs;
+
+    array d;     // [Î¼S]
+    array u;     // [Î¼S]
+    array rhs;   // [nA]
+
+    array cv_area;             // [Î¼m^2]
+
+    iarray cell_to_intdom;
+
+    // the invariant part of the matrix diagonal
+    array invariant_d;         // [Î¼S]
+
+    diffusion_solver() = default;
+    diffusion_solver(const diffusion_solver&) = default;
+    diffusion_solver(diffusion_solver&&) = default;
+
+    diffusion_solver& operator=(const diffusion_solver&) = default;
+    diffusion_solver& operator=(diffusion_solver&&) = default;
+
+    diffusion_solver(const std::vector<index_type>& p,
+                     const std::vector<index_type>& cell_cv_divs,
+                     const std::vector<value_type>& diff,
+                     const std::vector<value_type>& area,
+                     const std::vector<index_type>& cell_to_intdom):
+        parent_index(p.begin(), p.end()),
+        cell_cv_divs(cell_cv_divs.begin(), cell_cv_divs.end()),
+        d(size(), 0), u(size(), 0), rhs(size()),
+        cv_area(area.begin(), area.end()),
+        cell_to_intdom(cell_to_intdom.begin(), cell_to_intdom.end())
+    {
+        // Sanity check
+        arb_assert(diff.size() == size());
+        arb_assert(cell_cv_divs.back() == (index_type)size());
+
+        // Build invariant parts
+        const auto n = size();
+        invariant_d = array(n, 0);
+        if (n >= 1) { // skip empty matrix, ie cell with empty morphology
+            for (auto i: util::make_span(1u, n)) {
+                auto gij = diff[i];
+                u[i]           =  -gij;
+                invariant_d[i] +=  gij;
+                // Also add to our parent, if present
+                if (auto pi = p[i]; pi != -1) invariant_d[pi] += gij;
+            }
+        }
+    }
+
+    const_view solution() const {
+        // In this back end the solution is a simple view of the rhs, which
+        // contains the solution after the matrix_solve is performed.
+        return rhs;
+    }
+
+    // Assemble the matrix
+    //   dt_intdom       [ms]      (per integration domain)
+    //   internal conc   [mM]      (per control volume)
+    //   voltage         [mV]      (per control volume)
+    //   current density [A.m^-2]  (per control volume and species)
+    //   diffusivity     [???]     (per control volume)
+    //   charge          [e]
+    void assemble(const_view dt_intdom, const_view concentration, const_view voltage, const_view current, const_view conductivity, fvm_value_type q) {
+        auto cell_cv_part = util::partition_view(cell_cv_divs);
+        index_type ncells = cell_cv_part.size();
+        // loop over submatrices
+        for (auto m: util::make_span(0, ncells)) {
+            auto dt = dt_intdom[cell_to_intdom[m]];
+            if (dt>0) {
+                value_type _1_dt = 1e-3/dt;     // 1/Âµs
+                for (auto i: util::make_span(cell_cv_part[m])) {
+                    auto u = voltage[i];        // mV
+                    auto g = conductivity[i];   // ÂµS
+                    auto J = current[i];        // A/m^2
+                    auto A = 1e-3*cv_area[i];   // 1e-9Â·mÂ²
+                    auto X = concentration[i];  // mM
+                    // conversion from current density to concentration change
+                    // using Faraday's constant
+                    auto F = A/(q*96.485332);
+                    d[i]   = _1_dt   + F*g + invariant_d[i];
+                    rhs[i] = _1_dt*X + F*(u*g - J);
+                }
+            }
+            else {
+                for (auto i: util::make_span(cell_cv_part[m])) {
+                    d[i]   = 0;
+                    rhs[i] = concentration[i];
+                }
+            }
+        }
+    }
+
+    void solve() {
+        // loop over submatrices
+        for (const auto& [first, last]: util::partition_view(cell_cv_divs)) {
+            if (first >= last) continue; // skip cell with no CVs
+            if (d[first]!=0) {
+                // backward sweep
+                for(auto i=last-1; i>first; --i) {
+                    auto pi  = parent_index[i];
+                    auto fac = -u[i] / d[i];
+                    d[pi]    = std::fma(fac, u[i],   d[pi]);
+                    rhs[pi]  = std::fma(fac, rhs[i], rhs[pi]);
+                }
+                // solve root
+                rhs[first] /= d[first];
+                // forward sweep
+                for(auto i=first+1; i<last; ++i) {
+                    rhs[i] = std::fma(-u[i], rhs[parent_index[i]], rhs[i])/d[i];
+                }
+            }
+        }
+    }
+
+    template<typename VTo>
+    void solve(VTo& to) {
+        solve();
+        memory::copy(rhs, to);
+    }
+
+    std::size_t size() const { return parent_index.size(); }
+};
+
+} // namespace multicore
+} // namespace arb
diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp
index a9064357..fb0e17cb 100644
--- a/arbor/backends/multicore/fvm.hpp
+++ b/arbor/backends/multicore/fvm.hpp
@@ -6,10 +6,11 @@
 #include <arbor/mechanism.hpp>
 
 #include "backends/event.hpp"
-#include "backends/multicore/matrix_state.hpp"
 #include "backends/multicore/multi_event_stream.hpp"
 #include "backends/multicore/multicore_common.hpp"
 #include "backends/multicore/shared_state.hpp"
+#include "backends/multicore/diffusion_solver.hpp"
+#include "backends/multicore/cable_solver.hpp"
 #include "backends/multicore/threshold_watcher.hpp"
 #include "execution_context.hpp"
 #include "util/padded_alloc.hpp"
@@ -40,14 +41,13 @@ struct backend {
         return util::range_pointer_view(v);
     }
 
-    using matrix_state = arb::multicore::matrix_state<value_type, index_type>;
-    using threshold_watcher = arb::multicore::threshold_watcher;
-
+    using cable_solver             = arb::multicore::cable_solver;
+    using diffusion_solver         = arb::multicore::diffusion_solver;
+    using threshold_watcher        = arb::multicore::threshold_watcher;
     using deliverable_event_stream = arb::multicore::deliverable_event_stream;
-    using sample_event_stream = arb::multicore::sample_event_stream;
-
-    using shared_state = arb::multicore::shared_state;
-    using ion_state = arb::multicore::ion_state;
+    using sample_event_stream      = arb::multicore::sample_event_stream;
+    using shared_state             = arb::multicore::shared_state;
+    using ion_state                = arb::multicore::ion_state;
 
     static threshold_watcher voltage_watcher(
         shared_state& state,
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index f6c0b205..ccc1a802 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -56,8 +56,8 @@ using pad = util::padded_allocator<>;
 ion_state::ion_state(
     int charge,
     const fvm_ion_config& ion_data,
-    unsigned align
-):
+    unsigned align,
+    solver_ptr ptr):
     alignment(min_alignment(align)),
     write_eX_(ion_data.revpot_written),
     write_Xo_(ion_data.econc_written),
@@ -66,14 +66,16 @@ ion_state::ion_state(
     iX_(ion_data.cv.size(), NAN, pad(alignment)),
     eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end(), pad(alignment)),
     Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end(), pad(alignment)),
+    Xd_(ion_data.reset_iconc.begin(), ion_data.reset_iconc.end(), pad(alignment)),
     Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end(), pad(alignment)),
+    gX_(ion_data.cv.size(), NAN, pad(alignment)),
     init_Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end(), pad(alignment)),
     init_Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end(), pad(alignment)),
     reset_Xi_(ion_data.reset_iconc.begin(), ion_data.reset_iconc.end(), pad(alignment)),
     reset_Xo_(ion_data.reset_econc.begin(), ion_data.reset_econc.end(), pad(alignment)),
     init_eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end(), pad(alignment)),
-    charge(1u, charge, pad(alignment))
-{
+    charge(1u, charge, pad(alignment)),
+    solver(std::move(ptr)) {
     arb_assert(node_index_.size()==init_Xi_.size());
     arb_assert(node_index_.size()==init_Xo_.size());
     arb_assert(node_index_.size()==eX_.size());
@@ -81,16 +83,19 @@ ion_state::ion_state(
 }
 
 void ion_state::init_concentration() {
+    // NB. not resetting Xd here, it's controlled via the solver.
     if (write_Xi_) std::copy(init_Xi_.begin(), init_Xi_.end(), Xi_.begin());
     if (write_Xo_) std::copy(init_Xo_.begin(), init_Xo_.end(), Xo_.begin());
 }
 
 void ion_state::zero_current() {
+    util::fill(gX_, 0);
     util::fill(iX_, 0);
 }
 
 void ion_state::reset() {
     zero_current();
+    std::copy(reset_Xi_.begin(), reset_Xi_.end(), Xd_.begin());
     if (write_Xi_) std::copy(reset_Xi_.begin(), reset_Xi_.end(), Xi_.begin());
     if (write_Xo_) std::copy(reset_Xo_.begin(), reset_Xo_.end(), Xo_.begin());
     if (write_eX_) std::copy(init_eX_.begin(), init_eX_.end(), eX_.begin());
@@ -239,13 +244,33 @@ shared_state::shared_state(
     }
 }
 
+void shared_state::integrate_voltage() {
+    solver.assemble(dt_intdom, voltage, current_density, conductivity);
+    solver.solve(voltage);
+}
+
+void shared_state::integrate_diffusion() {
+    for (auto& [ion, data]: ion_data) {
+        if (data.solver) {
+            data.solver->assemble(dt_intdom,
+                                  data.Xd_,
+                                  voltage,
+                                  data.iX_,
+                                  data.gX_,
+                                  data.charge[0]);
+            data.solver->solve(data.Xd_);
+        }
+    }
+}
+
 void shared_state::add_ion(
     const std::string& ion_name,
     int charge,
-    const fvm_ion_config& ion_info) {
+    const fvm_ion_config& ion_info,
+    ion_state::solver_ptr ptr) {
     ion_data.emplace(std::piecewise_construct,
-        std::forward_as_tuple(ion_name),
-        std::forward_as_tuple(charge, ion_info, alignment));
+                     std::forward_as_tuple(ion_name),
+                     std::forward_as_tuple(charge, ion_info, alignment, std::move(ptr)));
 }
 
 void shared_state::configure_stimulus(const fvm_stimulus_config& stims) {
@@ -503,9 +528,18 @@ void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_o
     for (auto idx: make_span(m.mech_.n_ions)) {
         auto ion = m.mech_.ions[idx].name;
         auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
-        ion_state* oion = ptr_by_key(ion_data, ion_binding);
+        auto* oion = ptr_by_key(ion_data, ion_binding);
         if (!oion) throw arbor_internal_error(util::pprintf("multicore/mechanism: mechanism holds ion '{}' with no corresponding shared state", ion));
-        m.ppack_.ion_states[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data() };
+
+        auto& ion_state = m.ppack_.ion_states[idx];
+        ion_state = {0};
+        ion_state.current_density         = oion->iX_.data();
+        ion_state.reversal_potential      = oion->eX_.data();
+        ion_state.internal_concentration  = oion->Xi_.data();
+        ion_state.external_concentration  = oion->Xo_.data();
+        ion_state.diffusive_concentration = oion->Xd_.data();
+        ion_state.ionic_charge            = oion->charge.data();
+        ion_state.conductivity            = oion->gX_.data();
     }
 
     // Initialize state and parameter vectors with default values.
diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
index 8aa8d415..149cc152 100644
--- a/arbor/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -17,12 +17,13 @@
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
 
-#include "matrix_state.hpp"
 #include "multi_event_stream.hpp"
 #include "threshold_watcher.hpp"
 #include "fvm_layout.hpp"
 #include "multicore_common.hpp"
 #include "partition_by_constraint.hpp"
+#include "backends/multicore/cable_solver.hpp"
+#include "backends/multicore/diffusion_solver.hpp"
 
 namespace arb {
 namespace multicore {
@@ -39,6 +40,9 @@ namespace multicore {
  *     Xo_     cao              external calcium concentration
  */
 struct ARB_ARBOR_API ion_state {
+    using solver_type = diffusion_solver;
+    using solver_ptr  = std::unique_ptr<solver_type>;
+
     unsigned alignment = 1; // Alignment and padding multiple.
 
     bool write_eX_;          // is eX written?
@@ -46,10 +50,12 @@ struct ARB_ARBOR_API ion_state {
     bool write_Xi_;          // is Xi written?
 
     iarray node_index_;     // Instance to CV map.
-    array iX_;              // (A/mÂ²) current density
-    array eX_;              // (mV) reversal potential
-    array Xi_;              // (mM) internal concentration
-    array Xo_;              // (mM) external concentration
+    array iX_;              // (A/mÂ²)  current density
+    array eX_;              // (mV)    reversal potential
+    array Xi_;              // (mM)    internal concentration
+    array Xd_;              // (mM)    diffusive internal concentration
+    array Xo_;              // (mM)    external concentration
+    array gX_;              // (kS/mÂ²) per-species conductivity
 
     array init_Xi_;         // (mM) area-weighted initial internal concentration
     array init_Xo_;         // (mM) area-weighted initial external concentration
@@ -59,12 +65,15 @@ struct ARB_ARBOR_API ion_state {
 
     array charge;           // charge of ionic species (global value, length 1)
 
+    solver_ptr solver = nullptr;
+
     ion_state() = default;
 
     ion_state(
         int charge,
         const fvm_ion_config& ion_data,
-        unsigned align
+        unsigned align,
+        solver_ptr ptr
     );
 
     // Set ion concentrations to weighted proportion of default concentrations.
@@ -120,6 +129,8 @@ struct ARB_ARBOR_API shared_state {
         std::vector<arb_ion_state>   ion_states_;
     };
 
+    cable_solver solver;
+
     unsigned alignment = 1;   // Alignment and padding multiple.
     util::padded_allocator<> alloc;  // Allocator with corresponging alignment/padding.
 
@@ -175,7 +186,8 @@ struct ARB_ARBOR_API shared_state {
     void add_ion(
         const std::string& ion_name,
         int charge,
-        const fvm_ion_config& ion_data);
+        const fvm_ion_config& ion_data,
+        ion_state::solver_ptr solver=nullptr);
 
     void configure_stimulus(const fvm_stimulus_config&);
 
@@ -194,6 +206,10 @@ struct ARB_ARBOR_API shared_state {
     // Update stimulus state and add current contributions.
     void add_stimulus_current();
 
+    // Integrate by matrix solve.
+    void integrate_voltage();
+    void integrate_diffusion();
+
     // Return minimum and maximum time value [ms] across cells.
     std::pair<fvm_value_type, fvm_value_type> time_bounds() const;
 
diff --git a/arbor/cable_cell.cpp b/arbor/cable_cell.cpp
index 704b28cc..a8a3bcf6 100644
--- a/arbor/cable_cell.cpp
+++ b/arbor/cable_cell.cpp
@@ -146,6 +146,10 @@ struct cable_cell_impl {
         return region_map.get<init_int_concentration>()[init.ion];
     }
 
+    mcable_map<ion_diffusivity>& get_region_map(const ion_diffusivity& init) {
+        return region_map.get<ion_diffusivity>()[init.ion];
+    }
+
     mcable_map<init_ext_concentration>& get_region_map(const init_ext_concentration& init) {
         return region_map.get<init_ext_concentration>()[init.ion];
     }
diff --git a/arbor/cable_cell_param.cpp b/arbor/cable_cell_param.cpp
index 36b17d9d..1c2a91e4 100644
--- a/arbor/cable_cell_param.cpp
+++ b/arbor/cable_cell_param.cpp
@@ -37,15 +37,16 @@ ARB_ARBOR_API void check_global_properties(const cable_cell_global_properties& G
         }
     }
 
-    for (const auto& kv: param.ion_data) {
-        auto& ion = kv.first;
-        const cable_cell_ion_data& data = kv.second;
+    for (const auto& [ion, data]: param.ion_data) {
         if (!data.init_int_concentration) {
             throw cable_cell_error("missing init_int_concentration for ion "+ion);
         }
         if (!data.init_ext_concentration) {
             throw cable_cell_error("missing init_ext_concentration for ion "+ion);
         }
+        if (data.diffusivity && *data.diffusivity < 0.0) {
+            throw cable_cell_error("negative diffusivity for ion "+ion);
+        }
         if (!data.init_reversal_potential && !param.reversal_potential_method.count(ion)) {
             throw cable_cell_error("missing init_reversal_potential or reversal_potential_method for ion "+ion);
         }
@@ -62,11 +63,10 @@ cable_cell_parameter_set neuron_parameter_defaults = {
     // membrane capacitance [F/mÂ²]
     0.01,
     // ion defaults:
-    // internal concentration [mM], external concentration [mM], reversal potential [mV]
-    {
-        {"na", {10.0,  140.0,  115 - 65.}},
-        {"k",  {54.4,    2.5,  -12 - 65.}},
-        {"ca", {5e-5,    2.0,  12.5*std::log(2.0/5e-5)}}
+    // internal concentration [mM], external concentration [mM], reversal potential [mV], diffusivity [m^2/s]
+    {{"na", {10.0,  140.0,  115 - 65.,               0.0}},
+     {"k",  {54.4,    2.5,  -12 - 65.,               0.0}},
+     {"ca", {5e-5,    2.0,  12.5*std::log(2.0/5e-5), 0.0}}
     },
 };
 
@@ -96,6 +96,9 @@ std::vector<defaultable> cable_cell_parameter_set::serialize() const {
         if (data.init_reversal_potential) {
             D.push_back(init_reversal_potential{name, *data.init_reversal_potential});
         }
+        if (data.init_reversal_potential) {
+            D.push_back(ion_diffusivity{name, *data.diffusivity});
+        }
     }
 
     for (const auto& [name, mech]: reversal_potential_method) {
@@ -148,6 +151,9 @@ void decor::set_default(defaultable what) {
                 else if constexpr (std::is_same_v<cv_policy, T>) {
                     defaults_.discretization = std::forward<cv_policy>(p);
                 }
+                else if constexpr (std::is_same_v<ion_diffusivity, T>) {
+                    defaults_.ion_data[p.ion].diffusivity = p.value;
+                }
             },
             what);
 }
diff --git a/arbor/fvm_layout.cpp b/arbor/fvm_layout.cpp
index 5f7dd6bc..e862adad 100644
--- a/arbor/fvm_layout.cpp
+++ b/arbor/fvm_layout.cpp
@@ -3,6 +3,7 @@
 #include <set>
 #include <stdexcept>
 #include <unordered_set>
+#include <unordered_map>
 #include <vector>
 
 #include <arbor/arbexcept.hpp>
@@ -12,6 +13,8 @@
 #include <arbor/morph/mprovider.hpp>
 #include <arbor/morph/morphology.hpp>
 
+#include <iostream>
+
 #include "fvm_layout.hpp"
 #include "threading/threading.hpp"
 #include "util/maputil.hpp"
@@ -22,6 +25,7 @@
 #include "util/rangeutil.hpp"
 #include "util/transform.hpp"
 #include "util/unique.hpp"
+#include "util/strprintf.hpp"
 
 namespace arb {
 
@@ -209,6 +213,23 @@ ARB_ARBOR_API fvm_cv_discretization& append(fvm_cv_discretization& dczn, const f
 
     append(dczn.geometry, right.geometry);
 
+    // Those in L and R: merge
+    for (auto& [ion, data]: dczn.diffusive_ions) {
+        const auto& rhs = right.diffusive_ions.find(ion);
+        if (rhs != right.diffusive_ions.end()) {
+            append(data.axial_inv_diffusivity, rhs->second.axial_inv_diffusivity);
+            append(data.face_diffusivity, rhs->second.face_diffusivity);
+        }
+    }
+    // Those only in R: add to L
+    for (auto& [ion, data]: right.diffusive_ions) {
+        const auto& lhs = dczn.diffusive_ions.find(ion);
+        if (lhs == dczn.diffusive_ions.end()) {
+            dczn.diffusive_ions[ion].axial_inv_diffusivity = data.axial_inv_diffusivity;
+            dczn.diffusive_ions[ion].face_diffusivity      = data.face_diffusivity;
+        }
+    }
+
     append(dczn.face_conductance, right.face_conductance);
     append(dczn.cv_area, right.cv_area);
     append(dczn.cv_capacitance, right.cv_capacitance);
@@ -253,6 +274,63 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
     const auto& capacitance = assignments.get<membrane_capacitance>();
     const auto& potential   = assignments.get<init_membrane_potential>();
     const auto& temperature = assignments.get<temperature_K>();
+    const auto& diffusivity = assignments.get<ion_diffusivity>();
+
+    // Set up for ion diffusivity
+    std::unordered_map<std::string, mcable_map<double>> inverse_diffusivity;
+    std::unordered_map<std::string, fvm_diffusion_info> diffusive_ions;
+
+    // Collect all eglible ions: those where any cable has finite diffusivity
+    for (const auto& [ion, data]: global_dflt.ion_data) {
+        if (data.diffusivity.value_or(0.0) != 0.0) {
+            diffusive_ions[ion] = {};
+        }
+    }
+    for (const auto& [ion, data]: dflt.ion_data) {
+        if (data.diffusivity.value_or(0.0) != 0.0) {
+            diffusive_ions[ion] = {};
+        }
+    }
+    for (const auto& [ion, data]: diffusivity) {
+        auto diffusive = std::any_of(data.begin(),
+                                     data.end(),
+                                     [](const auto& kv) { return kv.second.value != 0.0; });
+        if (diffusive) {
+            diffusive_ions[ion] = {};
+        }
+    }
+
+    // Remap diffusivity to resistivity
+    for (auto& [ion, data]: diffusive_ions) {
+        auto& id_map = inverse_diffusivity[ion];
+        // Treat specific assignments
+        if (auto map = diffusivity.find(ion); map != diffusivity.end()) {
+            for (const auto& [k, v]: map->second) {
+                if (v.value <= 0.0) {
+                    throw cable_cell_error{util::pprintf("Illegal diffusivity '{}' for ion '{}' at '{}'.", v.value, ion, k)};
+                }
+                id_map.insert(k, 1.0/v.value);
+            }
+        }
+        // Fetch defaults, either global or per cell
+        auto gd = *(value_by_key(dflt.ion_data,        ion).value().diffusivity
+                  | value_by_key(global_dflt.ion_data, ion).value().diffusivity);
+        if (gd <= 0.0) {
+            throw cable_cell_error{util::pprintf("Illegal global diffusivity '{}' for ion '{}'.", gd, ion)};
+        }
+
+        // Write inverse diffusivity / diffuse resistivity map
+        auto& id = data.axial_inv_diffusivity;
+        id.resize(1);
+        msize_t n_branch = D.geometry.n_branch(0);
+        id.reserve(n_branch);
+        for (msize_t i = 0; i<n_branch; ++i) {
+            auto pw = pw_over_cable(id_map, mcable{i, 0., 1.}, 1.0/gd);
+            id[0].push_back(pw);
+        }
+        // Prepare conductivity map
+        data.face_diffusivity.resize(n_cv);
+    }
 
     D.axial_resistivity.resize(1);
     msize_t n_branch = D.geometry.n_branch(0);
@@ -265,7 +343,7 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
     const auto& embedding = cell.embedding();
     for (auto i: count_along(D.geometry.cv_parent)) {
         auto cv_cables = D.geometry.cables(i);
-        // Computing face_conductance:
+        // Computing face_conductance and face_diffusivity
         //
         // Flux between adjacent CVs is computed as if there were no membrane currents, and with the CV voltage
         // values taken to be exact at a reference point in each CV:
@@ -273,6 +351,10 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
         //     * If the CV is branched, the reference point is taken to be closest branch point to
         //       the interface between the two CVs.
         D.face_conductance[i] = 0;
+        for (auto& [ion, info]: diffusive_ions) {
+            info.face_diffusivity[i] = 0.0;
+        }
+
         fvm_index_type p = D.geometry.cv_parent[i];
         if (p!=-1) {
             auto parent_cables = D.geometry.cables(p);
@@ -296,6 +378,10 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
             mcable span{bid, parent_refpt, cv_refpt};
             double resistance = embedding.integrate_ixa(span, D.axial_resistivity[0].at(bid));
             D.face_conductance[i] = 100/resistance; // 100 scales to ÂµS.
+            for (auto& [ion, info]: diffusive_ions) {
+                double resistance = embedding.integrate_ixa(span, info.axial_inv_diffusivity[0].at(bid));
+                info.face_diffusivity[i] = 1.0/resistance; // scale to m^2/s
+            }
         }
 
         D.cv_area[i] = 0;
@@ -313,8 +399,13 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
         }
 
         if (D.cv_area[i]>0) {
-            D.init_membrane_potential[i] /= D.cv_area[i];
-            D.temperature_K[i] /= D.cv_area[i];
+            auto A = D.cv_area[i];
+            D.init_membrane_potential[i] /= A;
+            D.temperature_K[i] /= A;
+
+            for (auto& [ion, info]: diffusive_ions) {
+                info.face_diffusivity[i] /= A;
+            }
             // If parent is trivial, and there is no grandparent, then we can use values from this CV
             // to get initial values for the parent. (The other case, when there is a grandparent, is
             // caught below.)
@@ -333,6 +424,8 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
             D.diam_um[i] = D.cv_area[i]/(cv_length*math::pi<double>);
         }
     }
+
+    D.diffusive_ions = std::move(diffusive_ions);
     return D;
 }
 
@@ -595,6 +688,8 @@ fvm_mechanism_data& append(fvm_mechanism_data& left, const fvm_mechanism_data& r
         append(L.reset_iconc, R.reset_iconc);
         append(L.reset_econc, R.reset_econc);
         append(L.init_revpot, R.init_revpot);
+        append(L.face_diffusivity, R.face_diffusivity);
+        L.is_diffusive |= R.is_diffusive;
         L.econc_written  |= R.econc_written;
         L.iconc_written  |= R.iconc_written;
         L.revpot_written |= R.revpot_written;
@@ -745,7 +840,7 @@ fvm_mechanism_data fvm_build_mechanism_data(
     const auto& assignments = cell.region_assignments();
 
     // Verify mechanism ion usage, parameter values.
-    auto verify_mechanism = [&gprop](const mechanism_info& info, const mechanism_desc& desc) {
+    auto verify_mechanism = [&gprop, &D](const mechanism_info& info, const mechanism_desc& desc) {
         const auto& global_ions = gprop.ion_species;
 
         for (const auto& pv: desc.values()) {
@@ -757,30 +852,40 @@ fvm_mechanism_data fvm_build_mechanism_data(
             }
         }
 
-        for (const auto& ion: info.ions) {
-            const auto& ion_name = ion.first;
-            const auto& ion_dep = ion.second;
-
-            if (!global_ions.count(ion_name)) {
+        for (const auto& [ion, dep]: info.ions) {
+            if (!global_ions.count(ion)) {
                 throw cable_cell_error(
-                    "mechanism "+desc.name()+" uses ion "+ion_name+ " which is missing in global properties");
+                    "mechanism "+desc.name()+" uses ion "+ion+ " which is missing in global properties");
             }
 
-            if (ion_dep.verify_ion_charge) {
-                if (ion_dep.expected_ion_charge!=global_ions.at(ion_name)) {
+            if (dep.verify_ion_charge) {
+                if (dep.expected_ion_charge!=global_ions.at(ion)) {
                     throw cable_cell_error(
-                        "mechanism "+desc.name()+" uses ion "+ion_name+ " expecting a different valence");
+                        "mechanism "+desc.name()+" uses ion "+ion+ " expecting a different valence");
                 }
             }
 
-            if (ion_dep.write_reversal_potential && (ion_dep.write_concentration_int || ion_dep.write_concentration_ext)) {
+            if (dep.write_reversal_potential && (dep.write_concentration_int || dep.write_concentration_ext)) {
                 throw cable_cell_error("mechanism "+desc.name()+" writes both reversal potential and concentration");
             }
+
+            auto is_diffusive = D.diffusive_ions.count(ion);
+            if (dep.access_concentration_diff && !is_diffusive) {
+                throw illegal_diffusive_mechanism(desc.name(), ion);
+            }
         }
     };
 
     // Track ion usage of mechanisms so that ions are only instantiated where required.
     std::unordered_map<std::string, std::vector<index_type>> ion_support;
+
+    // add diffusive ions to support: If diffusive, it's everywhere.
+    for (const auto& [ion, data]: D.diffusive_ions) {
+        auto& s = ion_support[ion];
+        s.resize(D.geometry.size());
+        std::iota(s.begin(), s.end(), 0);
+    }
+
     auto update_ion_support = [&ion_support](const mechanism_info& info, const std::vector<index_type>& cvs) {
         arb_assert(util::is_sorted(cvs));
 
@@ -1193,9 +1298,9 @@ fvm_mechanism_data fvm_build_mechanism_data(
         auto n_cv = config.cv.size();
         config.init_iconc.resize(n_cv);
         config.init_econc.resize(n_cv);
+        config.init_revpot.resize(n_cv);
         config.reset_iconc.resize(n_cv);
         config.reset_econc.resize(n_cv);
-        config.init_revpot.resize(n_cv);
 
         auto global_ion_data = value_by_key(global_dflt.ion_data, ion).value();
         auto dflt_iconc = global_ion_data.init_int_concentration.value();
@@ -1221,7 +1326,8 @@ fvm_mechanism_data fvm_build_mechanism_data(
             auto cv = config.cv[i];
             if (D.cv_area[cv]==0) continue;
 
-            for (mcable c: D.geometry.cables(cv)) {
+            const auto& cv_cables = D.geometry.cables(cv);
+            for (const mcable c: cv_cables) {
                 auto iconc = pw_over_cable(iconc_on_cable, c, dflt_iconc);
                 auto econc = pw_over_cable(econc_on_cable, c, dflt_econc);
                 auto rvpot = pw_over_cable(rvpot_on_cable, c, dflt_rvpot);
@@ -1237,12 +1343,18 @@ fvm_mechanism_data fvm_build_mechanism_data(
                 config.init_econc[i] += embedding.integrate_area(c.branch, econc_masked);
             }
 
+            // Scale all by area
             double oo_cv_area = 1./D.cv_area[cv];
             config.reset_iconc[i] *= oo_cv_area;
             config.reset_econc[i] *= oo_cv_area;
             config.init_revpot[i] *= oo_cv_area;
-            config.init_iconc[i] *= oo_cv_area;
-            config.init_econc[i] *= oo_cv_area;
+            config.init_iconc[i]  *= oo_cv_area;
+            config.init_econc[i]  *= oo_cv_area;
+        }
+
+        if (auto di = D.diffusive_ions.find(ion); di != D.diffusive_ions.end()) {
+            config.is_diffusive          = true;
+            config.face_diffusivity      = di->second.face_diffusivity;
         }
 
         config.econc_written  = write_xo.count(ion);
@@ -1325,7 +1437,6 @@ fvm_mechanism_data fvm_build_mechanism_data(
             }
         }
     }
-
     // Confirm that all ions written to by a revpot have a corresponding entry in a reversal_potential_method table.
     for (auto& kv: revpot_tbl) {
         if (!revpot_specified.count(kv.first)) {
diff --git a/arbor/fvm_layout.hpp b/arbor/fvm_layout.hpp
index 9306c78f..2c26a398 100644
--- a/arbor/fvm_layout.hpp
+++ b/arbor/fvm_layout.hpp
@@ -137,6 +137,12 @@ ARB_ARBOR_API cv_geometry& append(cv_geometry&, const cv_geometry&);
 // to the CV, or in the absence of any internal forks, is exact at the
 // midpoint of an unbranched CV.
 
+struct fvm_diffusion_info {
+    using value_type = fvm_value_type;
+    std::vector<value_type> face_diffusivity;
+    std::vector<std::vector<pw_constant_fn>> axial_inv_diffusivity;
+};
+    
 struct fvm_cv_discretization {
     using size_type = fvm_size_type;
     using index_type = fvm_index_type;
@@ -158,6 +164,9 @@ struct fvm_cv_discretization {
 
     // For each cell, one piece-wise constant value per branch.
     std::vector<std::vector<pw_constant_fn>> axial_resistivity; // [Î©Â·cm]
+
+    // For each diffusive ion species, their properties
+    std::unordered_map<std::string, fvm_diffusion_info> diffusive_ions;
 };
 
 // Combine two fvm_cv_geometry groups in-place.
@@ -242,6 +251,10 @@ struct fvm_ion_config {
 
     // Ion-specific (initial) reversal potential per CV.
     std::vector<value_type> init_revpot;
+
+    // diffusivity
+    bool is_diffusive = false;
+    std::vector<value_type> face_diffusivity;
 };
 
 struct fvm_stimulus_config {
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index 400df4d8..8621ef7e 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -27,7 +27,6 @@
 #include "fvm_layout.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "label_resolution.hpp"
-#include "matrix.hpp"
 #include "profile/profiler_macro.hpp"
 #include "sampler_map.hpp"
 #include "util/maputil.hpp"
@@ -77,10 +76,10 @@ public:
 
 private:
     // Host or GPU-side back-end dependent storage.
-    using array = typename backend::array;
-    using shared_state = typename backend::shared_state;
+    using array               = typename backend::array;
+    using shared_state        = typename backend::shared_state;
     using sample_event_stream = typename backend::sample_event_stream;
-    using threshold_watcher = typename backend::threshold_watcher;
+    using threshold_watcher   = typename backend::threshold_watcher;
 
     execution_context context_;
 
@@ -90,7 +89,6 @@ private:
     sample_event_stream sample_events_;
     array sample_time_;
     array sample_value_;
-    matrix<backend> matrix_;
     threshold_watcher threshold_watcher_;
 
     value_type tmin_ = 0;
@@ -273,13 +271,13 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         sample_events_.drop_marked_events();
         PL();
 
-        // Integrate voltage by matrix solve.
-
-        PE(advance:integrate:matrix:build);
-        matrix_.assemble(state_->dt_intdom, state_->voltage, state_->current_density, state_->conductivity);
+        // Integrate voltage / solve cable eq
+        PE(advance:integrate:voltage);
+        state_->integrate_voltage();
         PL();
-        PE(advance:integrate:matrix:solve);
-        matrix_.solve(state_->voltage);
+        // Compute ionic diffusion effects
+        PE(advance:integrate:diffusion);
+        state_->integrate_diffusion();
         PL();
 
         // Integrate mechanism state.
@@ -458,8 +456,6 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
                    [&fvm_info](index_type i){ return fvm_info.cell_to_intdom[i]; });
 
     arb_assert(D.n_cell() == ncell);
-    matrix_ = matrix<backend>(D.geometry.cv_parent, D.geometry.cell_cv_divs,
-                              D.cv_capacitance, D.face_conductance, D.cv_area, fvm_info.cell_to_intdom);
     sample_events_ = sample_event_stream(nintdom);
 
     // Discretize and build gap junction info.
@@ -502,11 +498,26 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
                 D.init_membrane_potential, D.temperature_K, D.diam_um, std::move(src_to_spike),
                 data_alignment? data_alignment: 1u);
 
+    state_->solver =
+        {D.geometry.cv_parent, D.geometry.cell_cv_divs, D.cv_capacitance, D.face_conductance, D.cv_area, fvm_info.cell_to_intdom};
+
     // Instantiate mechanisms, ions, and stimuli.
+    auto mk_diff_solver = [&](const auto& fd) {
+        // TODO(TH) _all_ solvers should share their RO data, ie everything below D here.
+        return std::make_unique<typename backend::ion_state::solver_type>(D.geometry.cv_parent,
+                                                                          D.geometry.cell_cv_divs,
+                                                                          fd,
+                                                                          D.cv_area,
+                                                                          fvm_info.cell_to_intdom);
+    };
 
     for (const auto& [ion, data]: mech_data.ions) {
         if (auto charge = value_by_key(global_props.ion_species, ion)) {
-            state_->add_ion(ion, *charge, data);
+            if (data.is_diffusive) {
+                state_->add_ion(ion, *charge, data, mk_diff_solver(data.face_diffusivity));
+            } else {
+                state_->add_ion(ion, *charge, data, nullptr);
+            }
         }
         else {
             throw cable_cell_error("unrecognized ion '"+ion+"' in mechanism");
@@ -758,6 +769,8 @@ void fvm_lowered_cell_impl<Backend>::resolve_probe_address(
         cable_probe_ion_current_cell,
         cable_probe_ion_int_concentration,
         cable_probe_ion_int_concentration_cell,
+        cable_probe_ion_diff_concentration,
+        cable_probe_ion_diff_concentration_cell,
         cable_probe_ion_ext_concentration,
         cable_probe_ion_ext_concentration_cell>;
 
@@ -1086,6 +1099,16 @@ void resolve_probe(const cable_probe_ion_ext_concentration& p, probe_resolution_
     }
 }
 
+template <typename B>
+void resolve_probe(const cable_probe_ion_diff_concentration& p, probe_resolution_data<B>& R) {
+    for (mlocation loc: thingify(p.locations, R.cell.provider())) {
+        auto opt_i = R.ion_location_index(p.ion, loc);
+        if (!opt_i) continue;
+
+        R.result.push_back(fvm_probe_scalar{{R.state->ion_data.at(p.ion).Xd_.data()+*opt_i}, loc});
+    }
+}
+
 // Common implementation for int and ext concentrations across whole cell:
 template <typename B>
 void resolve_ion_conc_common(const std::vector<fvm_index_type>& ion_cvs, const fvm_value_type* src, probe_resolution_data<B>& R) {
@@ -1117,4 +1140,10 @@ void resolve_probe(const cable_probe_ion_ext_concentration_cell& p, probe_resolu
     resolve_ion_conc_common<B>(R.M.ions.at(p.ion).cv, R.state->ion_data.at(p.ion).Xo_.data(), R);
 }
 
+template <typename B>
+void resolve_probe(const cable_probe_ion_diff_concentration_cell& p, probe_resolution_data<B>& R) {
+    if (!R.state->ion_data.count(p.ion)) return;
+    resolve_ion_conc_common<B>(R.M.ions.at(p.ion).cv, R.state->ion_data.at(p.ion).Xd_.data(), R);
+}
+
 } // namespace arb
diff --git a/arbor/include/arbor/arbexcept.hpp b/arbor/include/arbor/arbexcept.hpp
index dee6b57e..1a5679a1 100644
--- a/arbor/include/arbor/arbexcept.hpp
+++ b/arbor/include/arbor/arbexcept.hpp
@@ -128,6 +128,12 @@ struct ARB_SYMBOL_VISIBLE no_such_parameter: arbor_exception {
     std::string param_name;
 };
 
+struct ARB_SYMBOL_VISIBLE illegal_diffusive_mechanism: arbor_exception {
+    explicit illegal_diffusive_mechanism(const std::string& mech, const std::string& ion);
+    std::string mech;
+    std::string ion;
+};
+
 struct ARB_SYMBOL_VISIBLE invalid_parameter_value: arbor_exception {
     invalid_parameter_value(const std::string& mech_name, const std::string& param_name, const std::string& value_str);
     invalid_parameter_value(const std::string& mech_name, const std::string& param_name, double value);
diff --git a/arbor/include/arbor/cable_cell.hpp b/arbor/include/arbor/cable_cell.hpp
index 5b8ef76b..e2927c8f 100644
--- a/arbor/include/arbor/cable_cell.hpp
+++ b/arbor/include/arbor/cable_cell.hpp
@@ -162,6 +162,21 @@ struct ARB_SYMBOL_VISIBLE cable_probe_ion_int_concentration_cell {
     std::string ion;
 };
 
+// Ionic diffusive concentration [mmol/L] of ion `source` at `location`.
+// Sample value type: `double`
+// Sample metadata type: `mlocation`
+struct ARB_SYMBOL_VISIBLE cable_probe_ion_diff_concentration {
+    locset locations;
+    std::string ion;
+};
+
+// Ionic diffusiev concentration [mmol/L] of ion `source` across components of the cell.
+// Sample value type: `cable_sample_range`
+// Sample metadata type: `mcable_list`
+struct ARB_SYMBOL_VISIBLE cable_probe_ion_diff_concentration_cell {
+    std::string ion;
+};
+
 // Ionic external concentration [mmol/L] of ion `source` at `location`.
 // Sample value type: `double`
 // Sample metadata type: `mlocation`
@@ -190,7 +205,8 @@ template <typename T>
 using region_assignment =
     std::conditional_t<
         std::is_same<T, density>::value || std::is_same<T, init_int_concentration>::value ||
-        std::is_same<T, init_ext_concentration>::value || std::is_same<T, init_reversal_potential>::value,
+        std::is_same<T, init_ext_concentration>::value || std::is_same<T, init_reversal_potential>::value ||
+        std::is_same<T, ion_diffusivity>::value,
         std::unordered_map<std::string, mcable_map<T>>,
         mcable_map<T>>;
 
@@ -215,7 +231,7 @@ using location_assignment =
 using cable_cell_region_map = static_typed_map<region_assignment,
     density, init_membrane_potential, axial_resistivity,
     temperature_K, membrane_capacitance, init_int_concentration,
-    init_ext_concentration, init_reversal_potential>;
+    ion_diffusivity, init_ext_concentration, init_reversal_potential>;
 
 using cable_cell_location_map = static_typed_map<location_assignment,
     synapse, junction, i_clamp, threshold_detector>;
diff --git a/arbor/include/arbor/cable_cell_param.hpp b/arbor/include/arbor/cable_cell_param.hpp
index 7ab612f6..0e8a78ef 100644
--- a/arbor/include/arbor/cable_cell_param.hpp
+++ b/arbor/include/arbor/cable_cell_param.hpp
@@ -32,6 +32,7 @@ struct cable_cell_ion_data {
     std::optional<double> init_int_concentration;
     std::optional<double> init_ext_concentration;
     std::optional<double> init_reversal_potential;
+    std::optional<double> diffusivity;
 };
 
 // Clamp current is described by a sine wave with amplitude governed by a
@@ -112,6 +113,12 @@ struct ARB_SYMBOL_VISIBLE init_int_concentration {
     double value = NAN; // [mM]
 };
 
+
+struct ARB_SYMBOL_VISIBLE ion_diffusivity {
+    std::string ion = "";
+    double value = NAN; // [m^2/s]
+};
+
 struct ARB_SYMBOL_VISIBLE init_ext_concentration {
     std::string ion = "";
     double value = NAN; // [mM]
@@ -226,6 +233,7 @@ using paintable =
                  axial_resistivity,
                  temperature_K,
                  membrane_capacitance,
+                 ion_diffusivity,
                  init_int_concentration,
                  init_ext_concentration,
                  init_reversal_potential,
@@ -242,6 +250,7 @@ using defaultable =
                  axial_resistivity,
                  temperature_K,
                  membrane_capacitance,
+                 ion_diffusivity,
                  init_int_concentration,
                  init_ext_concentration,
                  init_reversal_potential,
@@ -313,17 +322,18 @@ struct ARB_SYMBOL_VISIBLE cable_cell_global_properties {
     cable_cell_parameter_set default_parameters;
 
     // Convenience methods for adding a new ion together with default ion values.
-    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, double init_revpot) {
+    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, double init_revpot, double diffusivity=0.0) {
         ion_species[ion_name] = charge;
 
         auto &ion_data = default_parameters.ion_data[ion_name];
-        ion_data.init_int_concentration = init_iconc;
-        ion_data.init_ext_concentration = init_econc;
+        ion_data.init_int_concentration  = init_iconc;
+        ion_data.init_ext_concentration  = init_econc;
         ion_data.init_reversal_potential = init_revpot;
+        ion_data.diffusivity             = diffusivity;
     }
 
-    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, mechanism_desc revpot_mechanism) {
-        add_ion(ion_name, charge, init_iconc, init_econc, 0);
+    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, mechanism_desc revpot_mechanism, double diffusivity=0.0) {
+        add_ion(ion_name, charge, init_iconc, init_econc, 0, diffusivity);
         default_parameters.reversal_potential_method[ion_name] = std::move(revpot_mechanism);
     }
 };
diff --git a/arbor/include/arbor/mechanism_abi.h b/arbor/include/arbor/mechanism_abi.h
index 2998bc76..5c4f2a78 100644
--- a/arbor/include/arbor/mechanism_abi.h
+++ b/arbor/include/arbor/mechanism_abi.h
@@ -9,8 +9,8 @@ extern "C" {
 
 // Version
 #define ARB_MECH_ABI_VERSION_MAJOR 0
-#define ARB_MECH_ABI_VERSION_MINOR 1
-#define ARB_MECH_ABI_VERSION_PATCH 0
+#define ARB_MECH_ABI_VERSION_MINOR 2
+#define ARB_MECH_ABI_VERSION_PATCH 1
 #define ARB_MECH_ABI_VERSION ((ARB_MECH_ABI_VERSION_MAJOR * 10000L * 10000L) + (ARB_MECH_ABI_VERSION_MAJOR * 10000L) + ARB_MECH_ABI_VERSION_PATCH)
 
 typedef const char* arb_mechanism_fingerprint;
@@ -40,9 +40,11 @@ inline const char* arb_mechanism_kind_str(const arb_mechanism_kind& mech) {
 // Ion state variables; view into shared_state
 typedef struct arb_ion_state {
     arb_value_type* current_density;
+    arb_value_type* conductivity;
     arb_value_type* reversal_potential;
     arb_value_type* internal_concentration;
     arb_value_type* external_concentration;
+    arb_value_type* diffusive_concentration;
     arb_value_type* ionic_charge;
     arb_index_type* index;
 } arb_ion_state;
@@ -181,6 +183,7 @@ typedef struct arb_ion_info {
     const char* name;
     bool write_int_concentration;
     bool write_ext_concentration;
+    bool use_diff_concentration;
     bool write_rev_potential;
     bool read_rev_potential;
     bool read_valence;
diff --git a/arbor/include/arbor/mechinfo.hpp b/arbor/include/arbor/mechinfo.hpp
index 39678e4c..859337bd 100644
--- a/arbor/include/arbor/mechinfo.hpp
+++ b/arbor/include/arbor/mechinfo.hpp
@@ -36,6 +36,8 @@ struct ion_dependency {
     bool write_concentration_int = false;
     bool write_concentration_ext = false;
 
+    bool access_concentration_diff = false;
+
     bool read_reversal_potential = false;
     bool write_reversal_potential = false;
 
diff --git a/arbor/include/arbor/simd/avx.hpp b/arbor/include/arbor/simd/avx.hpp
index 2aeaed9f..1724021d 100644
--- a/arbor/include/arbor/simd/avx.hpp
+++ b/arbor/include/arbor/simd/avx.hpp
@@ -3,7 +3,6 @@
 // AVX/AVX2 SIMD intrinsics implementation.
 
 #ifdef __AVX__
-
 #include <cmath>
 #include <cstdint>
 #include <immintrin.h>
diff --git a/arbor/include/arbor/simd/implbase.hpp b/arbor/include/arbor/simd/implbase.hpp
index 54611088..4c3766b2 100644
--- a/arbor/include/arbor/simd/implbase.hpp
+++ b/arbor/include/arbor/simd/implbase.hpp
@@ -414,7 +414,6 @@ struct implbase {
     static void scatter(tag<ImplIndex>, const vector_type& s, scalar_type* p, const typename ImplIndex::vector_type& index) {
         typename ImplIndex::scalar_type o[width];
         ImplIndex::copy_to(index, o);
-
         store a;
         I::copy_to(s, a);
 
diff --git a/arbor/include/arbor/simple_sampler.hpp b/arbor/include/arbor/simple_sampler.hpp
index ca227fee..196f1f11 100644
--- a/arbor/include/arbor/simple_sampler.hpp
+++ b/arbor/include/arbor/simple_sampler.hpp
@@ -24,7 +24,7 @@ struct trace_entry {
 // `trace_data` wraps a std::vector of trace_entry with
 // a copy of the probe-specific metadata associated with a probe.
 //
-// If `Meta` is void, ignore any metadta.
+// If `Meta` is void, ignore any metadata.
 
 template <typename V, typename Meta = void>
 struct trace_data: private std::vector<trace_entry<V>> {
diff --git a/arbor/matrix.hpp b/arbor/matrix.hpp
index 3bc7a54b..3b69db50 100644
--- a/arbor/matrix.hpp
+++ b/arbor/matrix.hpp
@@ -7,6 +7,8 @@
 #include <memory/memory.hpp>
 #include <util/span.hpp>
 
+#include <arbor/arb_types.hpp>
+
 namespace arb {
 
 /// Hines matrix
@@ -15,76 +17,46 @@ namespace arb {
 template<class Backend, class State=typename Backend::matrix_state>
 class matrix {
 public:
-    using backend = Backend;
-
-    // define basic types
-    using value_type = typename backend::value_type;
-    using index_type = typename backend::index_type;
-    using size_type = typename backend::size_type;
-
-    // define storage types
-    using array = typename backend::array;
-    using iarray = typename backend::iarray;
-
-    // back end specific storage for matrix state
-    using state = State;
+    using backend    = Backend;
+    using array      = typename backend::array;
+    using iarray     = typename backend::iarray;
+    using const_view = const array&;
+    using state      = State;                         // backend specific storage for matrix state
 
     matrix() = default;
 
-    matrix(const std::vector<index_type>& pi,
-           const std::vector<index_type>& ci,
-           const std::vector<value_type>& cv_capacitance,
-           const std::vector<value_type>& face_conductance,
-           const std::vector<value_type>& cv_area,
-           const std::vector<index_type>& cell_to_intdom):
-        parent_index_(pi.begin(), pi.end()),
-        cell_index_(ci.begin(), ci.end()),
-        cell_to_intdom_(cell_to_intdom.begin(), cell_to_intdom.end()),
+    matrix(const std::vector<arb_index_type>& pi,
+           const std::vector<arb_index_type>& ci,
+           const std::vector<arb_value_type>& cv_capacitance,
+           const std::vector<arb_value_type>& face_conductance,
+           const std::vector<arb_value_type>& cv_area,
+           const std::vector<arb_index_type>& cell_to_intdom):
+        num_cells_{ci.size() - 1},
         state_(pi, ci, cv_capacitance, face_conductance, cv_area, cell_to_intdom)
     {
-        arb_assert(cell_index_[num_cells()] == index_type(parent_index_.size()));
+        arb_assert(cell_index()[num_cells()] == arb_index_type(parent_index().size()));
     }
 
     /// the dimension of the matrix (i.e. the number of rows or colums)
-    std::size_t size() const {
-        return parent_index_.size();
-    }
-
+    std::size_t size() const { return state_.size(); }
     /// the number of cell matrices that have been packed together
-    size_type num_cells() const {
-        return cell_index_.size() - 1;
-    }
-
+    std::size_t num_cells() const { return num_cells_; }
     /// the vector holding the parent index
-    const iarray& p() const { return parent_index_; }
-
+    const iarray& parent_index() const { return state_.parent_index; }
     /// the partition of the parent index over the cells
-    const iarray& cell_index() const { return cell_index_; }
-
+    const iarray& cell_index() const { return state_.cell_cv_divs; }
     /// Solve the linear system into a given solution storage.
-    void solve(array& to) {
-        state_.solve(to);
-    }
-
+    void solve(array& to) { state_.solve(to); }
     /// Assemble the matrix for given dt
-    void assemble(const array& dt_cell, const array& voltage, const array& current, const array& conductivity) {
-        state_.assemble(dt_cell, voltage, current, conductivity);
-    }
+    void assemble(const_view& dt, const_view& U, const_view& I, const_view& g) { state_.assemble(dt, U, I, g); }
 
 private:
-    /// the parent indice that describe matrix structure
-    iarray parent_index_;
-
-    /// indexes that point to the start of each cell in the index
-    iarray cell_index_;
-
-    /// indexes that point to the index of the integration domain
-    iarray cell_to_intdom_;
+   std::size_t num_cells_ = 0;
 
 public:
-    // Provide via public interface to make testing much easier.
-    // If you modify this directly without knowing what you are doing,
-    // you get what you deserve.
+    // Provide via public interface to make testing much easier. If you modify
+    // this directly without knowing what you are doing, you get what you
+    // deserve.
     state state_;
 };
 
diff --git a/arbor/mechinfo.cpp b/arbor/mechinfo.cpp
index b2dbd7d0..abb54ba7 100644
--- a/arbor/mechinfo.cpp
+++ b/arbor/mechinfo.cpp
@@ -24,6 +24,7 @@ mechanism_info::mechanism_info(const arb_mechanism_type& m) {
         const auto& v = m.ions[idx];
         ions[v.name] = { v.write_int_concentration,
         v.write_ext_concentration,
+        v.use_diff_concentration,
         v.read_rev_potential,
         v.write_rev_potential,
         v.read_valence,
diff --git a/arborio/cableio.cpp b/arborio/cableio.cpp
index e06d81c1..2727788b 100644
--- a/arborio/cableio.cpp
+++ b/arborio/cableio.cpp
@@ -57,6 +57,9 @@ s_expr mksexp(const init_ext_concentration& c) {
 s_expr mksexp(const init_reversal_potential& c) {
     return slist("ion-reversal-potential"_symbol, s_expr(c.ion), c.value);
 }
+s_expr mksexp(const ion_diffusivity& c) {
+    return slist("ion-diffusivity"_symbol, s_expr(c.ion), c.value);
+}
 s_expr mksexp(const i_clamp& c) {
     std::vector<s_expr> evlps;
     std::transform(c.envelope.begin(), c.envelope.end(), std::back_inserter(evlps),
@@ -201,7 +204,7 @@ using version_tuple  = std::tuple<std::string>;
 #define ARBIO_DEFINE_DOUBLE_ARG(name) arb::name make_##name(const std::string& ion, double val) { return arb::name{ion, val};}
 
 ARB_PP_FOREACH(ARBIO_DEFINE_SINGLE_ARG, init_membrane_potential, temperature_K, axial_resistivity, membrane_capacitance, threshold_detector)
-ARB_PP_FOREACH(ARBIO_DEFINE_DOUBLE_ARG, init_int_concentration, init_ext_concentration, init_reversal_potential)
+ARB_PP_FOREACH(ARBIO_DEFINE_DOUBLE_ARG, init_int_concentration, init_ext_concentration, init_reversal_potential, ion_diffusivity)
 
 std::vector<arb::i_clamp::envelope_point> make_envelope(const std::vector<std::variant<envelope_tuple>>& vec) {
     std::vector<arb::i_clamp::envelope_point> envlp;
@@ -582,6 +585,8 @@ eval_map named_evals{
         "'ion_internal_concentration' with 2 arguments (ion:string val:real)")},
     {"ion-external-concentration", make_call<std::string, double>(make_init_ext_concentration,
         "'ion_external_concentration' with 2 arguments (ion:string val:real)")},
+    {"ion-diffusivity", make_call<std::string, double>(make_ion_diffusivity,
+        "'ion_diffusivity' with 2 arguments (ion:string val:real)")},
     {"ion-reversal-potential", make_call<std::string, double>(make_init_reversal_potential,
         "'ion_reversal_potential' with 2 arguments (ion:string val:real)")},
     {"envelope", make_arg_vec_call<envelope_tuple>(make_envelope,
@@ -614,6 +619,7 @@ eval_map named_evals{
     {"paint", make_call<region, axial_resistivity>(make_paint, "'paint' with 2 arguments (reg:region v:axial-resistivity)")},
     {"paint", make_call<region, init_int_concentration>(make_paint, "'paint' with 2 arguments (reg:region v:ion-internal-concentration)")},
     {"paint", make_call<region, init_ext_concentration>(make_paint, "'paint' with 2 arguments (reg:region v:ion-external-concentration)")},
+    {"paint", make_call<region, ion_diffusivity>(make_paint, "'paint' with 2 arguments (reg:region v:ion-diffusivity)")},
     {"paint", make_call<region, init_reversal_potential>(make_paint, "'paint' with 2 arguments (reg:region v:ion-reversal-potential)")},
     {"paint", make_call<region, density>(make_paint, "'paint' with 2 arguments (reg:region v:density)")},
 
@@ -623,6 +629,7 @@ eval_map named_evals{
     {"default", make_call<axial_resistivity>(make_default, "'default' with 1 argument (v:axial-resistivity)")},
     {"default", make_call<init_int_concentration>(make_default, "'default' with 1 argument (v:ion-internal-concentration)")},
     {"default", make_call<init_ext_concentration>(make_default, "'default' with 1 argument (v:ion-external-concentration)")},
+    {"default", make_call<ion_diffusivity>(make_default, "'default' with 1 argument (v:ion-diffusivity)")},
     {"default", make_call<init_reversal_potential>(make_default, "'default' with 1 argument (v:ion-reversal-potential)")},
     {"default", make_call<ion_reversal_potential_method>(make_default, "'default' with 1 argument (v:ion-reversal-potential-method)")},
     {"default", make_call<cv_policy>(make_default, "'default' with 1 argument (v:cv-policy)")},
diff --git a/doc/concepts/decor.rst b/doc/concepts/decor.rst
index 3b15a827..f817193a 100644
--- a/doc/concepts/decor.rst
+++ b/doc/concepts/decor.rst
@@ -188,6 +188,7 @@ Each ion species has the following properties:
 2. *external concentration*: concentration on exterior of the membrane [mM].
 3. *reversal potential*: reversal potential [mV].
 4. *reversal potential mechanism*:  method for calculating reversal potential.
+5. *diffusivity*: diffusion coefficient for marker concentration, defaults to zero [m^2/s].
 
 Properties 1, 2 and 3 must be defined, and are used as the initial values for
 each quantity at the start of the simulation. They are specified globally,
@@ -256,6 +257,29 @@ using the *paint* interface:
     # Alternatively, one can selectively overwrite the global defaults.
     decor.paint('(tag 2)', arbor.ion('ca', rev_pot=126)
 
+To enable diffusion of ion species along the morphology (axial diffusion) one
+sets the per-species diffusivity to a positive value. It can be changed per
+region and defaults to zero. This is strictly passive transport according to the
+diffusion equation ``X' = ÃŸ âˆ†X`` where ``X`` is the species' diffusive
+concentration and ``ÃŸ`` the diffusivity constant.
+
+.. code-block:: Python
+
+    decor = arbor.decor()
+    decor.set_ion('ca', diff=23.0)
+    decor.paint('"region"', 'ca', diff=42.0)
+
+Be aware of the consequences of setting ``ÃŸ > 0`` only in some places, namely
+pile-up effects similar to reflective bounds.
+
+The diffusive concentration is *separate* from the internal concentration for
+reasons innate to the cable model, which require resetting it to its starting
+point at every timestep. It can be accessed from NMODL density and point
+mechanisms as an independent quantity, see :ref:`NMODL mechanism <nmodl>`. It is
+present on the full morphology if its associated diffusivity is set to a
+non-zero value on any subset of the morphology, ie ``region``. It is initialised
+to the value of the internal concentration at time zero.
+
 .. _cablecell-place:
 
 Placed dynamics
diff --git a/doc/dev/axial-diff.tex b/doc/dev/axial-diff.tex
new file mode 100644
index 00000000..de4d178f
--- /dev/null
+++ b/doc/dev/axial-diff.tex
@@ -0,0 +1,203 @@
+% Created 2022-05-04 Wed 13:36
+% Intended LaTeX compiler: pdflatex
+\documentclass[a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{graphicx}
+\usepackage{longtable}
+\usepackage{wrapfig}
+\usepackage{rotating}
+\usepackage[normalem]{ulem}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{capt-of}
+\usepackage{hyperref}
+\author{Thorsten Hater}
+\date{\today}
+\title{Axial Diffusion of Ionic Species in Arbor}
+\hypersetup{
+ pdfauthor={Thorsten Hater},
+ pdftitle={Axial Diffusion of Ionic Species in Arbor},
+ pdfkeywords={},
+ pdfsubject={},
+ pdfcreator={Emacs 28.1.50 (Org mode 9.6)}, 
+ pdflang={English}}
+\usepackage{biblatex}
+
+\begin{document}
+
+\maketitle
+\tableofcontents
+
+
+\section{Introduction}
+\label{sec:org8cd0584}
+
+Arbor's model of the neurite -- and similarly that of cable theory in general --
+is valid in a narrow region around the membrane, ie \(r_{m} - \epsilon \leq r
+\leq r_{m} - \epsilon\). Therefore, the state variables of the model -- in
+particular ionic concentrations and currents -- are defined only in said small
+region.
+
+For treating diffusion of ionic species along the axial direction of the neuron,
+however, we will either need to assume a radially homogeneous distribution of
+ions inside the cell plasma or resort to a more detailed model of the general
+dynamics.
+
+Therefore, in the axial diffusion model adopted by Arbor, we are going to assume
+that the concentration \(M_s\) of an ionic species \(s\) behaves like \(M_s(x, r,
+\theta) = M_s(x)\) and is defined on the full inside of the neuron \(0 \leq r <
+r_m\). Furthermore, in contrast to traditional cable theory, we will take into
+account the effect trans-membrane currents on the internal concentration when
+modelling diffusion, ie
+
+\begin{align*}
+\partial_{t} M_{s} = \nabla \cdot (D_{s} \nabla M_{s}) + I_{m,s}
+\end{align*}
+
+This makes the axial diffusion model richer than the plain cable theory approach
+as implemented in Arbor so far. In particular the cable model assumes internal
+(and external) concentrations to be defined in a thin shell around the membrane
+and to be buffered by a far larger volume away from that shell. Thus, these
+values are reset to the buffer value infinitely fast, ie every time step in
+Arbor, unless managed by a concentration model density mechanism. However, this
+makes diffusion impossible as mechanisms cannot not exchange data across CVs.
+Therefore, finally!, we chose to add a new quantity \texttt{Xd} that is not buffered
+and serves as a diffusing proxy concentration that can be coupled by a mechanism
+to the internal concentration if needed. This begets an awkward, but workable,
+interface with ODE models of \texttt{Xd} in NMODL, see the relevant documentation.
+
+This proxy will be initialised as \texttt{Xd = Xi} and present on the full morphology
+iff \(\D_{s} > 0\) on any non-vanishing subset of the morphology.
+
+\section{Derivation and FVM}
+\label{sec:org06e2271}
+
+The cable equation is the foundation of Arbor's model of the neuron
+\begin{align*}
+    c_{m} \partial_{t} V = \frac{1}{2 \pi a} \partial_{x} \left(\frac{\pi a^{2}}{R_{L}} \partial_{x} V\right) + i_{m}
+\end{align*}
+We want to solve the diffusion equation, excluding currents for now,
+\begin{align*}
+\partial_{t} M = \nabla \cdot (D \nabla M)
+\end{align*}
+by application of the finite volume method (as for the cable equation). That is,
+we average over the control volume (CV) \(V\) to rewrite in terms of the average
+concentration \(\bar{M}\) and apply Stoke's theorem on the right hand side. Note
+that we suppress the species subscript \(s\) for clarity where obvious from
+context or not relevant to the discussion. Thus
+\begin{align*}
+\partial_{t} \bar M_{i} = \frac{1}{\Delta V_{i}}\int\limits_{\partial V} \mathrm{d}a \cdot D \nabla M
+\end{align*}
+For linking to NMODL, note that \(M\) is identified with \texttt{Xd} in this discussion.
+Note that \(\nabla \sim \partial_{x}\) in our 1.5d model.
+
+Next, we define the fluxes
+\begin{align*}
+F_{ij} = \sigma_{ij}(\bar M_{i} - \bar M_{j})
+\end{align*}
+where \(\sigma_{ij}\) is the contact area of segments \(i\) and \(j\).
+
+Then we apply an implicit Euler time-step
+\begin{align*}
+\frac{1}{\Delta t} \left(M^{k+1} - M^{k}\right) = \sum_{j}\frac{\sigma_{ij}}{P_{L}\Delta x_{ij}}\eta_{ij} \left(M_{i}^{k+1} - M_{j}^{k+1}\right)
+\end{align*}
+where \(\eta_{ij} = 1\) if \(i\) and \(j\) are neighbours and zero otherwise. We introduced
+\(P_L = P \cdot A/l\) where \(P = 1/D\) in analogy with \(R_{L}\) ('\emph{specific diffusive
+resistivity}').
+
+This can be rewritten to expose the structure of the linear system more clearly
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k}= \frac{1}{\Delta t}M_{i}^{k+1} + \sum\limits_{j}\beta_{ij} \left(M_{i}^{k+1} - M_{j}^{k+1} \right)
+\end{align*}
+where
+\begin{align*}
+\beta_{ij} = \frac{\sigma_{ij}}{P_{L} \Delta x_{ij}}\eta_{ij}
+\end{align*}
+
+\section{Treatment of Trans-Membrane Ion Flux}
+\label{sec:org0eeed52}
+
+Finally, we add back the ionic currents to the concentration changes to arrive at
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k} = \left(\frac{1}{\Delta t}
+ + \sum\limits_{j}\beta_{ij}\right) M_{i}^{k+1} - \sum\limits_{j}\beta_{ij}M_{j}^{k+1} + \frac{\sigma_{i}}{q I_m(V^{k+1
+})
+\end{align*}
+where we used the CV surface \(\sigma_{i}\), the ion species' current \(I_m\) through
+\(\sigma_{i}\) and its charge \(q\).
+
+We follow Arbor's model and write Ohm's law
+\begin{align*}
+g = \frac{\partial I_m(V)}{\partial V} = \frac{\partial t}{\partial V}\frac{\partial I_m}{\partial t}
+\end{align*}
+where \(g\) is the \emph{per species conductivity} and approximately we have
+\begin{align*}
+I_m^{k+1} - I_m^{k} = g\left(V^{k+1} - V^{k}\right)
+\end{align*}
+Inserting this into the diffusion equation and rearranging to separate
+\(\cdot^{k}\) and \(\cdot^{k+1}\) terms
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k} - \frac{\sigma_{i}}{q}\left(I_m^k - gV^k\right)= \left(\frac{1}{\Delta t}
+ + \sum\limits_{j}\beta_{ij}\right) M_{i}^{k+1} - \sum\limits_{j}\beta_{ij}M_{j}^{k+1} + \frac{\sigma_{i}g}{q} V^{k+1}
+\end{align*}
+
+In \texttt{fvm\_discretize} we compute \(\alpha_{ij}\) -- the analogue for \(\beta_{ij}\) in the cable
+equation -- called \texttt{face\_conductance} in code
+\begin{verbatim}
+mcable span{bid, parent_refpt, cv_refpt};
+double resistance =
+    embedding.integrate_ixa(span,
+                            D.axial_resistivity[0].at(bid));
+D.face_conductance[i] = 100/resistance; // 100 scales to ÂµS.
+\end{verbatim}
+
+\section{Units}
+\label{sec:org4ff2b42}
+
+We have the following units preset for us
+
+\begin{center}
+\begin{tabular}{llll}
+\hline
+Quantity & Symbol & Unit & SI\\
+\hline
+Concentration & \(M\) & \(m\mathrm{mol}/l\) & \(10^{-6} \mathrm{mol}/m^{3}\)\\
+Length & \(x\) & \(\mu m\) & \(10^{-6} m\)\\
+Area & \(A\) & \((\mu m)^{2}\) & \(10^{-12} m^{2}\)\\
+Time & \(t\) & \(ms\) & \(10^{-3} s\)\\
+\hline
+\end{tabular}
+\end{center}
+
+From this we need to fix units for
+\begin{itemize}
+\item fluxes \(F\)
+\item diffusivity \(D\) and \(D_{L}\)
+\item diffusive resistivity \(P\) and \(P_{L}\)
+\end{itemize}
+
+We start with \(D\) and apply dimensional analysis to the diffusion equation to note
+\begin{align*}
+[M]/[T] &= [M][D]/[L^{2}]\\
+\end{align*}
+which results in
+\begin{align*}
+[D] &= [L^{2}][T]^{-1}\\
+&= (\mu m)^{2}/ms = 10^{-9} m^{2}/ s
+\end{align*}
+in Arbor's internal units.
+Similarly, we rewrite the diffusion equation in its abstract form
+\begin{align*}
+\partial_{t} M = \nabla \cdot F
+\end{align*}
+to arrive at
+\begin{align*}
+[M]/[T] &= [F]/[L]\\
+\Leftrightarrow [F] &= [L][M][T]^{-1}\\
+\Rightarrow [F] &= 10^{-9} \mathrm{mol}/(m^{2} s)
+\end{align*}
+
+From Yasuda et al we have \([D_{eff}] = 10^{-12}m^{2}/s\) which fits our derivation
+for \(D\), expect for their use of \([T] = s\) as opposed to \([T]=ms\).
+\end{document}
diff --git a/doc/fileformat/nmodl.rst b/doc/fileformat/nmodl.rst
index 5175f0dd..2829521f 100644
--- a/doc/fileformat/nmodl.rst
+++ b/doc/fileformat/nmodl.rst
@@ -56,6 +56,7 @@ ion X current (point and junction mechanisms)    iX
 ion X reversal potential                         eX                                                   mV
 ion X internal concentration                     Xi                                                   mmol/L
 ion X external concentration                     Xo                                                   mmol/L
+ion X diffusive concentration                    Xd                                                   mmol/L
 ===============================================  ===================================================  ==========
 
 Ions
@@ -72,12 +73,22 @@ Ions
   ``PARAMETER``, ``ASSIGNED`` or ``CONSTANT``.
 * ``READ`` and ``WRITE`` permissions of ``Xi``, ``Xo``, ``eX`` and ``iX`` can be set
   in NMODL in the ``NEURON`` block. If a parameter is writable it is automatically
-  readable and doesn't need to be specified as both.
-* If ``Xi``, ``Xo``, ``eX``, ``iX`` are used in a ``PROCEDURE`` or ``FUNCTION``,
+  readable and must not be specified as both.
+* If ``Xi``, ``Xo``, ``eX``, ``iX``, ``Xd`` are used in a ``PROCEDURE`` or ``FUNCTION``,
   they need to be passed as arguments.
 * If ``Xi`` or ``Xo`` (internal and external concentrations) are written in the
-  NMODL mechanism they need to be declared as ``STATE`` variables and their initial
-  values have to be set in the ``INITIAL`` block in the mechanism.
+  NMODL mechanism they need to be declared as ``STATE`` variables and their
+  initial values have to be set in the ``INITIAL`` block in the mechanism. This
+  transfers **all** responsibility for handling ``Xi`` / ``Xo`` to the mechanism
+  and will lead to painted initial values to be ignored. If these quantities are
+  not made ``STATE`` they may be written to, but their values will be reset to
+  their initial values every time step.
+* The diffusive concentration ``Xd`` does not share this semantics. It will not
+  be reset, even if not in ``STATE``, and may freely be written. This comes at the
+  cost of awkward treatment of ODEs for ``Xd``, see the included ``decay.mod`` for
+  an example.
+* ``Xd`` is present on all cables iff its associated diffusivity is set to a
+  non-zero value.
 
 Special variables
 -----------------
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 7a654b4a..752737ec 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -12,3 +12,4 @@ add_subdirectory(gap_junctions)
 add_subdirectory(single)
 add_subdirectory(probe-demo)
 add_subdirectory(lfp)
+add_subdirectory(diffusion)
diff --git a/example/diffusion/CMakeLists.txt b/example/diffusion/CMakeLists.txt
new file mode 100644
index 00000000..687914b7
--- /dev/null
+++ b/example/diffusion/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(diffusion EXCLUDE_FROM_ALL diffusion.cpp)
+add_dependencies(examples diffusion)
+target_link_libraries(diffusion PRIVATE arbor arborio arborenv arbor-sup ext-tinyopt)
diff --git a/example/diffusion/README.md b/example/diffusion/README.md
new file mode 100644
index 00000000..f570e33f
--- /dev/null
+++ b/example/diffusion/README.md
@@ -0,0 +1,18 @@
+# Diffusion example.
+
+Example of simulating a simple linear neuron with a diffusing sodium
+concentration. An event injecting more Na into the centre is fired at t=0. The
+measured concentrations will be written to disk further analysis.
+
+ |    |  Option  | Meaning                         | Default     |
+ |----|----------|---------------------------------|-------------|
+ | -t | --tfinal | Length of the simulation period | 1 ms        |
+ | -d | --dt     | Simulation time step            | 0.01 ms     |
+ | -s | --ds     | Sampling interval               | 0.1 ms      |
+ | -g | --gpu    | Use GPU id, enabled if >=0      | -1          |
+ | -l | --length | Length of stick                 | 30 um       |
+ | -x | --dx     | Discretisation                  | 1 um        |
+ | -i | --Xi     | Initial Na concentration        | 0 mM        |
+ | -b | --beta   | Na diffusivity                  | 0.005 m^2/s |
+ | -o | --output | Save samples                    | log.csv     |
+ 
diff --git a/example/diffusion/diffusion.cpp b/example/diffusion/diffusion.cpp
new file mode 100644
index 00000000..e8f94651
--- /dev/null
+++ b/example/diffusion/diffusion.cpp
@@ -0,0 +1,126 @@
+#include <any>
+#include <iomanip>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <tinyopt/tinyopt.h>
+
+#include <arborio/label_parse.hpp>
+
+#include <arbor/load_balance.hpp>
+#include <arbor/cable_cell.hpp>
+#include <arbor/morph/morphology.hpp>
+#include <arbor/morph/segment_tree.hpp>
+#include <arbor/simulation.hpp>
+#include <arbor/sampling.hpp>
+#include <arbor/util/any_cast.hpp>
+#include <arbor/util/any_ptr.hpp>
+
+using namespace arborio::literals;
+using namespace arb;
+
+struct linear: public recipe {
+     linear(double ext, double dx, double Xi, double beta): l{ext}, d{dx}, i{Xi}, b{beta} {
+        gprop.default_parameters = neuron_parameter_defaults;
+        gprop.default_parameters.discretization = cv_policy_max_extent{d};
+    }
+
+    cell_size_type num_cells()                                   const override { return 1; }
+    cell_kind get_cell_kind(cell_gid_type)                       const override { return cell_kind::cable; }
+    std::any get_global_properties(cell_kind)                    const override { return gprop; }
+    std::vector<probe_info> get_probes(cell_gid_type)            const override { return {cable_probe_ion_diff_concentration_cell{"na"}}; }
+    std::vector<event_generator> event_generators(cell_gid_type) const override { return {explicit_generator({{{"Zap"}, 0.0, 0.005}})}; }
+    util::unique_any get_cell_description(cell_gid_type)         const override {
+        // Stick morphology
+        // -----|-----
+        segment_tree tree;
+        tree.append(mnpos, { -l, 0, 0, 3}, {l, 0, 0, 3}, 1);
+        // Setup
+        decor decor;
+        decor.set_default(init_int_concentration{"na", i});
+        decor.set_default(ion_diffusivity{"na", b});
+        decor.place("(location 0 0.5)"_ls, synapse("inject/x=na", {{"alpha", 200.0*l}}), "Zap");
+        decor.paint("(all)"_reg, density("decay/x=na"));
+        return cable_cell({tree}, {}, decor);
+    }
+
+    cable_cell_global_properties gprop;
+    double l, d, i, b;
+};
+
+std::ofstream out;
+
+void sampler(probe_metadata pm, std::size_t n, const sample_record* samples) {
+    auto ptr = util::any_cast<const mcable_list*>(pm.meta);
+    assert(ptr);
+    auto n_cable = ptr->size();
+    out << "time,prox,dist,Xd\n"
+        << std::fixed << std::setprecision(4);
+    for (std::size_t i = 0; i<n; ++i) {
+        const auto& [val, _ig] = *util::any_cast<const cable_sample_range*>(samples[i].data);
+        for (unsigned j = 0; j<n_cable; ++j) {
+            mcable loc = (*ptr)[j];
+            out << samples[i].time << ',' << loc.prox_pos << ',' << loc.dist_pos << ',' << val[j] << '\n';
+        }
+    }
+    out << '\n';
+}
+
+struct opt_t {
+    double L  = 30.0;
+    double dx = 1.0;
+    double T  = 1.0;
+    double dt = 0.01;
+    double ds = 0.1;
+    double Xi = 0.0;
+    double dX = 0.005;
+    std::string out = "log.csv";
+    int gpu = -1;
+};
+
+opt_t read_options(int argc, char** argv) {
+    auto usage = "\n"
+                 "  -t|--tfinal     [Length of the simulation period (1 ms)]\n"
+                 "  -d|--dt         [Simulation time step (0.01 ms)]\n"
+                 "  -s|--ds         [Sampling interval (0.1 ms)]\n"
+                 "  -g|--gpu        [Use GPU id (-1); enabled if >=0]\n"
+                 "  -l|--length     [Length of stick (30 um)]\n"
+                 "  -x|--dx         [Discretisation (1 um)]\n"
+                 "  -i|--Xi         [Initial Na concentration (0 mM)]\n"
+                 "  -b|--beta       [Na diffusivity (0.005 m^2/s)]\n"
+                 "  -o|--output     [Save samples (log.csv)]\n";
+    auto help = [argv, &usage] { to::usage(argv[0], usage); };
+    opt_t opt;
+    to::option options[] = {{opt.T,                                "-t", "--tfinal"},
+                            {opt.dt,                               "-d", "--dt"},
+                            {opt.ds,                               "-s", "--ds"},
+                            {opt.L,                                "-l", "--length"},
+                            {opt.dx,                               "-x", "--dx"},
+                            {opt.Xi,                               "-i", "--Xi"},
+                            {opt.dX,                               "-b", "--beta"},
+                            {opt.gpu,                              "-g", "--gpu"},
+                            {opt.out,                              "-o", "--out"},
+                            {to::action(help), to::flag, to::exit, "-h", "--help"}};
+    if (!to::run(options, argc, argv+1)) return opt_t{};
+    if (argv[1])          throw to::option_error("Unrecognized argument", argv[1]);
+    if (opt.dt <= 0.0)    throw std::runtime_error("Time step must be positive!");
+    if (opt.ds <= 0.0)    throw std::runtime_error("Sampling interval must be positive!");
+    if (opt.ds <  opt.dt) throw std::runtime_error("Time step is greater than a sampling interval!");
+    if (opt.T  <= opt.ds) throw std::runtime_error("Runtime is less than a sampling interval!");
+    if (opt.dX <= 0.0)    throw std::runtime_error("Diffusivity must be positive!");
+    return opt;
+}
+
+int main(int argc, char** argv) {
+    auto O = read_options(argc, argv);
+    out = std::ofstream{O.out};
+    if (!out.good()) throw std::runtime_error("Could not open output file for writing.");
+    auto C = make_context({1, O.gpu});
+    auto R = linear{O.L, O.dx, O.Xi, O.dX};
+    simulation S(R, partition_load_balance(R, C), C);
+    S.add_sampler(all_probes, regular_schedule(O.ds), sampler);
+    S.run(O.T, O.dt);
+    out.close();
+}
diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index 976304bc..97dbf7e3 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -27,7 +27,7 @@ make_catalogue(
   NAME default
   SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/default"
   OUTPUT "CAT_DEFAULT_SOURCES"
-  MOD exp2syn expsyn expsyn_curr expsyn_stdp hh kamt kdrmt nax nernst pas gj
+  MOD exp2syn expsyn expsyn_curr expsyn_stdp hh kamt kdrmt nax nernst pas gj decay inject
   CXX
   PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
   CXX_FLAGS_TARGET "${ARB_CXX_FLAGS_TARGET_FULL}"
diff --git a/mechanisms/default/decay.mod b/mechanisms/default/decay.mod
new file mode 100644
index 00000000..a380d5fd
--- /dev/null
+++ b/mechanisms/default/decay.mod
@@ -0,0 +1,18 @@
+NEURON {
+    SUFFIX decay
+    USEION x WRITE xd, ix
+}
+
+INITIAL { F = xd }
+
+STATE { F }
+
+BREAKPOINT {
+   SOLVE dF METHOD cnexp
+   xd = F
+}
+
+DERIVATIVE dF {
+   F = xd
+   F' = -5*F
+}
diff --git a/mechanisms/default/inject.mod b/mechanisms/default/inject.mod
new file mode 100644
index 00000000..39289576
--- /dev/null
+++ b/mechanisms/default/inject.mod
@@ -0,0 +1,20 @@
+NEURON {
+  POINT_PROCESS inject
+  USEION x WRITE xd, ix
+  RANGE alpha, beta
+}
+
+ASSIGNED { beta }
+
+PARAMETER { alpha = 200 }
+
+INITIAL { beta = 0 }
+
+BREAKPOINT {
+    xd = xd + beta
+    beta = 0
+}
+
+NET_RECEIVE(weight) {
+    beta = alpha*weight
+}
diff --git a/modcc/blocks.hpp b/modcc/blocks.hpp
index 495a47ac..ff0f0b13 100644
--- a/modcc/blocks.hpp
+++ b/modcc/blocks.hpp
@@ -32,6 +32,9 @@ struct IonDep {
     bool uses_concentration_int() const {
         return has_variable(name + "i");
     };
+    bool uses_concentration_diff() const {
+        return has_variable(name + "d");
+    };
     bool uses_concentration_ext() const {
         return has_variable(name + "o");
     };
diff --git a/modcc/identifier.hpp b/modcc/identifier.hpp
index dd31222f..47c835f0 100644
--- a/modcc/identifier.hpp
+++ b/modcc/identifier.hpp
@@ -49,7 +49,9 @@ enum class sourceKind {
     dt,
     time,
     ion_current,
+    ion_conductivity,
     ion_current_density,
+    ion_diffusive,
     ion_revpot,
     ion_iconc,
     ion_econc,
@@ -95,6 +97,7 @@ inline std::string to_string(sourceKind v) {
     case sourceKind::dt:                  return "dt";
     case sourceKind::ion_current:         return "ion_current";
     case sourceKind::ion_current_density: return "ion_current_density";
+    case sourceKind::ion_diffusive:       return "ion_diffusive";
     case sourceKind::ion_revpot:          return "ion_revpot";
     case sourceKind::ion_iconc:           return "ion_iconc";
     case sourceKind::ion_econc:           return "ion_econc";
@@ -122,6 +125,7 @@ inline sourceKind ion_source(const std::string& ion, const std::string& var, mod
     else if (var=="i"+ion) return mkind==moduleKind::density? sourceKind::ion_current_density: sourceKind::ion_current;
     else if (var=="e"+ion) return sourceKind::ion_revpot;
     else if (var==ion+"i") return sourceKind::ion_iconc;
+    else if (var==ion+"d") return sourceKind::ion_diffusive;
     else if (var==ion+"o") return sourceKind::ion_econc;
     else return sourceKind::no_source;
 }
diff --git a/modcc/module.cpp b/modcc/module.cpp
index 56d22f03..c167b053 100644
--- a/modcc/module.cpp
+++ b/modcc/module.cpp
@@ -36,6 +36,7 @@ class NrnCurrentRewriter: public BlockRewriterBase {
                         if (src==sourceKind::current_density ||
                             src==sourceKind::current ||
                             src==sourceKind::ion_current_density ||
+                            src==sourceKind::ion_conductivity ||
                             src==sourceKind::ion_current)
                         {
                             return src;
@@ -49,12 +50,16 @@ class NrnCurrentRewriter: public BlockRewriterBase {
 
     bool has_current_update_ = false;
     std::set<std::string> current_vars_;
-    std::set<expression_ptr> conductivity_exps_;
+    std::map<std::string, expression_ptr> conductivity_exps_;
 
 public:
+
+    std::string non_specific_current = "";
     using BlockRewriterBase::visit;
 
     virtual void finalize() override {
+        // Take current name 'iX' and strip off leading 'i' to get ion name.
+        auto i2ion = [this](const auto& name) { return id("conductivity_" + name.substr(1) + "_"); };
         if (has_current_update_) {
             expression_ptr current_sum, conductivity_sum;
             for (auto& curr: current_vars_) {
@@ -66,13 +71,18 @@ public:
                             Location{}, std::move(current_sum), std::move(curr_id));
                 }
             }
-            for (auto& cond: conductivity_exps_) {
+            for (auto& [name, cond]: conductivity_exps_) {
                 if (!conductivity_sum) {
                     conductivity_sum = cond->clone();
                 } else {
                     conductivity_sum = make_expression<AddBinaryExpression>(
                             Location{}, std::move(conductivity_sum), cond->clone());
                 }
+                if (name != non_specific_current) {
+                    statements_.push_back(make_expression<AssignmentExpression>(loc_,
+                                                                                i2ion(name),
+                                                                                cond->clone()));
+                }
             }
             if (current_sum) {
                 statements_.push_back(make_expression<AssignmentExpression>(loc_,
@@ -93,13 +103,13 @@ public:
         sourceKind current_source = current_update(e);
         if (current_source != sourceKind::no_source) {
             has_current_update_ = true;
-
-            auto visited_current = current_vars_.count(e->lhs()->is_identifier()->name());
-            current_vars_.insert(e->lhs()->is_identifier()->name());
+            auto name = e->lhs()->is_identifier()->name();
+            auto visited_current = current_vars_.count(name);
+            current_vars_.insert(name);
 
             linear_test_result L = linear_test(e->rhs(), {"v"});
             if (L.coef.count("v") && !visited_current) {
-                conductivity_exps_.insert(L.coef.at("v")->clone());
+                conductivity_exps_[name] = L.coef.at("v")->clone();
             }
         }
     }
@@ -480,6 +490,9 @@ bool Module::semantic() {
     // compute_currents : update contributions to currents
     //..........................................................
     NrnCurrentRewriter compute_currents_rewriter;
+    // Register non-specific current name
+    if (neuron_block_.has_nonspecific_current()) compute_currents_rewriter.non_specific_current = neuron_block_.nonspecific_current.spelling;
+
     breakpoint->accept(&compute_currents_rewriter);
 
     for (auto& s: breakpoint->body()->statements()) {
@@ -677,7 +690,6 @@ void Module::add_variables_to_symbols() {
         // Otherwise create an indexed variable and associate it
         // with the state variable if present (via a different name)
         // for ion state updates.
-
         VariableExpression* state = nullptr;
         if (has_symbol(name)) {
             state = symbols_[name].get()->is_variable();
@@ -710,12 +722,18 @@ void Module::add_variables_to_symbols() {
         create_indexed_variable(i.spelling, current_kind, accessKind::noaccess, "", i.location);
     }
 
+    std::set<std::string> cond;
     for(auto const& ion : neuron_block_.ions) {
         for(auto const& var : ion.read) {
             update_ion_symbols(var, accessKind::read, ion.name);
         }
         for(auto const& var : ion.write) {
             update_ion_symbols(var, accessKind::write, ion.name);
+            auto name = "conductivity_" + ion.name + "_";
+            if (cond.find(name) == cond.end()) {
+                create_indexed_variable(name, sourceKind::ion_conductivity, accessKind::write, ion.name, var.location);
+                cond.insert(name);
+            }
         }
 
         if(ion.uses_valence()) {
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index 2758e43c..967ad1c6 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -52,21 +52,23 @@ struct index_prop {
 void emit_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_masked_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
-void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true, bool ppack_iface=true);
-void emit_simd_api_body(std::ostream&, APIMethod*, const std::vector<VariableExpression*>& scalars);
+void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true, bool ppack_iface=true, bool use_additive=false);
+void emit_simd_api_body(std::ostream&, APIMethod*, const std::vector<VariableExpression*>& scalars, bool use_additive);
 void emit_simd_index_initialize(std::ostream& out, const std::list<index_prop>& indices, simd_expr_constraint constraint);
 
 void emit_simd_body_for_loop(std::ostream& out,
                              BlockExpression* body,
                              const std::vector<LocalVariable*>& indexed_vars,
                              const std::list<index_prop>& indices,
-                             const simd_expr_constraint& constraint);
+                             const simd_expr_constraint& constraint,
+                             bool use_additive);
 
 void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
                                        const std::vector<LocalVariable*>& indexed_vars,
                                        const std::list<index_prop>& indices,
                                        const simd_expr_constraint& constraint,
-                                       std::string constraint_name);
+                                       std::string constraint_name,
+                                       bool use_additive);
 
 struct cprint {
     Expression* expr_;
@@ -230,11 +232,11 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
     }
 
     // Make implementations
-    auto emit_body = [&](APIMethod *p) {
+    auto emit_body = [&](APIMethod *p, bool add=false) {
         if (with_simd) {
-            emit_simd_api_body(out, p, vars.scalars);
+            emit_simd_api_body(out, p, vars.scalars, add);
         } else {
-            emit_api_body(out, p);
+            emit_api_body(out, p, true, true, add);
         }
     };
 
@@ -322,7 +324,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
     out << popindent << "}\n\n";
 
     out << "static void compute_currents(arb_mechanism_ppack* pp) {\n" << indent;
-    emit_body(current_api);
+    emit_body(current_api, true);
     out << popindent << "}\n\n";
 
     out << "static void write_ions(arb_mechanism_ppack* pp) {\n" << indent;
@@ -343,7 +345,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
                            pp_var_pfx,
                            net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name());
         out << indent << indent << indent << indent;
-        emit_api_body(out, net_receive_api, false, false);
+        emit_api_body(out, net_receive_api, false, false, true);
         out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n\n";
     } else {
         out << "static void apply_events(arb_mechanism_ppack*, arb_deliverable_event_stream*) {}\n\n";
@@ -563,7 +565,7 @@ void emit_state_read(std::ostream& out, LocalVariable* local) {
     ENTER(out);
     out << "arb_value_type " << cprint(local) << " = ";
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         out << scaled(d.scale) << deref(d) << ";\n";
     }
@@ -573,17 +575,23 @@ void emit_state_read(std::ostream& out, LocalVariable* local) {
     EXIT(out);
 }
 
-void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* external) {
+void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, bool use_additive) {
     if (!external->is_write()) return;
     ENTER(out);
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
-    if (d.readonly) {
-        throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
+    if (d.readonly) throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
+    std::string var, weight = pp_var_pfx + "weight[i_]", scale = scaled(1.0/d.scale), name = from->name();
+    double coeff = 1.0/d.scale;
+    {
+        std::stringstream v, s, w;
+        v << deref(d); var = v.str();
     }
-
-    if (d.accumulate) {
+    if (d.additive && use_additive) {
+            out << fmt::format("{3} -= {0};\n"
+                               "{0} = fma({1}{2}, {3}, {0});\n",
+                               var, scale, weight, name);
+    }
+    else if (d.accumulate) {
         out << deref(d) << " = fma("
             << scaled(coeff) << pp_var_pfx << "weight[i_], "
             << from->name() << ", " << deref(d) << ");\n";
@@ -594,7 +602,7 @@ void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* externa
     EXIT(out);
 }
 
-void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppack_iface) {
+void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppack_iface, bool use_additive) {
     ENTER(out);
     auto body = method->body();
     auto indexed_vars = indexed_locals(method->scope());
@@ -614,7 +622,7 @@ void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppac
         out << cprint(body);
 
         for (auto& sym: indexed_vars) {
-            emit_state_update(out, sym, sym->external_variable());
+            emit_state_update(out, sym, sym->external_variable(), use_additive);
         }
         cv_loop && out << popindent << "}\n";
     }
@@ -768,7 +776,7 @@ void emit_simd_state_read(std::ostream& out, LocalVariable* local, simd_expr_con
     ENTER(out);
     out << "simd_value " << local->name();
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         if (d.scalar()) {
             out << " = simd_cast<simd_value>(" << pp_var_pfx << d.data_var
@@ -813,97 +821,88 @@ void emit_simd_state_read(std::ostream& out, LocalVariable* local, simd_expr_con
     EXIT(out);
 }
 
-void emit_simd_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, simd_expr_constraint constraint) {
+void emit_simd_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, simd_expr_constraint constraint, bool use_additive) {
     if (!external->is_write()) return;
+    ENTER(out);
 
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
     if (d.readonly) {
         throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
     }
 
-    ENTER(out);
+    auto ext = external->name();
+    auto name = from->name();
+    auto data = data_via_ppack(d);
+    auto node = node_index_i_name(d);
+    auto index = index_i_name(d.outer_index_var());
+
+    std::string scaled = name;
+    if (d.scale != 1.0) {
+        std::stringstream ss;
+        ss << "S::mul(" << name << ", simd_cast<simd_value>(" << as_c_double(1/d.scale) << "))";
+        scaled = ss.str();
+    }
 
-    if (d.accumulate) {
-        switch (d.index_var_kind) {
-            case index_kind::node: {
-                switch (constraint) {
-                case simd_expr_constraint::contiguous:
-                {
-                    std::string tempvar = "t_" + external->name();
-                    out << "simd_value " << tempvar << ";\n"
-                        << "assign(" << tempvar << ", indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_));\n";
-                    if (coeff != 1) {
-                        out << tempvar << " = S::fma(S::mul(w_, simd_cast<simd_value>(" << as_c_double(coeff) << "))," << from->name() << ", " << tempvar << ");\n";
-                    } else {
-                        out << tempvar << " = S::fma(w_, " << from->name() << ", " << tempvar << ");\n";
-                    }
-                    out << "indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_) = " << tempvar << ";\n";
-                    break;
-                }
-                case simd_expr_constraint::constant:
-                {
-                    out << "indirect(" << data_via_ppack(d) << ", simd_cast<simd_index>(" << node_index_i_name(d) << "), simd_width_, constraint_category_)";
-                    if (coeff != 1) {
-                        out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                    } else {
-                        out << " += S::mul(w_, " << from->name() << ");\n";
-                    }
-                    break;
-                }
-                default :
-                {
-                    out << "indirect(" << data_via_ppack(d) << ", " << node_index_i_name(d) << ", simd_width_, constraint_category_)";
-                    if (coeff != 1) {
-                        out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                    } else {
-                        out << " += S::mul(w_, " << from->name() << ");\n";
-                    }
-                }
-                }
-                break;
+    if (d.additive && use_additive) {
+        if (d.index_var_kind == index_kind::node) {
+            if (constraint == simd_expr_constraint::contiguous) {
+                    out << fmt::format("indirect({} + {}, simd_width_) = S::mul(w_, {});\n",
+                                       data, node, scaled);
             }
-            default: {
-                out << "indirect(" << data_via_ppack(d) << ", " << index_i_name(d.outer_index_var()) << ", simd_width_, index_constraint::none)";
-                if (coeff != 1) {
-                    out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                } else {
-                    out << " += S::mul(w_, " << from->name() << ");\n";
-                }
-                break;
+            else {
+                    // We need this instead of simple assignment!
+                    out << fmt::format("{{\n"
+                                       "  simd_value t_{}0_ = simd_cast<simd_value>(0.0);\n"
+                                       "  assign(t_{}0_, indirect({}, simd_cast<simd_index>({}), simd_width_, constraint_category_));\n"
+                                       "  {} -= t_{}0_;\n"
+                                       "  indirect({}, simd_cast<simd_index>({}), simd_width_, constraint_category_) += S::mul(w_, {});\n"
+                                       "}}\n",
+                                       name,
+                                       name, data, node,
+                                       scaled, name,
+                                       data, node, scaled);
             }
         }
+        else {
+            out << fmt::format("indirect({}, {}, simd_width_, index_constraint::none) = S::mul(w_, {});\n",
+                               data, index, scaled);
+        }
     }
-    else {
-        switch (d.index_var_kind) {
-            case index_kind::node: {
-                switch (constraint) {
+    else if (d.accumulate) {
+        if (d.index_var_kind == index_kind::node) {
+            std::string tempvar = "t_" + external->name();
+            switch (constraint) {
                 case simd_expr_constraint::contiguous:
-                    out << "indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_) = ";
+                    out << "simd_value " << tempvar << ";\n"
+                        << "assign(" << tempvar << ", indirect(" << data << " + " << node << ", simd_width_));\n"
+                        << tempvar << " = S::fma(w_, " << scaled << ", " << tempvar << ");\n"
+                        << "indirect(" << data << " + " << node << ", simd_width_) = " << tempvar << ";\n";
                     break;
                 case simd_expr_constraint::constant:
-                    out << "indirect(" << data_via_ppack(d) << ", simd_cast<simd_index>(" << node_index_i_name(d) << "), simd_width_, constraint_category_) = ";
+                    out << "indirect(" << data << ", simd_cast<simd_index>(" << node << "), simd_width_, constraint_category_) += S::mul(w_, " << scaled << ");\n";
                     break;
                 default:
-                    out << "indirect(" << data_via_ppack(d) << ", " << node_index_i_name(d) << ", simd_width_, constraint_category_) = ";
-                }
-                break;
-            }
-            default: {
-                out << "indirect(" << data_via_ppack(d)
-                    << ", " << index_i_name(d.outer_index_var()) << ", simd_width_, index_constraint::none) = ";
-                break;
+                    out << "indirect(" << data << ", " << node << ", simd_width_, constraint_category_) += S::mul(w_, " << scaled << ");\n";
             }
-        }
-
-        if (coeff != 1) {
-            out << "(S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << ")," << from->name() << "));\n";
         } else {
-            out << from->name() << ";\n";
+            out << "indirect(" << data << ", " << index << ", simd_width_, index_constraint::none) += S::mul(w_, " << scaled << ");\n";
         }
     }
-
+    else if (d.index_var_kind == index_kind::node) {
+        switch (constraint) {
+            case simd_expr_constraint::contiguous:
+                out << "indirect(" << data << " + " << node << ", simd_width_) = " << scaled << ";\n";
+                break;
+            case simd_expr_constraint::constant:
+                out << "indirect(" << data << ", simd_cast<simd_index>(" << node << "), simd_width_, constraint_category_) = " << scaled << ";\n";
+                break;
+            default:
+                out << "indirect(" << data << ", " << node << ", simd_width_, constraint_category_) = " << scaled << ";\n";
+        }
+    }
+    else {
+        out << "indirect(" << data << ", " << index << ", simd_width_, index_constraint::none) = " << scaled << ";\n";
+    }
     EXIT(out);
 }
 
@@ -959,7 +958,8 @@ void emit_simd_body_for_loop(
         const std::vector<LocalVariable*>& indexed_vars,
         const std::vector<VariableExpression*>& scalars,
         const std::list<index_prop>& indices,
-        const simd_expr_constraint& constraint) {
+        const simd_expr_constraint& constraint,
+        bool use_additive) {
     ENTER(out);
     emit_simd_index_initialize(out, indices, constraint);
 
@@ -973,17 +973,18 @@ void emit_simd_body_for_loop(
     out << printer;
 
     for (auto& sym: indexed_vars) {
-        emit_simd_state_update(out, sym, sym->external_variable(), constraint);
+        emit_simd_state_update(out, sym, sym->external_variable(), constraint, use_additive);
     }
     EXIT(out);
 }
 
 void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
-                                  const std::vector<LocalVariable*>& indexed_vars,
-                                  const std::vector<VariableExpression*>& scalars,
-                                  const std::list<index_prop>& indices,
-                                  const simd_expr_constraint& constraint,
-                                  std::string underlying_constraint_name) {
+                                       const std::vector<LocalVariable*>& indexed_vars,
+                                       const std::vector<VariableExpression*>& scalars,
+                                       const std::list<index_prop>& indices,
+                                       const simd_expr_constraint& constraint,
+                                       std::string underlying_constraint_name,
+                                       bool use_additive) {
     ENTER(out);
     out << fmt::format("constraint_category_ = index_constraint::{1};\n"
                        "for (auto i_ = 0ul; i_ < {0}index_constraints.n_{1}; i_++) {{\n"
@@ -995,13 +996,13 @@ void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
                        "assign(w_, indirect(({}weight+index_), simd_width_));\n",
                        pp_var_pfx);
 
-    emit_simd_body_for_loop(out, body, indexed_vars, scalars, indices, constraint);
+    emit_simd_body_for_loop(out, body, indexed_vars, scalars, indices, constraint, use_additive);
 
     out << popindent << "}\n";
     EXIT(out);
 }
 
-void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<VariableExpression*>& scalars) {
+void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<VariableExpression*>& scalars, bool use_additive) {
     auto body = method->body();
     auto indexed_vars = indexed_locals(method->scope());
 
@@ -1023,25 +1024,25 @@ void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<
             simd_expr_constraint constraint = simd_expr_constraint::contiguous;
             std::string underlying_constraint = "contiguous";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all independent simd_vectors
             constraint = simd_expr_constraint::other;
             underlying_constraint = "independent";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all simd_vectors that have no optimizing constraints
             constraint = simd_expr_constraint::other;
             underlying_constraint = "none";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all constant simd_vectors
             constraint = simd_expr_constraint::constant;
             underlying_constraint = "constant";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
         }
         else {
diff --git a/modcc/printer/gpuprinter.cpp b/modcc/printer/gpuprinter.cpp
index 63c04d5c..aa3c76e2 100644
--- a/modcc/printer/gpuprinter.cpp
+++ b/modcc/printer/gpuprinter.cpp
@@ -19,10 +19,19 @@ using io::indent;
 using io::popindent;
 using io::quote;
 
-void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true, bool ppack=true);
+static std::string scaled(double coeff) {
+    std::stringstream ss;
+    if (coeff != 1) {
+        ss << as_c_double(coeff) << '*';
+    }
+    return ss.str();
+}
+
+
+void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true, bool ppack=true, bool additive=false);
 void emit_procedure_body_cu(std::ostream& out, ProcedureExpression* proc);
 void emit_state_read_cu(std::ostream& out, LocalVariable* local);
-void emit_state_update_cu(std::ostream& out, Symbol* from, IndexedVariable* external, bool is_point_proc);
+void emit_state_update_cu(std::ostream& out, Symbol* from, IndexedVariable* external, bool is_point_proc, bool use_additive);
 
 const char* index_id(Symbol *s);
 
@@ -211,14 +220,14 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
     }
 
     // API methods as __global__ kernels.
-    auto emit_api_kernel = [&] (APIMethod* e) {
+    auto emit_api_kernel = [&] (APIMethod* e, bool additive=false) {
         // Only print the kernel if the method is not empty.
         if (!e->body()->statements().empty()) {
             out << "__global__\n"
                 << "void " << e->name() << "(arb_mechanism_ppack params_) {\n" << indent
                 << "int n_ = params_.width;\n"
                 << "int tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n";
-            emit_api_body_cu(out, e, is_point_proc);
+            emit_api_body_cu(out, e, is_point_proc, true, true, additive);
             out << popindent << "}\n\n";
         }
     };
@@ -237,7 +246,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
                            pp_var_pfx);
     }
     emit_api_kernel(state_api);
-    emit_api_kernel(current_api);
+    emit_api_kernel(current_api, true);
     emit_api_kernel(write_ions_api);
 
     // event delivery
@@ -256,7 +265,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
                            net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name(),
                            pp_var_pfx);
         out << indent << indent << indent << indent;
-        emit_api_body_cu(out, net_receive_api, is_point_proc, false, false);
+        emit_api_body_cu(out, net_receive_api, is_point_proc, false, false, false);
         out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n";
     }
 
@@ -357,7 +366,7 @@ static std::string index_i_name(const std::string& index_var) {
     return index_var+"i_";
 }
 
-void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop, bool ppack) {
+void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop, bool ppack, bool additive) {
     auto body = e->body();
     auto indexed_vars = indexed_locals(e->scope());
 
@@ -435,7 +444,7 @@ void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool
         out << cuprint(body);
 
         for (auto& sym: indexed_vars) {
-            emit_state_update_cu(out, sym, sym->external_variable(), is_point_proc);
+            emit_state_update_cu(out, sym, sym->external_variable(), is_point_proc, additive);
         }
         cv_loop && out << popindent << "}\n";
     }
@@ -463,7 +472,7 @@ namespace {
 void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
     out << "arb_value_type " << cuprint(local) << " = ";
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         if (d.scale != 1) {
             out << as_c_double(d.scale) << "*";
@@ -477,36 +486,39 @@ void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
 
 
 void emit_state_update_cu(std::ostream& out, Symbol* from,
-                          IndexedVariable* external, bool is_point_proc) {
+                          IndexedVariable* external, bool is_point_proc, bool use_additive) {
     if (!external->is_write()) return;
-
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
     if (d.readonly) {
         throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
     }
 
-    if (is_point_proc && d.accumulate) {
-        out << "::arb::gpu::reduce_by_key(";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << pp_var_pfx << "weight[tid_]*" << from->name() << ',';
+    auto name   = from->name();
+    auto scale  = scaled(1.0/d.scale);
+    auto data   = pp_var_pfx + d.data_var;
+    auto index  = index_i_name(d.outer_index_var());
+    auto var    = deref(d);
+    auto weight = scale + pp_var_pfx + "weight[tid_]";
 
-        auto index_var = d.outer_index_var();
-        out << pp_var_pfx << d.data_var << ", " << index_i_name(index_var) << ", lane_mask_);\n";
+    if (d.additive && use_additive) {
+        out << name << " -= " << var << ";\n";
+        if (is_point_proc) {
+            out << fmt::format("::arb::gpu::reduce_by_key({}*{}, {}, {}, lane_mask_);\n", weight, name, data, index);
+        }
+        else {
+            out << var << " = fma(" << weight << ", " << name << ", " << var << ");\n";
+        }
     }
     else if (d.accumulate) {
-        out << deref(d) << " = fma(";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << pp_var_pfx << "weight[tid_], " << from->name() << ", " << deref(d) << ");\n";
+        if (is_point_proc) {
+            out << "::arb::gpu::reduce_by_key(" << weight << "*" << name << ',' << data << ", " << index << ", lane_mask_);\n";
+        }
+        else {
+            out << var << " = fma(" << weight << ", " << name << ", " << var << ");\n";
+        }
     }
     else {
-        out << deref(d) << " = ";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << from->name() << ";\n";
+        out << var << " = " << scale << name << ";\n";
     }
 }
 
diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp
index c796b513..c4211001 100644
--- a/modcc/printer/infoprinter.cpp
+++ b/modcc/printer/infoprinter.cpp
@@ -52,9 +52,10 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
                 id.unit_string(), val, lo, hi);
     };
     auto fmt_ion = [&](const auto& ion) {
-        return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {} }}"),
+        return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {}, {} }}"),
            ion.name,
            ion.writes_concentration_int(), ion.writes_concentration_ext(),
+           ion.uses_concentration_diff(),
            ion.writes_rev_potential(), ion.uses_rev_potential(),
            ion.uses_valence(), ion.verifies_valence(), ion.expected_valence);
     };
diff --git a/modcc/printer/printerutil.cpp b/modcc/printer/printerutil.cpp
index 72f208e9..64914bac 100644
--- a/modcc/printer/printerutil.cpp
+++ b/modcc/printer/printerutil.cpp
@@ -141,6 +141,7 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
     v.index_var_kind = index_kind::node;
     v.scale = 1;
     v.accumulate = true;
+    v.additive = false;
     v.readonly = true;
 
     std::string ion_pfx;
@@ -196,6 +197,11 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
         v.scale = 0.1;
         v.readonly = false;
         break;
+    case sourceKind::ion_conductivity:
+        v.data_var = ion_pfx+".conductivity";
+        v.scale = 0.1;
+        v.readonly = false;
+        break;
     case sourceKind::ion_current:
         // unit scale; sourceKind for point processes updating an ionic current variable.
         v.data_var = ion_pfx+".current_density";
@@ -210,6 +216,12 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
         v.data_var = ion_pfx+".internal_concentration";
         v.readonly = false;
         break;
+    case sourceKind::ion_diffusive:
+        v.data_var = ion_pfx+".diffusive_concentration";
+        v.readonly = false;
+        v.accumulate = false;
+        v.additive = true;
+        break;
     case sourceKind::ion_econc:
         v.data_var = ion_pfx+".external_concentration";
         v.readonly = false;
diff --git a/modcc/printer/printerutil.hpp b/modcc/printer/printerutil.hpp
index 608a231a..bf3e406c 100644
--- a/modcc/printer/printerutil.hpp
+++ b/modcc/printer/printerutil.hpp
@@ -149,6 +149,7 @@ struct ARB_LIBMODCC_API indexed_variable_info {
 
     bool accumulate = true; // true => add with weight_ factor on assignment
     bool readonly = false;  // true => can never be assigned to by a mechanism
+    bool additive = false;  // only additive contributions allowed?
 
     // Scale is the conversion factor from the data variable
     // to the NMODL value.
diff --git a/python/cable_probes.cpp b/python/cable_probes.cpp
index 4ba583e3..a309578a 100644
--- a/python/cable_probes.cpp
+++ b/python/cable_probes.cpp
@@ -195,6 +195,14 @@ arb::probe_info cable_probe_ion_int_concentration_cell(const char* ion) {
     return arb::cable_probe_ion_int_concentration_cell{ion};
 }
 
+arb::probe_info cable_probe_ion_diff_concentration(const char* where, const char* ion) {
+    return arb::cable_probe_ion_diff_concentration{arborio::parse_locset_expression(where).unwrap(), ion};
+}
+
+arb::probe_info cable_probe_ion_diff_concentration_cell(const char* ion) {
+    return arb::cable_probe_ion_diff_concentration_cell{ion};
+}
+
 arb::probe_info cable_probe_ion_ext_concentration(const char* where, const char* ion) {
     return arb::cable_probe_ion_ext_concentration{arborio::parse_locset_expression(where).unwrap(), ion};
 }
@@ -284,6 +292,14 @@ void register_cable_probes(pybind11::module& m, pyarb_global_ptr global_ptr) {
         "Probe specification for cable cell internal ionic concentration for each cable in each CV.",
         "ion"_a);
 
+    m.def("cable_probe_ion_diff_concentration", &cable_probe_ion_diff_concentration,
+        "Probe specification for cable cell diffusive ionic concentration at points in a location set.",
+        "where"_a, "ion"_a);
+
+    m.def("cable_probe_ion_diff_concentration_cell", &cable_probe_ion_diff_concentration_cell,
+        "Probe specification for cable cell diffusive ionic concentration for each cable in each CV.",
+        "ion"_a);
+
     m.def("cable_probe_ion_ext_concentration", &cable_probe_ion_ext_concentration,
         "Probe specification for cable cell external ionic concentration at points in a location set.",
         "where"_a, "ion"_a);
diff --git a/python/cells.cpp b/python/cells.cpp
index f103764a..bd4f1ba9 100644
--- a/python/cells.cpp
+++ b/python/cells.cpp
@@ -549,7 +549,7 @@ void register_cells(pybind11::module& m) {
             [](arb::cable_cell_global_properties& props, const char* ion,
                optional<double> valence, optional<double> int_con,
                optional<double> ext_con, optional<double> rev_pot,
-               pybind11::object method)
+               pybind11::object method, optional<double> diff)
             {
                 if (!props.ion_species.count(ion) && !valence) {
                     throw std::runtime_error(util::pprintf("New ion species: '{}', missing valence", ion));
@@ -560,6 +560,7 @@ void register_cells(pybind11::module& m) {
                 if (int_con) data.init_int_concentration  = *int_con;
                 if (ext_con) data.init_ext_concentration  = *ext_con;
                 if (rev_pot) data.init_reversal_potential = *rev_pot;
+                if (diff)    data.diffusivity             = *diff;
 
                 if (auto m = maybe_method(method)) {
                     props.default_parameters.reversal_potential_method[ion] = *m;
@@ -571,6 +572,7 @@ void register_cells(pybind11::module& m) {
             pybind11::arg_v("ext_con", pybind11::none(), "initial external concentration [mM]."),
             pybind11::arg_v("rev_pot", pybind11::none(), "reversal potential [mV]."),
             pybind11::arg_v("method",  pybind11::none(), "method for calculating reversal potential."),
+            pybind11::arg_v("diff",    pybind11::none(), "diffusivity [m^2/s]."),
             "Set the global default properties of ion species named 'ion'.\n"
             "There are 3 ion species predefined in arbor: 'ca', 'na' and 'k'.\n"
             "If 'ion' in not one of these ions it will be added to the list, making it\n"
@@ -620,11 +622,13 @@ void register_cells(pybind11::module& m) {
         .def("set_ion",
             [](arb::decor& d, const char* ion,
                optional<double> int_con, optional<double> ext_con,
-               optional<double> rev_pot, pybind11::object method)
+               optional<double> rev_pot, pybind11::object method,
+               optional<double> diff)
             {
                 if (int_con) d.set_default(arb::init_int_concentration{ion, *int_con});
                 if (ext_con) d.set_default(arb::init_ext_concentration{ion, *ext_con});
                 if (rev_pot) d.set_default(arb::init_reversal_potential{ion, *rev_pot});
+                if (diff)    d.set_default(arb::ion_diffusivity{ion, *diff});
                 if (auto m = maybe_method(method)) {
                     d.set_default(arb::ion_reversal_potential_method{ion, *m});
                 }
@@ -634,6 +638,7 @@ void register_cells(pybind11::module& m) {
             pybind11::arg_v("ext_con", pybind11::none(), "initial external concentration [mM]."),
             pybind11::arg_v("rev_pot", pybind11::none(), "reversal potential [mV]."),
             pybind11::arg_v("method",  pybind11::none(), "method for calculating reversal potential."),
+            pybind11::arg_v("diff",    pybind11::none(), "diffusivity [m^2/s]."),
             "Set the properties of ion species named 'ion' that will be applied\n"
             "by default everywhere on the cell. Species concentrations and reversal\n"
             "potential can be overridden on specific regions using the paint interface, \n"
@@ -668,16 +673,19 @@ void register_cells(pybind11::module& m) {
         // Paint ion species initial conditions on a region.
         .def("paint",
             [](arb::decor& dec, const char* region, const char* name,
-               optional<double> int_con, optional<double> ext_con, optional<double> rev_pot) {
+               optional<double> int_con, optional<double> ext_con,
+               optional<double> rev_pot, optional<double> diff) {
                 auto r = arborio::parse_region_expression(region).unwrap();
                 if (int_con) dec.paint(r, arb::init_int_concentration{name, *int_con});
                 if (ext_con) dec.paint(r, arb::init_ext_concentration{name, *ext_con});
                 if (rev_pot) dec.paint(r, arb::init_reversal_potential{name, *rev_pot});
+                if (diff)    dec.paint(r, arb::ion_diffusivity{name, *diff});
             },
             "region"_a, pybind11::kw_only(), "ion_name"_a,
             pybind11::arg_v("int_con", pybind11::none(), "Initial internal concentration [mM]"),
             pybind11::arg_v("ext_con", pybind11::none(), "Initial external concentration [mM]"),
             pybind11::arg_v("rev_pot", pybind11::none(), "Initial reversal potential [mV]"),
+            pybind11::arg_v("diff",    pybind11::none(), "Diffusivity [m^2/s]"),
             "Set ion species properties conditions on a region.")
         // Place synapses
         .def("place",
diff --git a/python/example/diffusion.py b/python/example/diffusion.py
new file mode 100644
index 00000000..6d9f06bd
--- /dev/null
+++ b/python/example/diffusion.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import arbor as A
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+class recipe(A.recipe):
+    def __init__(self, cell, probes):
+        A.recipe.__init__(self)
+        self.the_cell = cell
+        self.the_probes = probes
+        self.the_props = A.neuron_cable_properties()
+        self.the_props.catalogue = A.default_catalogue()
+
+    def num_cells(self):
+        return 1
+
+    def cell_kind(self, gid):
+        return A.cell_kind.cable
+
+    def cell_description(self, gid):
+        return self.the_cell
+
+    def probes(self, gid):
+        return self.the_probes
+
+    def global_properties(self, kind):
+        return self.the_props
+
+
+tree = A.segment_tree()
+s = tree.append(A.mnpos, A.mpoint(-3, 0, 0, 3), A.mpoint(3, 0, 0, 3), tag=1)
+_ = tree.append(s, A.mpoint(3, 0, 0, 1), A.mpoint(33, 0, 0, 1), tag=3)
+
+dec = A.decor()
+dec.set_property(Vm=-40)
+# dec.paint('(tag 1)', A.density('hh'))
+dec.discretization(A.cv_policy("(max-extent 5)"))
+
+# Set up ion diffusion
+dec.set_ion("na", int_con=1.0, ext_con=140, rev_pot=50, diff=0.005)
+dec.paint("(tag 1)", ion_name="na", int_con=100.0, diff=0.01)
+
+prb = [
+    A.cable_probe_ion_diff_concentration_cell("na"),
+]
+cel = A.cable_cell(tree, A.label_dict(), dec)
+rec = recipe(cel, prb)
+ctx = A.context()
+dom = A.partition_load_balance(rec, ctx)
+sim = A.simulation(rec, dom, ctx)
+hdl = (sim.sample((0, 0), A.regular_schedule(0.1)),)
+
+sim.run(tfinal=0.5)
+
+sns.set_theme()
+fg, ax = plt.subplots()
+for h in hdl:
+    for d, m in sim.samples(h):
+        xs = d[:, 0]
+        for lbl, ix in zip(m, range(1, d.shape[1])):
+            ys = d[:, ix]
+            print(lbl, ys.min(), ys.max())
+            ax.plot(xs, ys, label=lbl)
+ax.legend()
+fg.savefig("results.pdf")
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 98b5eeed..2fe2ff90 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -112,6 +112,7 @@ set(unit_sources
     test_forest.cpp
     test_fvm_layout.cpp
     test_fvm_lowered.cpp
+    test_diffusion.cpp
     test_index.cpp
     test_kinetic_linear.cpp
     test_lexcmp.cpp
diff --git a/test/unit/test_diffusion.cpp b/test/unit/test_diffusion.cpp
new file mode 100644
index 00000000..37e29222
--- /dev/null
+++ b/test/unit/test_diffusion.cpp
@@ -0,0 +1,288 @@
+#include <cmath>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "../gtest.h"
+
+#include <arborio/label_parse.hpp>
+
+#include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/load_balance.hpp>
+#include <arbor/math.hpp>
+#include <arbor/cable_cell.hpp>
+#include <arbor/recipe.hpp>
+#include <arbor/sampling.hpp>
+#include <arbor/simulation.hpp>
+#include <arbor/schedule.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/util/any_ptr.hpp>
+
+#include <arborenv/default_env.hpp>
+
+using namespace std::string_literals;
+using namespace arborio::literals;
+
+using namespace arb;
+
+constexpr double epsilon  = 1e-6;
+#ifdef ARB_GPU_ENABLED
+constexpr int    with_gpu = 0;
+#else
+constexpr int    with_gpu = -1;
+#endif
+
+struct linear: public recipe {
+    linear(double x, double d, double c): extent{x}, diameter{d}, cv_length{c} {
+        gprop.default_parameters = arb::neuron_parameter_defaults;
+        gprop.default_parameters.discretization = arb::cv_policy_max_extent{cv_length};
+        // Stick morphology
+        // -----x-----
+        segment_tree tree;
+        auto p = mnpos;
+        p = tree.append(p, { -extent, 0, 0, diameter}, {extent, 0, 0, diameter}, 1);
+        morph = {tree};
+    }
+
+    arb::cell_size_type num_cells()                             const override { return 1; }
+    arb::cell_kind get_cell_kind(arb::cell_gid_type)            const override { return arb::cell_kind::cable; }
+    std::any get_global_properties(arb::cell_kind)              const override { return gprop; }
+    std::vector<arb::probe_info> get_probes(arb::cell_gid_type) const override { return {arb::cable_probe_ion_diff_concentration_cell{"na"}}; }
+    util::unique_any get_cell_description(arb::cell_gid_type)   const override { return arb::cable_cell(morph, {}, decor); }
+
+    std::vector<arb::event_generator> event_generators(arb::cell_gid_type gid) const override {
+        std::vector<arb::event_generator> result;
+        for (const auto& [t, w]: inject_at) result.push_back(arb::explicit_generator({{{"Zap"}, t, w}}));
+        return result;
+    }
+
+    arb::cable_cell_global_properties gprop;
+    double extent = 1.0,
+           diameter = 1.0,
+           cv_length = 1.0;
+    std::vector<std::tuple<double, float>> inject_at;
+    morphology morph;
+    arb::decor decor;
+
+    linear& add_decay()  { decor.paint("(all)"_reg, arb::density("decay/x=na")); return *this; }
+    linear& add_inject() { decor.place("(location 0 0.5)"_ls, arb::synapse("inject/x=na", {{"alpha", 200.0*cv_length}}), "Zap"); return *this; }
+    linear& add_event(double t, float w) { inject_at.push_back({t, w}); return *this; }
+    linear& set_diffusivity(double d, std::optional<region> rg = {}) {
+        if (rg) decor.paint(*rg, ion_diffusivity{"na", d});
+        else    decor.set_default(ion_diffusivity{"na", d});
+        return *this;
+    }
+    linear& set_concentration(double d, std::optional<region> rg = {}) {
+        if (rg) decor.paint(*rg, init_int_concentration{"na", d});
+        else    decor.set_default(init_int_concentration{"na", d});
+        return *this;
+    }
+};
+
+using result_t = std::vector<std::tuple<double, double, double>>;
+
+testing::AssertionResult all_near(const result_t& a, const result_t& b, double eps) {
+    if (a.size() != b.size()) return testing::AssertionFailure() << "sequences differ in length";
+    std::stringstream res;
+    for (size_t ix = 0; ix < a.size(); ++ix) {
+        const auto&[ax, ay, az] = a[ix];
+        const auto&[bx, by, bz] = b[ix];
+        if (fabs(ax - bx) > eps) res << " X elements " << ax << " and " << bx << " differ at index " << ix << ".";
+        if (fabs(ay - by) > eps) res << " Y elements " << ay << " and " << by << " differ at index " << ix << ".";
+        if (fabs(az - bz) > eps) res << " Z elements " << az << " and " << bz << " differ at index " << ix << ".";
+    }
+    std::string str = res.str();
+    if (str.empty()) return testing::AssertionSuccess();
+    else             return testing::AssertionFailure() << str;
+}
+
+testing::AssertionResult run(const linear& rec, const result_t exp) {
+    result_t sample_values;
+    auto sampler = [&sample_values](arb::probe_metadata pm, std::size_t n, const arb::sample_record* samples) {
+        sample_values.clear();
+        auto ptr = arb::util::any_cast<const arb::mcable_list*>(pm.meta);
+        ASSERT_NE(ptr, nullptr);
+        auto n_cable = ptr->size();
+        for (std::size_t i = 0; i<n; ++i) {
+            const auto& [val, _ig] = *arb::util::any_cast<const arb::cable_sample_range*>(samples[i].data);
+            for (unsigned j = 0; j<n_cable; ++j) {
+                arb::mcable loc = (*ptr)[j];
+                sample_values.push_back({samples[i].time, loc.prox_pos, val[j]});
+            }
+        }
+    };
+    auto ctx = make_context({arbenv::default_concurrency(), with_gpu});
+    auto sim = simulation{rec, partition_load_balance(rec, ctx), ctx};
+    sim.add_sampler(arb::all_probes, arb::regular_schedule(0.1), sampler);
+    sim.run(0.11, 0.01);
+    return all_near(sample_values, exp, epsilon);
+}
+
+TEST(diffusion, errors) {
+    {
+        // Cannot R/W Xd w/o setting diffusivity
+        auto rec = linear{30, 3, 1}.add_decay();
+        ASSERT_THROW(run(rec, {}), illegal_diffusive_mechanism);
+    }
+    {
+        // Cannot R/W Xd w/o setting diffusivity
+        auto rec = linear{30, 3, 1}.add_inject();
+        ASSERT_THROW(run(rec, {}), illegal_diffusive_mechanism);
+    }
+    {
+        // No negative diffusivity
+        auto rec = linear{30, 3, 1}.set_diffusivity(-42.0, "(all)"_reg);
+        ASSERT_THROW(run(rec, {}), cable_cell_error);
+    }
+    {
+        // No negative diffusivity
+        auto rec = linear{30, 3, 1}.set_diffusivity(-42.0);
+        ASSERT_THROW(run(rec, {}), cable_cell_error);
+    }
+}
+
+TEST(diffusion, by_initial_concentration) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .set_concentration(0.1, "(cable 0 0.5 0.6)"_reg);
+    result_t exp = {{0.000000, 0.000000, 0.000000},
+                    {0.000000, 0.100000, 0.000000},
+                    {0.000000, 0.200000, 0.000000},
+                    {0.000000, 0.300000, 0.000000},
+                    {0.000000, 0.400000, 0.000000},
+                    {0.000000, 0.500000, 0.100000},
+                    {0.000000, 0.600000, 0.000000},
+                    {0.000000, 0.700000, 0.000000},
+                    {0.000000, 0.800000, 0.000000},
+                    {0.000000, 0.900000, 0.000000},
+                    {0.100000, 0.000000, 0.000000},
+                    {0.100000, 0.100000, 0.000000},
+                    {0.100000, 0.200000, 0.000000},
+                    {0.100000, 0.300000, 0.000023},
+                    {0.100000, 0.400000, 0.001991},
+                    {0.100000, 0.500000, 0.095973},
+                    {0.100000, 0.600000, 0.001991},
+                    {0.100000, 0.700000, 0.000023},
+                    {0.100000, 0.800000, 0.000000},
+                    {0.100000, 0.900000, 0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, by_event) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .add_inject()
+        .add_event(0, 0.005);
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000, 53.051647},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000001},
+                    { 0.100000,  0.200000,  0.000100},
+                    { 0.100000,  0.300000,  0.012051},
+                    { 0.100000,  0.400000,  1.056130},
+                    { 0.100000,  0.500000, 50.915085},
+                    { 0.100000,  0.600000,  1.056130},
+                    { 0.100000,  0.700000,  0.012051},
+                    { 0.100000,  0.800000,  0.000100},
+                    { 0.100000,  0.900000,  0.000001}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(1e-300)
+        .set_concentration(0.1)
+        .add_decay();
+    result_t exp = {{ 0.000000,  0.000000,  0.100000},
+                    { 0.000000,  0.100000,  0.100000},
+                    { 0.000000,  0.200000,  0.100000},
+                    { 0.000000,  0.300000,  0.100000},
+                    { 0.000000,  0.400000,  0.100000},
+                    { 0.000000,  0.500000,  0.100000},
+                    { 0.000000,  0.600000,  0.100000},
+                    { 0.000000,  0.700000,  0.100000},
+                    { 0.000000,  0.800000,  0.100000},
+                    { 0.000000,  0.900000,  0.100000},
+                    { 0.100000,  0.000000,  0.060647},
+                    { 0.100000,  0.100000,  0.060647},
+                    { 0.100000,  0.200000,  0.060647},
+                    { 0.100000,  0.300000,  0.060647},
+                    { 0.100000,  0.400000,  0.060647},
+                    { 0.100000,  0.500000,  0.060647},
+                    { 0.100000,  0.600000,  0.060647},
+                    { 0.100000,  0.700000,  0.060647},
+                    { 0.100000,  0.800000,  0.060647},
+                    { 0.100000,  0.900000,  0.060647}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay_by_initial_concentration) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .set_concentration(0.1, "(cable 0 0.5 0.6)"_reg)
+        .add_decay();
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000,  0.100000},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000000},
+                    { 0.100000,  0.200000,  0.000000},
+                    { 0.100000,  0.300000,  0.000014},
+                    { 0.100000,  0.400000,  0.001207},
+                    { 0.100000,  0.500000,  0.058204},
+                    { 0.100000,  0.600000,  0.001207},
+                    { 0.100000,  0.700000,  0.000014},
+                    { 0.100000,  0.800000,  0.000000},
+                    { 0.100000,  0.900000,  0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay_by_event) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .add_decay()
+        .add_inject()
+        .add_event(0, 0.005);
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000, 53.051647},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000000},
+                    { 0.100000,  0.200000,  0.000061},
+                    { 0.100000,  0.300000,  0.007308},
+                    { 0.100000,  0.400000,  0.640508},
+                    { 0.100000,  0.500000, 30.878342},
+                    { 0.100000,  0.600000,  0.640508},
+                    { 0.100000,  0.700000,  0.007308},
+                    { 0.100000,  0.800000,  0.000061},
+                    { 0.100000,  0.900000,  0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
index e26c8db8..9bdd9bab 100644
--- a/test/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -1474,7 +1474,7 @@ TEST(fvm_layout, valence_verify) {
     EXPECT_THROW(fvm_build_mechanism_data(gprop, cells, gids, gj_conns, D), cable_cell_error);
 
     // Adding ion, should be fine now:
-    gprop.default_parameters.ion_data["cl"] = { 1., 1., 0. };
+    gprop.default_parameters.ion_data["cl"] = { 1., 1., 0., 0. };
     gprop.ion_species["cl"] = -1;
     EXPECT_NO_THROW(fvm_build_mechanism_data(gprop, cells, gids, gj_conns, D));
 
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index b07208fd..d9d1aa76 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -48,8 +48,7 @@ using fvm_cell = arb::fvm_lowered_cell_impl<backend>;
 using shared_state = backend::shared_state;
 ACCESS_BIND(std::unique_ptr<shared_state> fvm_cell::*, private_state_ptr, &fvm_cell::state_)
 
-using matrix = arb::matrix<arb::multicore::backend>;
-ACCESS_BIND(matrix fvm_cell::*, private_matrix_ptr, &fvm_cell::matrix_)
+using matrix = arb::multicore::cable_solver;
 
 ACCESS_BIND(std::vector<arb::mechanism_ptr> fvm_cell::*, private_mechanisms_ptr, &fvm_cell::mechanisms_)
 
@@ -202,8 +201,8 @@ TEST(fvm_lowered, matrix_init)
     fvm_cell fvcell(*context);
     fvcell.initialize({0}, cable1d_recipe(cell));
 
-    auto& J = fvcell.*private_matrix_ptr;
     auto& S = fvcell.*private_state_ptr;
+    auto& J = S->solver;
     EXPECT_EQ(J.size(), 12u);
 
     // Test that the matrix is initialized with sensible values
@@ -211,14 +210,13 @@ TEST(fvm_lowered, matrix_init)
     fvcell.integrate(0.01, 0.01, {}, {});
 
     auto n = J.size();
-    auto& mat = J.state_;
 
-    EXPECT_FALSE(arb::util::any_of(util::subrange_view(mat.u, 1, n), isnan));
-    EXPECT_FALSE(arb::util::any_of(mat.d, isnan));
+    EXPECT_FALSE(arb::util::any_of(util::subrange_view(J.u, 1, n), isnan));
+    EXPECT_FALSE(arb::util::any_of(J.d, isnan));
     EXPECT_FALSE(arb::util::any_of(S->voltage, isnan));
 
-    EXPECT_FALSE(arb::util::any_of(util::subrange_view(mat.u, 1, n), ispos));
-    EXPECT_FALSE(arb::util::any_of(mat.d, isneg));
+    EXPECT_FALSE(arb::util::any_of(util::subrange_view(J.u, 1, n), ispos));
+    EXPECT_FALSE(arb::util::any_of(J.d, isneg));
 }
 
 TEST(fvm_lowered, target_handles) {
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index f63bf86f..12959eb6 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -5,7 +5,6 @@
 
 #include <arbor/math.hpp>
 
-#include "matrix.hpp"
 #include "backends/multicore/fvm.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
@@ -16,21 +15,21 @@ using namespace arb;
 
 using backend     = multicore::backend;
 using array       = backend::array;
-using matrix_type = matrix<backend>;
-using index_type  = matrix_type::index_type;
-using value_type  = matrix_type::value_type;
+using solver_type = backend::cable_solver;
+using index_type  = arb_index_type;
+using value_type  = arb_value_type;
 
 using vvec = std::vector<value_type>;
 
 TEST(matrix, construct_from_parent_only)
 {
     std::vector<index_type> p = {0,0,1};
-    matrix_type m(p, {0, 3}, vvec(3), vvec(3), vvec(3), {0});
+    solver_type m(p, {0, 3}, vvec(3), vvec(3), vvec(3), {0});
     EXPECT_EQ(m.num_cells(), 1u);
     EXPECT_EQ(m.size(), 3u);
     EXPECT_EQ(p.size(), 3u);
 
-    auto mp = m.p();
+    auto mp = m.parent_index;
     EXPECT_EQ(mp[0], index_type(0));
     EXPECT_EQ(mp[1], index_type(0));
     EXPECT_EQ(mp[2], index_type(1));
@@ -43,11 +42,10 @@ TEST(matrix, solve_host)
 
     // trivial case : 1x1 matrix
     {
-        matrix_type m({0}, {0,1}, vvec(1), vvec(1), vvec(1), {0});
-        auto& state = m.state_;
-        fill(state.d,  2);
-        fill(state.u, -1);
-        fill(state.rhs,1);
+        solver_type m({0}, {0,1}, vvec(1), vvec(1), vvec(1), {0});
+        fill(m.d,  2);
+        fill(m.u, -1);
+        fill(m.rhs,1);
 
         auto x = array({0});
         m.solve(x);
@@ -60,16 +58,14 @@ TEST(matrix, solve_host)
         for(auto n : make_span(2, 1001)) {
             auto p = std::vector<index_type>(n);
             std::iota(p.begin()+1, p.end(), 0);
-            matrix_type m(p, {0, n}, vvec(n), vvec(n), vvec(n), {0});
+            solver_type m(p, {0, n}, vvec(n), vvec(n), vvec(n), {0});
 
             EXPECT_EQ(m.size(), (unsigned)n);
             EXPECT_EQ(m.num_cells(), 1u);
 
-            auto& A = m.state_;
-
-            fill(A.d,  2);
-            fill(A.u, -1);
-            fill(A.rhs,1);
+            fill(m.d,  2);
+            fill(m.u, -1);
+            fill(m.rhs,1);
 
 
             auto x = array();
@@ -100,15 +96,14 @@ TEST(matrix, zero_diagonal)
     std::vector<index_type> p = {0, 0, 1, 3, 3, 5, 5};
     std::vector<index_type> c = {0, 3, 5, 7};
     std::vector<index_type> i = {0, 1, 2};
-    matrix_type m(p, c, vvec(7), vvec(7), vvec(7), i);
+    solver_type m(p, c, vvec(7), vvec(7), vvec(7), i);
 
     EXPECT_EQ(7u, m.size());
     EXPECT_EQ(3u, m.num_cells());
 
-    auto& A = m.state_;
-    assign(A.d,   vvec({2,  3,  2, 0,  0,  4,  5}));
-    assign(A.u,   vvec({0, -1, -1, 0, -1,  0, -2}));
-    assign(A.rhs, vvec({3,  5,  7, 7,  8, 16, 32}));
+    assign(m.d,   vvec({2,  3,  2, 0,  0,  4,  5}));
+    assign(m.u,   vvec({0, -1, -1, 0, -1,  0, -2}));
+    assign(m.rhs, vvec({3,  5,  7, 7,  8, 16, 32}));
 
     // Expected solution:
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
@@ -125,7 +120,7 @@ TEST(matrix, zero_diagonal_assembled)
     // test case from CV data.
 
     using util::assign;
-    using array = matrix_type::array;
+    using array = solver_type::array;
 
     // Combined matrix may have zero-blocks, corresponding to a zero dt.
     // Zero-blocks are indicated by zero value in the diagonal (the off-diagonal
@@ -162,7 +157,7 @@ TEST(matrix, zero_diagonal_assembled)
     // Expected solution:
     // x = [ 4 5 6 7 8 9 10 ]
 
-    matrix_type m(p, c, Cm, g, area, s);
+    solver_type m(p, c, Cm, g, area, s);
     m.assemble(dt, v, i, mg);
 
     auto x = array({0, 0, 0, 0, 0, 0, 0});
@@ -184,4 +179,3 @@ TEST(matrix, zero_diagonal_assembled)
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 }
-
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index 9212f3b0..8a00ddf4 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -36,8 +36,8 @@ using std::end;
 //  * matrices all have same size and structure
 TEST(matrix, assemble)
 {
-    using gpu_state = gpu::backend::matrix_state;
-    using mc_state  = multicore::backend::matrix_state;
+    using gpu_state = gpu::backend::cable_solver;
+    using mc_state  = multicore::backend::cable_solver;
 
     using T = fvm_value_type;
     using I = fvm_index_type;
diff --git a/test/unit/test_s_expr.cpp b/test/unit/test_s_expr.cpp
index f8aa256b..be436e78 100644
--- a/test/unit/test_s_expr.cpp
+++ b/test/unit/test_s_expr.cpp
@@ -479,6 +479,9 @@ std::ostream& operator<<(std::ostream& o, const membrane_capacitance& p) {
 std::ostream& operator<<(std::ostream& o, const init_int_concentration& p) {
     return o << "(ion-internal-concentration \"" << p.ion << "\" " << p.value << ')';
 }
+std::ostream& operator<<(std::ostream& o, const ion_diffusivity& p) {
+    return o << "(ion-diffusivity \"" << p.ion << "\" " << p.value << ')';
+}
 std::ostream& operator<<(std::ostream& o, const init_ext_concentration& p) {
     return o << "(ion-external-concentration \"" << p.ion << "\" " << p.value << ')';
 }
-- 
GitLab