diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 7a1fb7a4fda8a9d9c22f55f24d5e5b0550047147..569a27ecdfca6177e4b88efbf340768a4921baad 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -71,6 +71,8 @@ if(ARB_WITH_GPU)
         backends/gpu/threshold_watcher.cu
         backends/gpu/matrix_assemble.cu
         backends/gpu/matrix_fine.cu
+        backends/gpu/diffusion.cu
+        backends/gpu/fine.cu
         backends/gpu/matrix_solve.cu
         backends/gpu/multi_event_stream.cpp
         backends/gpu/multi_event_stream.cu
diff --git a/arbor/arbexcept.cpp b/arbor/arbexcept.cpp
index 37cb7a92d7b29c9408dbae1cb5a5c1d7ec05ec64..520883fe6dbcb5e32dacc41a97e093db07aa0517 100644
--- a/arbor/arbexcept.cpp
+++ b/arbor/arbexcept.cpp
@@ -88,6 +88,12 @@ no_such_parameter::no_such_parameter(const std::string& mech_name, const std::st
     param_name(param_name)
 {}
 
+illegal_diffusive_mechanism::illegal_diffusive_mechanism(const std::string& m, const std::string& i):
+        arbor_exception(pprintf("mechanism '{}' accesses diffusive value of ion '{}', but diffusivity is disabled for it.", m, i)),
+        mech{m},
+        ion{i}
+{}
+
 invalid_parameter_value::invalid_parameter_value(const std::string& mech_name, const std::string& param_name, double value):
     arbor_exception(pprintf("invalid parameter value for mechanism {} parameter {}: {}", mech_name, param_name, value)),
     mech_name(mech_name),
diff --git a/arbor/backends/gpu/diffusion.cu b/arbor/backends/gpu/diffusion.cu
new file mode 100644
index 0000000000000000000000000000000000000000..a4f129bc15faf3ccc9b52c8f0e6e83a5c044fda2
--- /dev/null
+++ b/arbor/backends/gpu/diffusion.cu
@@ -0,0 +1,274 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+
+#include "matrix_common.hpp"
+#include "diffusion.hpp"
+
+namespace arb {
+namespace gpu {
+namespace kernels {
+/// GPU implementation of Hines matrix assembly.
+/// Fine layout.
+/// For a given time step size dt:
+///     - use the precomputed alpha and alpha_d values to construct the diagonal
+///       and off diagonal of the symmetric Hines matrix.
+///     - compute the RHS of the linear system to solve.
+template <typename T, typename I>
+__global__
+void assemble_diffusion(
+        T* __restrict__ const d,
+        T* __restrict__ const rhs,
+        const T* __restrict__ const invariant_d,
+        const T* __restrict__ const concentration,
+        const T* __restrict__ const voltage,
+        const T* __restrict__ const current,
+        const T q,
+        const T* __restrict__ const conductivity,
+        const T* __restrict__ const area,
+        const I* __restrict__ const cv_to_intdom,
+        const T* __restrict__ const dt_intdom,
+        const I* __restrict__ const perm,
+        unsigned n) {
+    const unsigned tid = threadIdx.x + blockDim.x*blockIdx.x;
+    if (tid < n) {
+        const auto dt = dt_intdom[cv_to_intdom[tid]];
+        const auto p = dt > 0;
+        const auto pid = perm[tid];
+        auto u = voltage[tid];        // mV
+        auto g = conductivity[tid];   // ÂµS
+        auto J = current[tid];        // A/m^2
+        auto A = 1e-3*area[tid];      // 1e-9Â·mÂ²
+        auto X = concentration[tid];  // mM
+        // conversion from current density to concentration change
+        // using Faraday's constant
+        auto F = A/(q*96.485332);
+
+        d[pid]   = p ? (1e-3/dt   + F*g + invariant_d[tid]) : 0;
+        rhs[pid] = p ? (1e-3/dt*X + F*(u*g - J))            : concentration[tid];
+    }
+}
+
+/// GPU implementation of Hines Matrix solver.
+/// Fine-grained tree based solver.
+/// Each block solves a set of matricesb iterating over the levels of matrix
+/// and perfoming a backward and forward substitution. On each level one thread
+/// gets assigned to one branch on this level of a matrix and solves and
+/// performs the substitution. Afterwards all threads continue on the next
+/// level.
+/// To avoid idle threads, one should try that on each level, there is a similar
+/// number of branches.
+template <typename T>
+__global__
+void solve_diffusion(
+    T* __restrict__ const rhs,
+    T* __restrict__ const d,
+    const T* __restrict__ const u,
+    const level_metadata* __restrict__ const level_meta,
+    const fvm_index_type* __restrict__ const level_lengths,
+    const fvm_index_type* __restrict__ const level_parents,
+    const fvm_index_type* __restrict__ const block_index,
+    const fvm_index_type* __restrict__ const num_matrix) // number of packed matrices = number of cells
+{
+    const auto tid = threadIdx.x;
+    const auto bid = blockIdx.x;
+
+    const auto first_level = block_index[bid];
+    const auto num_levels  = block_index[bid + 1] - first_level;
+
+    const auto block_level_meta = &level_meta[first_level];
+
+    // backward substitution
+
+    for (unsigned l=0; l<num_levels-1; ++l) {
+        // Metadata for this level and the next level
+        const auto& lvl_meta = block_level_meta[l];
+        const auto& next_lvl_meta = block_level_meta[l+1];
+
+        // Addresses of the first elements of level_lengths and level_parents
+        // that belong to this level
+        const auto lvl_lengths = level_lengths + lvl_meta.level_data_index;
+        const auto lvl_parents = level_parents + lvl_meta.level_data_index;
+
+        const unsigned width = lvl_meta.num_branches;
+
+
+        // Perform backward substitution for each branch on this level.
+        // One thread per branch.
+        if (tid < width) {
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = lvl_meta.matrix_data_index + tid;
+
+            // Zero diagonal term implies dt==0; just leave rhs (for whole matrix)
+            // alone in that case.
+
+            // Each cell has a different `dt`, because we choose time step size
+            // according to when the next event is arriving at a cell. So, some
+            // cells require more time steps than others, but we have to solve
+            // all the matrices at the same time. When a cell finishes, we put a
+            // `0` on the diagonal to mark that it should not be solved for.
+            if (d[pos]!=0) {
+                // each branch perform substitution
+                for (unsigned i=0; i<len-1; ++i) {
+                    const unsigned next_pos = pos + width;
+                    const auto d_next = d[next_pos];
+                    const auto rhs_next = rhs[next_pos];
+                    const T factor = -u[pos]/d[pos];
+                    d[next_pos] = fma(factor, u[pos], d_next);
+                    rhs[next_pos] = fma(factor, rhs[pos], rhs_next);
+                    pos = next_pos;
+                }
+                // Update d and rhs at the parent node of this branch.
+                // A parent may have more than one contributing to it, so we use
+                // atomic updates to avoid races conditions.
+                const unsigned parent_index = next_lvl_meta.matrix_data_index;
+                const unsigned p = parent_index + lvl_parents[tid];
+
+                const T factor = -u[pos] / d[pos];
+                gpu_atomic_add(d + p, factor*u[pos]);
+                gpu_atomic_add(rhs + p, factor*rhs[pos]);
+            }
+        }
+        __syncthreads();
+    }
+
+    // Solve the root
+    {
+        // The levels are sorted such that the root is the last level
+        const auto& last_lvl_meta = block_level_meta[num_levels-1];
+        const auto lvl_lengths = level_lengths + last_lvl_meta.level_data_index;
+
+        const unsigned width = num_matrix[bid];
+
+        if (tid < width) {
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = last_lvl_meta.matrix_data_index + tid;
+
+            if (d[pos]!=0) {
+                // backward
+                for (unsigned i=0; i<len-1; ++i) {
+                    const unsigned next_pos = pos + width;
+                    const T factor = -u[pos] / d[pos];
+                    const auto rhs_next = rhs[next_pos];
+                    const auto d_next = d[next_pos];
+                    d[next_pos]   = fma(factor, u[pos], d_next);
+                    rhs[next_pos] = fma(factor, rhs[pos], rhs_next);
+                    pos = next_pos;
+                }
+
+                auto rhsp = rhs[pos] / d[pos];
+                rhs[pos] = rhsp;
+                pos -= width;
+
+                // forward
+                for (unsigned i=0; i<len-1; ++i) {
+                    rhsp = rhs[pos] - u[pos]*rhsp;
+                    rhsp /= d[pos];
+                    rhs[pos] = rhsp;
+                    pos -= width;
+                }
+            }
+        }
+    }
+
+    // forward substitution
+
+    // take great care with loop limits decrementing unsigned counter l
+    for (unsigned l=num_levels-1; l>0; --l) {
+        const auto& lvl_meta = block_level_meta[l-1];
+
+        // Addresses of the first elements of level_lengths and level_parents
+        // that belong to this level
+        const auto lvl_lengths = level_lengths + lvl_meta.level_data_index;
+        const auto lvl_parents = level_parents + lvl_meta.level_data_index;
+
+        const unsigned width = lvl_meta.num_branches;
+        const unsigned parent_index = block_level_meta[l].matrix_data_index;
+
+        __syncthreads();
+
+        // Perform forward-substitution for each branch on this level.
+        // One thread per branch.
+        if (tid < width) {
+            // Find the index of the first node in this branch.
+            const unsigned len = lvl_lengths[tid];
+            unsigned pos = lvl_meta.matrix_data_index + (len-1)*width + tid;
+
+            if (d[pos]!=0) {
+                // Load the rhs value for the parent node of this branch.
+                const unsigned p = parent_index + lvl_parents[tid];
+                T rhsp = rhs[p];
+                // each branch perform substitution
+                for (unsigned i=0; i<len; ++i) {
+                    rhsp = rhs[pos] - u[pos]*rhsp;
+                    rhsp /= d[pos];
+                    rhs[pos] = rhsp;
+                    pos -= width;
+                }
+            }
+        }
+    }
+}
+
+} // namespace kernels
+
+ARB_ARBOR_API void assemble_diffusion(
+    fvm_value_type* d,
+    fvm_value_type* rhs,
+    const fvm_value_type* invariant_d,
+    const fvm_value_type* concentration,
+    const fvm_value_type* voltage,
+    const fvm_value_type* current,
+    fvm_value_type q,
+    const fvm_value_type* conductivity,
+    const fvm_value_type* area,
+    const fvm_index_type* cv_to_intdom,
+    const fvm_value_type* dt_intdom,
+    const fvm_index_type* perm,
+    unsigned n)
+{
+    const unsigned block_dim = 128;
+    const unsigned num_blocks = impl::block_count(n, block_dim);
+
+    kernels::assemble_diffusion<<<num_blocks, block_dim>>>(
+        d, rhs, invariant_d, concentration, voltage, current, q, conductivity, area,
+        cv_to_intdom, dt_intdom, perm, n);
+}
+
+// Example:
+//
+//         block 0                  block 1              block 2
+// .~~~~~~~~~~~~~~~~~~.  .~~~~~~~~~~~~~~~~~~~~~~~~.  .~~~~~~~~~~~ ~ ~
+//
+//  L0 \  /                                           L5    \  /
+//      \/                                                   \/
+//  L1   \   /   \   /    L3 \   /   \ | /   \   /    L6 \   /  . . .
+//        \ /     \ /         \ /     \|/     \ /         \ /
+//  L2     |       |      L4   |       |       |      L7   |
+//         |       |           |       |       |           |
+//
+// levels       = [L0, L1, L2, L3, L4, L5, L6, L7, ... ]
+// block_index  = [0, 3, 5, 8, ...]
+// num_levels   = [3, 2, 3, ...]
+// num_cells    = [2, 3, ...]
+// num_blocks   = level_start.size() - 1 = num_levels.size() = num_cells.size()
+ARB_ARBOR_API void solve_diffusion(
+    fvm_value_type* rhs,
+    fvm_value_type* d,                     // diagonal values
+    const fvm_value_type* u,               // upper diagonal (and lower diagonal as the matrix is SPD)
+    const level_metadata* level_meta,      // information pertaining to each level
+    const fvm_index_type* level_lengths,   // lengths of branches of every level concatenated
+    const fvm_index_type* level_parents,   // parents of branches of every level concatenated
+    const fvm_index_type* block_index,     // start index into levels for each gpu block
+    fvm_index_type* num_cells,             // the number of cells packed into this single matrix
+    fvm_index_type* padded_size,           // length of rhs, d, u, including padding
+    unsigned num_blocks,                   // number of blocks
+    unsigned blocksize)                    // size of each block
+{
+    kernels::solve_diffusion<<<num_blocks, blocksize>>>(
+        rhs, d, u, level_meta, level_lengths, level_parents, block_index,
+        num_cells);
+}
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/diffusion.hpp b/arbor/backends/gpu/diffusion.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f07c0539da0200c8f0ba5f4e03d9ca31f4ec5bd6
--- /dev/null
+++ b/arbor/backends/gpu/diffusion.hpp
@@ -0,0 +1,40 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/export.hpp>
+
+#include <ostream>
+
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+
+ARB_ARBOR_API void assemble_diffusion(
+    fvm_value_type* d,
+    fvm_value_type* rhs,
+    const fvm_value_type* invariant_d,
+    const fvm_value_type* concentration,
+    const fvm_value_type* voltage,
+    const fvm_value_type* current,
+    const fvm_value_type q,
+    const fvm_value_type* conductivity,
+    const fvm_value_type* area,
+    const fvm_index_type* cv_to_intdom,
+    const fvm_value_type* dt_intdom,
+    const fvm_index_type* perm,
+    unsigned n);
+
+ARB_ARBOR_API void solve_diffusion(
+    fvm_value_type* rhs,
+    fvm_value_type* d,                     // diagonal values
+    const fvm_value_type* u,               // upper diagonal (and lower diagonal as the matrix is SPD)
+    const level_metadata* level_meta,      // information pertaining to each level
+    const fvm_index_type* level_lengths,   // lengths of branches of every level concatenated
+    const fvm_index_type* level_parents,   // parents of branches of every level concatenated
+    const fvm_index_type* block_index,     // start index (exclusive) into levels for each gpu block
+    fvm_index_type* num_cells,             // the number of cells packed into this single matrix
+    fvm_index_type* padded_size,           // length of rhs, d, u, including padding
+    unsigned num_blocks,                   // number of blocks
+    unsigned blocksize);                   // size of each block
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/diffusion_state.hpp b/arbor/backends/gpu/diffusion_state.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..cb18bcb92ca7f3ffa153aadb5d461c6f79d5a5e8
--- /dev/null
+++ b/arbor/backends/gpu/diffusion_state.hpp
@@ -0,0 +1,462 @@
+#pragma once
+
+#include <cstring>
+
+#include <vector>
+#include <type_traits>
+
+#include <arbor/common_types.hpp>
+
+#include "memory/memory.hpp"
+#include "util/partition.hpp"
+#include "util/rangeutil.hpp"
+#include "util/span.hpp"
+#include "tree.hpp"
+#include "diffusion.hpp"
+#include "forest.hpp"
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+
+template <typename T, typename I>
+struct diffusion_state {
+public:
+    using value_type = T;
+    using size_type = I;
+
+    using array      = memory::device_vector<value_type>;
+    using view       = typename array::view_type;
+    using const_view = typename array::const_view_type;
+    using iarray     = memory::device_vector<size_type>;
+
+    using metadata_array = memory::device_vector<level_metadata>;
+
+    // Maps control volume to integration domain
+    iarray cv_to_intdom;
+
+    array d;     // [Î¼S]
+    array u;     // [Î¼S]
+    array rhs;   // [nA]
+
+    // Required for matrix assembly
+    array cv_area;             // [Î¼m^2]
+
+    // Invariant part of the matrix diagonal
+    array invariant_d;         // [Î¼S]
+
+    // Solution in unpacked format
+    array solution_;
+
+    // Maximum number of branches in each level per block
+    unsigned max_branches_per_level;
+
+    // Number of rows in matrix
+    unsigned matrix_size;
+
+    // Number of cells
+    unsigned num_cells;
+    iarray num_cells_in_block;
+
+    // End of the data of each level
+    iarray data_partition;
+    std::size_t data_size;
+
+    // Metadata for each level
+    // Includes indices into d, u, rhs and level_lengths, level_parents
+    metadata_array level_meta;
+
+    // Stores the lengths (number of compartments) of the branches of each
+    // level sequentially in memory. Indexed by level_metadata::level_data_index
+    iarray level_lengths;
+
+    // Stores the indices of the parent of each of the branches in each
+    // level sequentially in memory. Indexed by level_metadata::level_data_index
+    iarray level_parents;
+
+    // Stores the indices to the first level belonging to each block
+    // block b owns { levels[block_index[b]], ..., levels[block_index[b+1] - 1] }
+    // there is an additional entry at the end of the vector to make the above
+    // computation safe
+    iarray block_index;
+
+    // Permutation from front end storage to packed storage
+    //      `solver_format[perm[i]] = external_format[i]`
+    iarray perm;
+
+
+    diffusion_state() = default;
+
+    // constructor for fine-grained matrix.
+    diffusion_state(const std::vector<size_type>& p,
+                    const std::vector<size_type>& cell_cv_divs,
+                    const std::vector<value_type>& face_diffusivity,
+                    const std::vector<value_type>& area,
+                    const std::vector<size_type>& cell_intdom) {
+        using util::make_span;
+        constexpr unsigned npos = unsigned(-1);
+
+        max_branches_per_level = 128;
+
+        num_cells = cell_cv_divs.size()-1;
+
+        forest trees(p, cell_cv_divs);
+        trees.optimize();
+
+        // Now distribute the cells into gpu blocks.
+        // While the total number of branches on each level of theses cells in a
+        // block are less than `max_branches_per_level` we add more cells. If
+        // one block is full, we start a new gpu block.
+
+        unsigned current_block = 0;
+        std::vector<unsigned> block_num_branches_per_depth;
+        std::vector<unsigned> block_ix(num_cells);
+
+        // Accumulate num cells in block in a temporary vector to be copied to the device
+        std::vector<size_type> temp_ncells_in_block;
+        temp_ncells_in_block.resize(1, 0);
+
+        // branch_map = branch_maps[block] is a branch map for each gpu block
+        // branch_map[depth] is list of branches is this level
+        // each branch branch_map[depth][i] has
+        // {id, parent_id, start_idx, parent_idx, length}
+        std::vector<std::vector<std::vector<branch>>> branch_maps;
+        branch_maps.resize(1);
+
+        unsigned num_branches = 0u;
+        for (auto c: make_span(0u, num_cells)) {
+            auto cell_start = cell_cv_divs[c];
+            auto cell_tree = trees.branch_tree(c);
+            auto fine_tree = trees.compartment_tree(c);
+            auto branch_starts  = trees.branch_offsets(c);
+            auto branch_lengths = trees.branch_lengths(c);
+
+            auto depths = depth_from_root(cell_tree);
+
+            // calculate the number of levels in this cell
+            auto cell_num_levels = util::max_value(depths)+1u;
+
+            auto num_cell_branches = cell_tree.num_segments();
+
+            // count number of branches per level
+            std::vector<unsigned> cell_num_branches_per_depth(cell_num_levels, 0u);
+            for (auto i: make_span(num_cell_branches)) {
+                cell_num_branches_per_depth[depths[i]] += 1;
+            }
+            // resize the block levels if neccessary
+            if (cell_num_levels > block_num_branches_per_depth.size()) {
+                block_num_branches_per_depth.resize(cell_num_levels, 0);
+            }
+
+
+            // check if we can fit the current cell into the last gpu block
+            bool fits_current_block = true;
+            for (auto i: make_span(cell_num_levels)) {
+                unsigned new_branches_per_depth =
+                    block_num_branches_per_depth[i]
+                    + cell_num_branches_per_depth[i];
+                if (new_branches_per_depth > max_branches_per_level) {
+                    fits_current_block = false;
+                }
+            }
+            if (fits_current_block) {
+                // put the cell into current block
+                block_ix[c] = current_block;
+                temp_ncells_in_block[block_ix[c]] += 1;
+                // and increment counter
+                for (auto i: make_span(cell_num_levels)) {
+                    block_num_branches_per_depth[i] += cell_num_branches_per_depth[i];
+                }
+            } else {
+                // otherwise start a new block
+                block_ix[c] = current_block + 1;
+                temp_ncells_in_block.push_back(1);
+                branch_maps.resize(branch_maps.size()+1);
+                current_block += 1;
+                // and reset counter
+                block_num_branches_per_depth = cell_num_branches_per_depth;
+                for (auto num: block_num_branches_per_depth) {
+                    if (num > max_branches_per_level) {
+                        throw std::runtime_error(
+                            "Could not fit " + std::to_string(num)
+                            + " branches in a block of size "
+                            + std::to_string(max_branches_per_level));
+                    }
+                }
+            }
+            num_cells_in_block = memory::make_const_view(temp_ncells_in_block);
+
+
+            // the branch map for the block in which we put the cell
+            // maps levels to a list of branches in that level
+            auto& branch_map = branch_maps[block_ix[c]];
+
+            // build branch_map:
+            // branch_map[i] is a list of branch meta-data for branches with depth i
+            if (cell_num_levels > branch_map.size()) {
+                branch_map.resize(cell_num_levels);
+            }
+            for (auto i: make_span(num_cell_branches)) {
+                branch b;
+                auto depth = depths[i];
+                // give the branch a unique id number
+                b.id = i + num_branches;
+                // take care to mark branches with no parents with npos
+                b.parent_id = cell_tree.parent(i)==cell_tree.no_parent ?
+                    npos : cell_tree.parent(i) + num_branches;
+                b.start_idx = branch_starts[i] + cell_start;
+                b.length = branch_lengths[i];
+                b.parent_idx = p[b.start_idx] + cell_start;
+                branch_map[depth].push_back(b);
+            }
+            // total number of branches of all cells
+            num_branches += num_cell_branches;
+        }
+
+        for (auto& branch_map: branch_maps) {
+            // reverse the levels
+            std::reverse(branch_map.begin(), branch_map.end());
+
+            // Sort all branches on each level in descending order of length.
+            // Later, branches will be partitioned over thread blocks, and we will
+            // take advantage of the fact that the first branch in a partition is
+            // the longest, to determine how to pack all the branches in a block.
+            for (auto& branches: branch_map) {
+                util::sort(branches);
+            }
+        }
+
+        // The branches generated above have been assigned contiguous ids.
+        // Now generate a vector of branch_loc, one for each branch, that
+        // allow for quick lookup by id of the level and index within a level
+        // of each branch.
+        // This information is only used in the generation of the levels below.
+
+        // Helper for recording location of a branch once packed.
+        struct branch_loc {
+            unsigned block; // the gpu block containing the cell to which the branch blongs to
+            unsigned level; // the level containing the branch
+            unsigned index; // the index of the branch on that level
+        };
+
+        // branch_locs will hold the location information for each branch.
+        std::vector<branch_loc> branch_locs(num_branches);
+        for (unsigned b: make_span(branch_maps.size())) {
+            const auto& branch_map = branch_maps[b];
+            for (unsigned l: make_span(branch_map.size())) {
+                const auto& branches = branch_map[l];
+
+                // Record the location information
+                for (auto i=0u; i<branches.size(); ++i) {
+                    const auto& branch = branches[i];
+                    branch_locs[branch.id] = {b, l, i};
+                }
+            }
+        }
+
+        // Construct description for the set of branches on each level for each
+        // block. This is later used to sort the branches in each block in each
+        // level into conineous chunks which are easier to read for the gpu
+        // kernel.
+
+        // Accumulate metadata about the levels, level lengths, level parents,
+        // data_partition and block indices in temporary vectors to be copied to the device
+        std::vector<level_metadata> temp_meta;
+        std::vector<size_type> temp_lengths, temp_parents, temp_data_part, temp_block_index;
+
+        temp_block_index.reserve(branch_maps.size() + 1);
+        temp_block_index.push_back(0);
+        temp_data_part.reserve(branch_maps.size());
+
+        // Offset into the packed data format, used to apply permutation on data
+        auto pos = 0u;
+        // Offset into the packed data format, used to access level_lengths and level_parents
+        auto data_start = 0u;
+        for (const auto& branch_map: branch_maps) {
+            for (const auto& lvl_branches: branch_map) {
+
+                level_metadata lvl_meta;
+                std::vector<size_type> lvl_lengths(lvl_branches.size()), lvl_parents(lvl_branches.size());
+
+                lvl_meta.num_branches = lvl_branches.size();
+                lvl_meta.matrix_data_index = pos;
+                lvl_meta.level_data_index = data_start;
+
+                // The length of the first branch is the upper bound on branch
+                // length as they are sorted in descending order of length.
+                lvl_meta.max_length = lvl_branches.front().length;
+
+                unsigned bi = 0u;
+                for (const auto& b: lvl_branches) {
+                    // Set the length of the branch.
+                    lvl_lengths[bi] = b.length;
+
+                    // Set the parent indexes. During the forward and backward
+                    // substitution phases each branch accesses the last node in
+                    // its parent branch.
+                    auto index = b.parent_id==npos? npos: branch_locs[b.parent_id].index;
+                    lvl_parents[bi] = index;
+                    ++bi;
+                }
+
+                data_start+= lvl_meta.num_branches;
+                pos += lvl_meta.max_length*lvl_meta.num_branches;
+
+                temp_meta.push_back(std::move(lvl_meta));
+                util::append(temp_lengths, lvl_lengths);
+                util::append(temp_parents, lvl_parents);
+            }
+            auto prev_end = temp_block_index.back();
+            temp_block_index.push_back(prev_end + branch_map.size());
+            temp_data_part.push_back(pos);
+        }
+        data_size = pos;
+
+        // set matrix state
+        matrix_size = p.size();
+
+        // form the permutation index used to reorder vectors to/from the
+        // ordering used by the fine grained matrix storage.
+        std::vector<size_type> perm_tmp(matrix_size);
+        for (auto block: make_span(branch_maps.size())) {
+            const auto& branch_map = branch_maps[block];
+            const auto first_level = temp_block_index[block];
+
+            for (auto i: make_span(temp_block_index[block + 1] - first_level)) {
+                const auto& l = temp_meta[first_level + i];
+                for (auto j: make_span(l.num_branches)) {
+                    const auto& b = branch_map[i][j];
+                    auto j_lvl_length = temp_lengths[l.level_data_index + j];
+                    auto to = l.matrix_data_index + j + l.num_branches*(j_lvl_length-1);
+                    auto from = b.start_idx;
+                    for (auto k: make_span(b.length)) {
+                        perm_tmp[from + k] = to - k*l.num_branches;
+                    }
+                }
+            }
+        }
+
+        level_meta     = memory::make_const_view(temp_meta);
+        level_lengths  = memory::make_const_view(temp_lengths);
+        level_parents  = memory::make_const_view(temp_parents);
+        data_partition = memory::make_const_view(temp_data_part);
+        block_index    = memory::make_const_view(temp_block_index);
+
+        auto perm_balancing = trees.permutation();
+
+        // apppy permutation form balancing
+        std::vector<size_type> perm_tmp2(matrix_size);
+        for (auto i: make_span(matrix_size)) {
+             // This is CORRECT! verified by using the ring benchmark with root=0 (where the permutation is actually not id)
+            perm_tmp2[perm_balancing[i]] = perm_tmp[i];
+        }
+        // copy permutation to device memory
+        perm = memory::make_const_view(perm_tmp2);
+
+
+        // Summary of fields and their storage format:
+        //
+        // face_conductance : not needed, don't store
+        // d, u, rhs        : packed
+        // invariant_d      : flat
+        // cv_to_cell       : flat
+        // area             : flat
+
+        // the invariant part of d is stored in in flat form
+        std::vector<value_type> invariant_d_tmp(matrix_size, 0);
+        std::vector<value_type> temp_u_shuffled(matrix_size, 0);
+        array u_shuffled;
+        for (auto i: make_span(1u, matrix_size)) {
+            auto gij = face_diffusivity[i];
+
+            temp_u_shuffled[i] = -gij;
+            invariant_d_tmp[i] += gij;
+            if (p[i]!=-1) {
+                invariant_d_tmp[p[i]] += gij;
+            }
+        }
+        u_shuffled = memory::make_const_view(temp_u_shuffled);
+
+        // the matrix components u, d and rhs are stored in packed form
+        auto nan = std::numeric_limits<double>::quiet_NaN();
+        d   = array(data_size, nan);
+        u   = array(data_size, nan);
+        rhs = array(data_size, nan);
+
+        // transform u_shuffled values into packed u vector.
+        flat_to_packed(u_shuffled, u);
+
+        // the invariant part of d and cv_area are in flat form
+        cv_area = memory::make_const_view(area);
+        invariant_d = memory::make_const_view(invariant_d_tmp);
+
+        // calculate the cv -> cell mappings
+        std::vector<size_type> cv_to_cell_tmp(matrix_size);
+        size_type ci = 0;
+        for (auto cv_span: util::partition_view(cell_cv_divs)) {
+            util::fill(util::subrange_view(cv_to_cell_tmp, cv_span), ci);
+            ++ci;
+        }
+        std::vector<size_type> cv_to_intdom_tmp(matrix_size);
+        for (auto i = 0ul; i < cv_to_cell_tmp.size(); ++i) {
+            cv_to_intdom_tmp[i] = cell_intdom[cv_to_cell_tmp[i]];
+        }
+        cv_to_intdom = memory::make_const_view(cv_to_intdom_tmp);
+    }
+
+    // Assemble the matrix
+    // Afterwards the diagonal and RHS will have been set given dt, voltage, current, and conductivity.
+    //   dt_intdom [ms] (per integration domain)
+    //   voltage [mV]
+    //   current density [A/mÂ²]
+    //   conductivity [kS/mÂ²]
+    void assemble(const_view dt_intdom, const_view concentration, const_view voltage, const_view current, const_view conductivity, fvm_value_type q) {
+        assemble_diffusion(d.data(),
+                           rhs.data(),
+                           invariant_d.data(),
+                           concentration.data(),
+                           voltage.data(),
+                           current.data(),
+                           q,
+                           conductivity.data(),
+                           cv_area.data(),
+                           cv_to_intdom.data(),
+                           dt_intdom.data(),
+                           perm.data(),
+                           size());
+    }
+
+    void solve(array& to) {
+        solve_diffusion(rhs.data(),
+                        d.data(),
+                        u.data(),
+                        level_meta.data(),
+                        level_lengths.data(),
+                        level_parents.data(),
+                        block_index.data(),
+                        num_cells_in_block.data(),
+                        data_partition.data(),
+                        num_cells_in_block.size(),
+                        max_branches_per_level);
+        // unpermute the solution
+        packed_to_flat(rhs, to);
+    }
+
+    std::size_t size() const { return matrix_size; }
+
+private:
+    void flat_to_packed(const array& from, array& to ) {
+        arb_assert(from.size()==matrix_size);
+        arb_assert(to.size()==data_size);
+        scatter(from.data(), to.data(), perm.data(), perm.size());
+    }
+
+    void packed_to_flat(const array& from, array& to ) {
+        arb_assert(from.size()==data_size);
+        arb_assert(to.size()==matrix_size);
+        gather(from.data(), to.data(), perm.data(), perm.size());
+    }
+};
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/fine.cu b/arbor/backends/gpu/fine.cu
new file mode 100644
index 0000000000000000000000000000000000000000..c695ba53117eef86d9e6e3d833b4de762cf2c6c3
--- /dev/null
+++ b/arbor/backends/gpu/fine.cu
@@ -0,0 +1,65 @@
+#include <arbor/fvm_types.hpp>
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+
+#include "fine.hpp"
+
+namespace arb {
+namespace gpu {
+namespace kernels {
+// to[i] = from[p[i]]
+template <typename T, typename I>
+__global__
+void gather(const T* __restrict__ const from,
+            T* __restrict__ const to,
+            const I* __restrict__ const p,
+            unsigned n) {
+    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
+
+    if (i<n) {
+        to[i] = from[p[i]];
+    }
+}
+
+// to[p[i]] = from[i]
+template <typename T, typename I>
+__global__
+void scatter(const T* __restrict__ const from,
+             T* __restrict__ const to,
+             const I* __restrict__ const p,
+             unsigned n) {
+    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
+
+    if (i<n) {
+        to[p[i]] = from[i];
+    }
+}
+
+} // namespace kernels
+
+ARB_ARBOR_API void gather(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n)
+{
+    constexpr unsigned blockdim = 128;
+    const unsigned griddim = impl::block_count(n, blockdim);
+
+    kernels::gather<<<griddim, blockdim>>>(from, to, p, n);
+}
+
+ARB_ARBOR_API void scatter(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n)
+{
+    constexpr unsigned blockdim = 128;
+    const unsigned griddim = impl::block_count(n, blockdim);
+
+    kernels::scatter<<<griddim, blockdim>>>(from, to, p, n);
+}
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/fine.hpp b/arbor/backends/gpu/fine.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..540bcea838535bb718a6a377d3295199b8985c4a
--- /dev/null
+++ b/arbor/backends/gpu/fine.hpp
@@ -0,0 +1,85 @@
+#pragma once
+
+#include <iostream>
+
+#include <arbor/export.hpp>
+#include <arbor/fvm_types.hpp>
+
+namespace arb {
+namespace gpu {
+
+
+// Helper type for branch meta data in setup phase of fine grained
+// matrix storage+solver.
+//
+//      leaf
+//      .
+//      .
+//      .
+//  -   *
+//      |
+//  l   *
+//  e   |
+//  n   *
+//  g   |
+//  t   *
+//  h   |
+//  -   start_idx
+//      |
+//      parent_idx
+//      |
+//      .
+//      .
+//      .
+//      root
+struct branch {
+    unsigned id;         // branch id
+    unsigned parent_id;  // parent branch id
+    unsigned parent_idx; //
+    unsigned start_idx;  // the index of the first node in the input parent index
+    unsigned length;     // the number of nodes in the branch
+};
+
+// order branches by:
+//  - descending length
+//  - ascending id
+inline
+bool operator<(const branch& lhs, const branch& rhs) {
+    if (lhs.length!=rhs.length) {
+        return lhs.length>rhs.length;
+    } else {
+        return lhs.id<rhs.id;
+    }
+}
+
+inline
+std::ostream& operator<<(std::ostream& o, branch b) {
+    return o << "[" << b.id
+        << ", len " << b.length
+        << ", pid " << b.parent_idx
+        << ", sta " << b.start_idx
+        << "]";
+}
+
+struct level_metadata {
+    unsigned num_branches = 0; // Number of branches in a level
+    unsigned max_length = 0;   // Length of the longest branch
+    unsigned matrix_data_index = 0;   // Index into data values (d, u, rhs) of the first branch
+    unsigned level_data_index  = 0;   // Index into data values (lengths, parents) of each level
+};
+
+// C wrappers around kernels
+ARB_ARBOR_API void gather(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n);
+
+ARB_ARBOR_API void scatter(
+    const fvm_value_type* from,
+    fvm_value_type* to,
+    const fvm_index_type* p,
+    unsigned n);
+
+} // gpu
+} // arb
diff --git a/arbor/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp
index 712b2fc72f4359b92434dfcf3118e9881e79158f..79a373e94f68f2cc57b5061d962d743c7056c5f8 100644
--- a/arbor/backends/gpu/fvm.hpp
+++ b/arbor/backends/gpu/fvm.hpp
@@ -16,8 +16,6 @@
 
 #include "threshold_watcher.hpp"
 
-#include "matrix_state_fine.hpp"
-
 namespace arb {
 namespace gpu {
 
@@ -42,11 +40,11 @@ struct backend {
         return memory::on_host(v);
     }
 
-    using matrix_state = arb::gpu::matrix_state_fine<value_type, index_type>;
-    using threshold_watcher = arb::gpu::threshold_watcher;
-
+    using threshold_watcher        = arb::gpu::threshold_watcher;
+    using cable_solver             = arb::gpu::matrix_state_fine<fvm_value_type, fvm_index_type>;
+    using diffusion_solver         = arb::gpu::diffusion_state<fvm_value_type, fvm_index_type>;
     using deliverable_event_stream = arb::gpu::deliverable_event_stream;
-    using sample_event_stream = arb::gpu::sample_event_stream;
+    using sample_event_stream      = arb::gpu::sample_event_stream;
 
     using shared_state = arb::gpu::shared_state;
     using ion_state = arb::gpu::ion_state;
diff --git a/arbor/backends/gpu/matrix_fine.cu b/arbor/backends/gpu/matrix_fine.cu
index 675f6312c57977dde53594b591530872053eaf0c..41156f2de111c7178f337c81e93a48aaef87b9d3 100644
--- a/arbor/backends/gpu/matrix_fine.cu
+++ b/arbor/backends/gpu/matrix_fine.cu
@@ -7,41 +7,8 @@
 
 namespace arb {
 namespace gpu {
-
 namespace kernels {
 
-//
-// gather and scatter kernels
-//
-
-// to[i] = from[p[i]]
-template <typename T, typename I>
-__global__
-void gather(const T* __restrict__ const from,
-            T* __restrict__ const to,
-            const I* __restrict__ const p,
-            unsigned n) {
-    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
-
-    if (i<n) {
-        to[i] = from[p[i]];
-    }
-}
-
-// to[p[i]] = from[i]
-template <typename T, typename I>
-__global__
-void scatter(const T* __restrict__ const from,
-             T* __restrict__ const to,
-             const I* __restrict__ const p,
-             unsigned n) {
-    unsigned i = threadIdx.x + blockDim.x*blockIdx.x;
-
-    if (i<n) {
-        to[p[i]] = from[i];
-    }
-}
-
 /// GPU implementation of Hines matrix assembly.
 /// Fine layout.
 /// For a given time step size dt:
@@ -245,30 +212,6 @@ void solve_matrix_fine(
 
 } // namespace kernels
 
-ARB_ARBOR_API void gather(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n)
-{
-    constexpr unsigned blockdim = 128;
-    const unsigned griddim = impl::block_count(n, blockdim);
-
-    kernels::gather<<<griddim, blockdim>>>(from, to, p, n);
-}
-
-ARB_ARBOR_API void scatter(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n)
-{
-    constexpr unsigned blockdim = 128;
-    const unsigned griddim = impl::block_count(n, blockdim);
-
-    kernels::scatter<<<griddim, blockdim>>>(from, to, p, n);
-}
-
 ARB_ARBOR_API void assemble_matrix_fine(
     fvm_value_type* d,
     fvm_value_type* rhs,
diff --git a/arbor/backends/gpu/matrix_fine.hpp b/arbor/backends/gpu/matrix_fine.hpp
index 02c549039a591d2492202ce42cce702b25431ae7..66997323c3e14773d97feb1f4325a68b8ba50ced 100644
--- a/arbor/backends/gpu/matrix_fine.hpp
+++ b/arbor/backends/gpu/matrix_fine.hpp
@@ -1,31 +1,12 @@
 #include <arbor/fvm_types.hpp>
-
 #include <arbor/export.hpp>
+
+#include "fine.hpp"
+
 #include <ostream>
 
 namespace arb {
 namespace gpu {
-
-struct level_metadata {
-    unsigned num_branches = 0; // Number of branches in a level
-    unsigned max_length = 0;   // Length of the longest branch
-    unsigned matrix_data_index = 0;   // Index into data values (d, u, rhs) of the first branch
-    unsigned level_data_index  = 0;   // Index into data values (lengths, parents) of each level
-};
-
-// C wrappers around kernels
-ARB_ARBOR_API void gather(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n);
-
-ARB_ARBOR_API void scatter(
-    const fvm_value_type* from,
-    fvm_value_type* to,
-    const fvm_index_type* p,
-    unsigned n);
-
 ARB_ARBOR_API void assemble_matrix_fine(
     fvm_value_type* d,
     fvm_value_type* rhs,
diff --git a/arbor/backends/gpu/matrix_state_fine.hpp b/arbor/backends/gpu/matrix_state_fine.hpp
index d460597cfbaa86052d2fa87f2939bebb19467692..5959513dbe0267f68817f3de98842a4c926f1fcb 100644
--- a/arbor/backends/gpu/matrix_state_fine.hpp
+++ b/arbor/backends/gpu/matrix_state_fine.hpp
@@ -12,65 +12,13 @@
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
 #include "tree.hpp"
-
+#include "fine.hpp"
 #include "matrix_fine.hpp"
 #include "forest.hpp"
 
 namespace arb {
 namespace gpu {
 
-// Helper type for branch meta data in setup phase of fine grained
-// matrix storage+solver.
-//
-//      leaf
-//      .
-//      .
-//      .
-//  -   *
-//      |
-//  l   *
-//  e   |
-//  n   *
-//  g   |
-//  t   *
-//  h   |
-//  -   start_idx
-//      |
-//      parent_idx
-//      |
-//      .
-//      .
-//      .
-//      root
-struct branch {
-    unsigned id;         // branch id
-    unsigned parent_id;  // parent branch id
-    unsigned parent_idx; //
-    unsigned start_idx;  // the index of the first node in the input parent index
-    unsigned length;     // the number of nodes in the branch
-};
-
-// order branches by:
-//  - descending length
-//  - ascending id
-inline
-bool operator<(const branch& lhs, const branch& rhs) {
-    if (lhs.length!=rhs.length) {
-        return lhs.length>rhs.length;
-    } else {
-        return lhs.id<rhs.id;
-    }
-}
-
-inline
-std::ostream& operator<<(std::ostream& o, branch b) {
-    return o << "[" << b.id
-        << ", len " << b.length
-        << ", pid " << b.parent_idx
-        << ", sta " << b.start_idx
-        << "]";
-}
-
 template <typename T, typename I>
 struct matrix_state_fine {
 public:
@@ -503,11 +451,9 @@ public:
         packed_to_flat(rhs, to);
     }
 
-private:
-    std::size_t size() const {
-        return matrix_size;
-    }
+    std::size_t size() const { return matrix_size; }
 
+private:
     void flat_to_packed(const array& from, array& to ) {
         arb_assert(from.size()==matrix_size);
         arb_assert(to.size()==data_size);
diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
index b01cd663082d365c9e33afa82051ee06ce1d9262..382151b1fbfa6653d9e108a195a36f6edc4bcbfd 100644
--- a/arbor/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -55,8 +55,8 @@ std::pair<fvm_value_type, fvm_value_type> minmax_value_impl(fvm_size_type n, con
 ion_state::ion_state(
     int charge,
     const fvm_ion_config& ion_data,
-    unsigned // alignment/padding ignored.
-):
+    unsigned, // alignment/padding ignored.
+    solver_ptr ptr):
     write_eX_(ion_data.revpot_written),
     write_Xo_(ion_data.econc_written),
     write_Xi_(ion_data.iconc_written),
@@ -64,30 +64,35 @@ ion_state::ion_state(
     iX_(ion_data.cv.size(), NAN),
     eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end()),
     Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end()),
+    Xd_(ion_data.cv.size(), NAN),
     Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end()),
+    gX_(ion_data.cv.size(), NAN),
     init_Xi_(make_const_view(ion_data.init_iconc)),
     init_Xo_(make_const_view(ion_data.init_econc)),
     reset_Xi_(make_const_view(ion_data.reset_iconc)),
     reset_Xo_(make_const_view(ion_data.reset_econc)),
     init_eX_(make_const_view(ion_data.init_revpot)),
-    charge(1u, charge)
-{
+    charge(1u, charge),
+    solver(std::move(ptr)) {
     arb_assert(node_index_.size()==init_Xi_.size());
     arb_assert(node_index_.size()==init_Xo_.size());
     arb_assert(node_index_.size()==init_eX_.size());
 }
 
 void ion_state::init_concentration() {
+    // NB. not resetting Xd here, it's controlled via the solver.
     if (write_Xi_) memory::copy(init_Xi_, Xi_);
     if (write_Xo_) memory::copy(init_Xo_, Xo_);
 }
 
 void ion_state::zero_current() {
+    memory::fill(gX_, 0);
     memory::fill(iX_, 0);
 }
 
 void ion_state::reset() {
     zero_current();
+    memory::copy(reset_Xi_, Xd_);
     if (write_Xi_) memory::copy(reset_Xi_, Xi_);
     if (write_Xo_) memory::copy(reset_Xo_, Xo_);
     if (write_eX_) memory::copy(init_eX_, eX_);
@@ -315,7 +320,15 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
         auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
         ion_state* oion = ptr_by_key(ion_data, ion_binding);
         if (!oion) throw arbor_internal_error("gpu/mechanism: mechanism holds ion with no corresponding shared state");
-        store.ion_states_[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data(), nullptr };
+        auto& ion_state = store.ion_states_[idx];
+        ion_state = {0};
+        ion_state.current_density         = oion->iX_.data();
+        ion_state.reversal_potential      = oion->eX_.data();
+        ion_state.internal_concentration  = oion->Xi_.data();
+        ion_state.external_concentration  = oion->Xo_.data();
+        ion_state.diffusive_concentration = oion->Xd_.data();
+        ion_state.ionic_charge            = oion->charge.data();
+        ion_state.conductivity            = oion->gX_.data();
     }
 
     // If there are no sites (is this ever meaningful?) there is nothing more to do.
@@ -394,14 +407,33 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
     m.ppack_.ion_states = store.ion_states_d_.data();
 }
 
+void shared_state::integrate_voltage() {
+    solver.assemble(dt_intdom, voltage, current_density, conductivity);
+    solver.solve(voltage);
+}
+
+void shared_state::integrate_diffusion() {
+    for (auto& [ion, data]: ion_data) {
+        if (data.solver) {
+            data.solver->assemble(dt_intdom,
+                                  data.Xd_,
+                                  voltage,
+                                  data.iX_,
+                                  data.gX_,
+                                  data.charge[0]);
+            data.solver->solve(data.Xd_);
+        }
+    }
+}
+
 void shared_state::add_ion(
     const std::string& ion_name,
     int charge,
-    const fvm_ion_config& ion_info)
-{
+    const fvm_ion_config& ion_info,
+    ion_state::solver_ptr ptr) {
     ion_data.emplace(std::piecewise_construct,
         std::forward_as_tuple(ion_name),
-        std::forward_as_tuple(charge, ion_info, 1u));
+                     std::forward_as_tuple(charge, ion_info, 1u, std::move(ptr)));
 }
 
 void shared_state::configure_stimulus(const fvm_stimulus_config& stims) {
diff --git a/arbor/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
index 85f1ea0c451331d59338bebf0747567c0cc8a91c..20cef451b59e37a1599f570bc9e3c14a914c17a3 100644
--- a/arbor/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -12,6 +12,8 @@
 
 #include "backends/gpu/gpu_store_types.hpp"
 #include "backends/gpu/stimulus.hpp"
+#include "backends/gpu/diffusion_state.hpp"
+#include "backends/gpu/matrix_state_fine.hpp"
 
 namespace arb {
 namespace gpu {
@@ -27,7 +29,10 @@ namespace gpu {
  *     Xi_     cai              internal calcium concentration
  *     Xo_     cao              external calcium concentration
  */
- struct ARB_ARBOR_API ion_state {
+struct ARB_ARBOR_API ion_state {
+    using solver_type = arb::gpu::diffusion_state<fvm_value_type, fvm_index_type>;
+    using solver_ptr  = std::unique_ptr<solver_type>;
+
     bool write_eX_;          // is eX written?
     bool write_Xo_;          // is Xo written?
     bool write_Xi_;          // is Xi written?
@@ -36,7 +41,9 @@ namespace gpu {
     array iX_;          // (A/mÂ²) current density
     array eX_;          // (mV) reversal potential
     array Xi_;          // (mM) internal concentration
+    array Xd_;          // (mM) diffusive concentration
     array Xo_;          // (mM) external concentration
+    array gX_;             // (kS/mÂ²) per-species conductivity
 
     array init_Xi_;     // (mM) area-weighted initial internal concentration
     array init_Xo_;     // (mM) area-weighted initial external concentration
@@ -46,13 +53,15 @@ namespace gpu {
 
     array charge;       // charge of ionic species (global, length 1)
 
+    solver_ptr solver = nullptr;
+
     ion_state() = default;
 
     ion_state(
         int charge,
         const fvm_ion_config& ion_data,
-        unsigned align
-    );
+        unsigned align,
+        solver_ptr ptr);
 
     // Set ion concentrations to weighted proportion of default concentrations.
     void init_concentration();
@@ -103,6 +112,7 @@ struct ARB_ARBOR_API istim_state {
 };
 
 struct ARB_ARBOR_API shared_state {
+
     struct mech_storage {
         array data_;
         iarray indices_;
@@ -115,6 +125,9 @@ struct ARB_ARBOR_API shared_state {
         memory::device_vector<arb_ion_state>   ion_states_d_;
     };
 
+    using cable_solver = arb::gpu::matrix_state_fine<fvm_value_type, fvm_index_type>;
+    cable_solver solver;
+
     static constexpr std::size_t alignment = std::max(array::alignment(), iarray::alignment());
 
     fvm_size_type n_intdom = 0;   // Number of distinct integration domains.
@@ -171,7 +184,8 @@ struct ARB_ARBOR_API shared_state {
     void add_ion(
         const std::string& ion_name,
         int charge,
-        const fvm_ion_config& ion_data);
+        const fvm_ion_config& ion_data,
+        ion_state::solver_ptr solver=nullptr);
 
     void configure_stimulus(const fvm_stimulus_config&);
 
@@ -188,6 +202,10 @@ struct ARB_ARBOR_API shared_state {
     // Update stimulus state and add current contributions.
     void add_stimulus_current();
 
+    // Integrate by matrix solve.
+    void integrate_voltage();
+    void integrate_diffusion();
+
     // Return minimum and maximum time value [ms] across cells.
     std::pair<fvm_value_type, fvm_value_type> time_bounds() const;
 
diff --git a/arbor/backends/multicore/matrix_state.hpp b/arbor/backends/multicore/cable_solver.hpp
similarity index 73%
rename from arbor/backends/multicore/matrix_state.hpp
rename to arbor/backends/multicore/cable_solver.hpp
index 1831bfa07c221c26a28b8e71104262e4add0faa2..53d6f7898a00cf9985c631a92c21591a09534c3f 100644
--- a/arbor/backends/multicore/matrix_state.hpp
+++ b/arbor/backends/multicore/cable_solver.hpp
@@ -10,16 +10,13 @@
 namespace arb {
 namespace multicore {
 
-template <typename T, typename I>
-struct matrix_state {
-public:
-    using value_type = T;
-    using index_type = I;
-
-    using array = padded_vector<value_type>;
+struct cable_solver {
+    using value_type = arb_value_type;
+    using index_type = arb_index_type;
+    using array      = padded_vector<value_type>;
     using const_view = const array&;
+    using iarray     = padded_vector<index_type>;
 
-    using iarray = padded_vector<index_type>;
     iarray parent_index;
     iarray cell_cv_divs;
 
@@ -36,9 +33,14 @@ public:
     // the invariant part of the matrix diagonal
     array invariant_d;         // [Î¼S]
 
-    matrix_state() = default;
+    cable_solver() = default;
+    cable_solver(const cable_solver&) = default;
+    cable_solver(cable_solver&&) = default;
+
+    cable_solver& operator=(const cable_solver&) = default;
+    cable_solver& operator=(cable_solver&&) = default;
 
-    matrix_state(const std::vector<index_type>& p,
+    cable_solver(const std::vector<index_type>& p,
                  const std::vector<index_type>& cell_cv_divs,
                  const std::vector<value_type>& cap,
                  const std::vector<value_type>& cond,
@@ -52,19 +54,20 @@ public:
         cv_area(area.begin(), area.end()),
         cell_to_intdom(cell_to_intdom.begin(), cell_to_intdom.end())
     {
+        // Sanity check
         arb_assert(cap.size() == size());
         arb_assert(cond.size() == size());
         arb_assert(cell_cv_divs.back() == (index_type)size());
 
-        auto n = size();
+        // Build invariant parts
+        const auto n = size();
         invariant_d = array(n, 0);
         if (n >= 1) { // skip empty matrix, ie cell with empty morphology
             for (auto i: util::make_span(1u, n)) {
-                auto gij = face_conductance[i];
-
+                const auto gij = face_conductance[i];
                 u[i] = -gij;
                 invariant_d[i] += gij;
-                if (p[i]!=-1) {
+                if (p[i]!=-1) { // root
                     invariant_d[p[i]] += gij;
                 }
             }
@@ -77,7 +80,6 @@ public:
         return rhs;
     }
 
-
     // Assemble the matrix
     // Afterwards the diagonal and RHS will have been set given dt, voltage and current.
     //   dt_intdom       [ms]      (per integration domain)
@@ -85,20 +87,16 @@ public:
     //   current density [A.m^-2]  (per control volume)
     //   conductivity    [kS.m^-2] (per control volume)
     void assemble(const_view dt_intdom, const_view voltage, const_view current, const_view conductivity) {
-        auto cell_cv_part = util::partition_view(cell_cv_divs);
+        const auto cell_cv_part = util::partition_view(cell_cv_divs);
         const index_type ncells = cell_cv_part.size();
-
         // loop over submatrices
         for (auto m: util::make_span(0, ncells)) {
-            auto dt = dt_intdom[cell_to_intdom[m]];
-
+            const auto dt = dt_intdom[cell_to_intdom[m]];
             if (dt>0) {
-                value_type oodt_factor = 1e-3/dt; // [1/Âµs]
+                const value_type oodt_factor = 1e-3/dt; // [1/Âµs]
                 for (auto i: util::make_span(cell_cv_part[m])) {
-                    auto area_factor = 1e-3*cv_area[i]; // [1e-9Â·mÂ²]
-
-                    auto gi = oodt_factor*cv_capacitance[i] + area_factor*conductivity[i]; // [Î¼S]
-
+                    const auto area_factor = 1e-3*cv_area[i]; // [1e-9Â·mÂ²]
+                    const auto gi = oodt_factor*cv_capacitance[i] + area_factor*conductivity[i]; // [Î¼S]
                     d[i] = gi + invariant_d[i];
                     // convert current to units nA
                     rhs[i] = gi*voltage[i] - area_factor*current[i];
@@ -115,19 +113,17 @@ public:
 
     void solve() {
         // loop over submatrices
-        for (auto cv_span: util::partition_view(cell_cv_divs)) {
-            auto first = cv_span.first;
-            auto last = cv_span.second; // one past the end
+        for (const auto& [first, last]: util::partition_view(cell_cv_divs)) {
             if (first >= last) continue; // skip cell with no CVs
             if (d[first]!=0) {
                 // backward sweep
                 for(auto i=last-1; i>first; --i) {
-                    auto factor = u[i] / d[i];
+                    const auto factor = u[i] / d[i];
                     d[parent_index[i]]   -= factor * u[i];
                     rhs[parent_index[i]] -= factor * rhs[i];
                 }
+                // solve root
                 rhs[first] /= d[first];
-
                 // forward sweep
                 for(auto i=first+1; i<last; ++i) {
                     rhs[i] -= u[i] * rhs[parent_index[i]];
@@ -143,11 +139,8 @@ public:
         memory::copy(rhs, to);
     }
 
-private:
-
-    std::size_t size() const {
-        return parent_index.size();
-    }
+    std::size_t num_cells() const { return cell_cv_divs.size() - 1; }
+    std::size_t size() const { return parent_index.size(); }
 };
 
 } // namespace multicore
diff --git a/arbor/backends/multicore/diffusion_solver.hpp b/arbor/backends/multicore/diffusion_solver.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..112c646cbe0865645176713519db6c25892a293c
--- /dev/null
+++ b/arbor/backends/multicore/diffusion_solver.hpp
@@ -0,0 +1,145 @@
+#pragma once
+
+#include <util/partition.hpp>
+#include <util/span.hpp>
+
+#include <memory/memory.hpp>
+
+#include "multicore_common.hpp"
+
+namespace arb {
+namespace multicore {
+
+struct diffusion_solver {
+    using value_type = arb_value_type;
+    using index_type = arb_index_type;
+    using array      = padded_vector<value_type>;
+    using const_view = const array&;
+    using iarray     = padded_vector<index_type>;
+
+    iarray parent_index;
+    iarray cell_cv_divs;
+
+    array d;     // [Î¼S]
+    array u;     // [Î¼S]
+    array rhs;   // [nA]
+
+    array cv_area;             // [Î¼m^2]
+
+    iarray cell_to_intdom;
+
+    // the invariant part of the matrix diagonal
+    array invariant_d;         // [Î¼S]
+
+    diffusion_solver() = default;
+    diffusion_solver(const diffusion_solver&) = default;
+    diffusion_solver(diffusion_solver&&) = default;
+
+    diffusion_solver& operator=(const diffusion_solver&) = default;
+    diffusion_solver& operator=(diffusion_solver&&) = default;
+
+    diffusion_solver(const std::vector<index_type>& p,
+                     const std::vector<index_type>& cell_cv_divs,
+                     const std::vector<value_type>& diff,
+                     const std::vector<value_type>& area,
+                     const std::vector<index_type>& cell_to_intdom):
+        parent_index(p.begin(), p.end()),
+        cell_cv_divs(cell_cv_divs.begin(), cell_cv_divs.end()),
+        d(size(), 0), u(size(), 0), rhs(size()),
+        cv_area(area.begin(), area.end()),
+        cell_to_intdom(cell_to_intdom.begin(), cell_to_intdom.end())
+    {
+        // Sanity check
+        arb_assert(diff.size() == size());
+        arb_assert(cell_cv_divs.back() == (index_type)size());
+
+        // Build invariant parts
+        const auto n = size();
+        invariant_d = array(n, 0);
+        if (n >= 1) { // skip empty matrix, ie cell with empty morphology
+            for (auto i: util::make_span(1u, n)) {
+                auto gij = diff[i];
+                u[i]           =  -gij;
+                invariant_d[i] +=  gij;
+                // Also add to our parent, if present
+                if (auto pi = p[i]; pi != -1) invariant_d[pi] += gij;
+            }
+        }
+    }
+
+    const_view solution() const {
+        // In this back end the solution is a simple view of the rhs, which
+        // contains the solution after the matrix_solve is performed.
+        return rhs;
+    }
+
+    // Assemble the matrix
+    //   dt_intdom       [ms]      (per integration domain)
+    //   internal conc   [mM]      (per control volume)
+    //   voltage         [mV]      (per control volume)
+    //   current density [A.m^-2]  (per control volume and species)
+    //   diffusivity     [???]     (per control volume)
+    //   charge          [e]
+    void assemble(const_view dt_intdom, const_view concentration, const_view voltage, const_view current, const_view conductivity, fvm_value_type q) {
+        auto cell_cv_part = util::partition_view(cell_cv_divs);
+        index_type ncells = cell_cv_part.size();
+        // loop over submatrices
+        for (auto m: util::make_span(0, ncells)) {
+            auto dt = dt_intdom[cell_to_intdom[m]];
+            if (dt>0) {
+                value_type _1_dt = 1e-3/dt;     // 1/Âµs
+                for (auto i: util::make_span(cell_cv_part[m])) {
+                    auto u = voltage[i];        // mV
+                    auto g = conductivity[i];   // ÂµS
+                    auto J = current[i];        // A/m^2
+                    auto A = 1e-3*cv_area[i];   // 1e-9Â·mÂ²
+                    auto X = concentration[i];  // mM
+                    // conversion from current density to concentration change
+                    // using Faraday's constant
+                    auto F = A/(q*96.485332);
+                    d[i]   = _1_dt   + F*g + invariant_d[i];
+                    rhs[i] = _1_dt*X + F*(u*g - J);
+                }
+            }
+            else {
+                for (auto i: util::make_span(cell_cv_part[m])) {
+                    d[i]   = 0;
+                    rhs[i] = concentration[i];
+                }
+            }
+        }
+    }
+
+    void solve() {
+        // loop over submatrices
+        for (const auto& [first, last]: util::partition_view(cell_cv_divs)) {
+            if (first >= last) continue; // skip cell with no CVs
+            if (d[first]!=0) {
+                // backward sweep
+                for(auto i=last-1; i>first; --i) {
+                    auto pi  = parent_index[i];
+                    auto fac = -u[i] / d[i];
+                    d[pi]    = std::fma(fac, u[i],   d[pi]);
+                    rhs[pi]  = std::fma(fac, rhs[i], rhs[pi]);
+                }
+                // solve root
+                rhs[first] /= d[first];
+                // forward sweep
+                for(auto i=first+1; i<last; ++i) {
+                    rhs[i] = std::fma(-u[i], rhs[parent_index[i]], rhs[i])/d[i];
+                }
+            }
+        }
+    }
+
+    template<typename VTo>
+    void solve(VTo& to) {
+        solve();
+        memory::copy(rhs, to);
+    }
+
+    std::size_t size() const { return parent_index.size(); }
+};
+
+} // namespace multicore
+} // namespace arb
diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp
index a9064357057f01a21eb863639e3fe16e3999247f..fb0e17cbb4ee371526ffe6210370e36508201129 100644
--- a/arbor/backends/multicore/fvm.hpp
+++ b/arbor/backends/multicore/fvm.hpp
@@ -6,10 +6,11 @@
 #include <arbor/mechanism.hpp>
 
 #include "backends/event.hpp"
-#include "backends/multicore/matrix_state.hpp"
 #include "backends/multicore/multi_event_stream.hpp"
 #include "backends/multicore/multicore_common.hpp"
 #include "backends/multicore/shared_state.hpp"
+#include "backends/multicore/diffusion_solver.hpp"
+#include "backends/multicore/cable_solver.hpp"
 #include "backends/multicore/threshold_watcher.hpp"
 #include "execution_context.hpp"
 #include "util/padded_alloc.hpp"
@@ -40,14 +41,13 @@ struct backend {
         return util::range_pointer_view(v);
     }
 
-    using matrix_state = arb::multicore::matrix_state<value_type, index_type>;
-    using threshold_watcher = arb::multicore::threshold_watcher;
-
+    using cable_solver             = arb::multicore::cable_solver;
+    using diffusion_solver         = arb::multicore::diffusion_solver;
+    using threshold_watcher        = arb::multicore::threshold_watcher;
     using deliverable_event_stream = arb::multicore::deliverable_event_stream;
-    using sample_event_stream = arb::multicore::sample_event_stream;
-
-    using shared_state = arb::multicore::shared_state;
-    using ion_state = arb::multicore::ion_state;
+    using sample_event_stream      = arb::multicore::sample_event_stream;
+    using shared_state             = arb::multicore::shared_state;
+    using ion_state                = arb::multicore::ion_state;
 
     static threshold_watcher voltage_watcher(
         shared_state& state,
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index f6c0b205714fa4cfacd7cabe575bd7373d04cac2..ccc1a8024f02aaf9024af019e8e27361a4d45da3 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -56,8 +56,8 @@ using pad = util::padded_allocator<>;
 ion_state::ion_state(
     int charge,
     const fvm_ion_config& ion_data,
-    unsigned align
-):
+    unsigned align,
+    solver_ptr ptr):
     alignment(min_alignment(align)),
     write_eX_(ion_data.revpot_written),
     write_Xo_(ion_data.econc_written),
@@ -66,14 +66,16 @@ ion_state::ion_state(
     iX_(ion_data.cv.size(), NAN, pad(alignment)),
     eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end(), pad(alignment)),
     Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end(), pad(alignment)),
+    Xd_(ion_data.reset_iconc.begin(), ion_data.reset_iconc.end(), pad(alignment)),
     Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end(), pad(alignment)),
+    gX_(ion_data.cv.size(), NAN, pad(alignment)),
     init_Xi_(ion_data.init_iconc.begin(), ion_data.init_iconc.end(), pad(alignment)),
     init_Xo_(ion_data.init_econc.begin(), ion_data.init_econc.end(), pad(alignment)),
     reset_Xi_(ion_data.reset_iconc.begin(), ion_data.reset_iconc.end(), pad(alignment)),
     reset_Xo_(ion_data.reset_econc.begin(), ion_data.reset_econc.end(), pad(alignment)),
     init_eX_(ion_data.init_revpot.begin(), ion_data.init_revpot.end(), pad(alignment)),
-    charge(1u, charge, pad(alignment))
-{
+    charge(1u, charge, pad(alignment)),
+    solver(std::move(ptr)) {
     arb_assert(node_index_.size()==init_Xi_.size());
     arb_assert(node_index_.size()==init_Xo_.size());
     arb_assert(node_index_.size()==eX_.size());
@@ -81,16 +83,19 @@ ion_state::ion_state(
 }
 
 void ion_state::init_concentration() {
+    // NB. not resetting Xd here, it's controlled via the solver.
     if (write_Xi_) std::copy(init_Xi_.begin(), init_Xi_.end(), Xi_.begin());
     if (write_Xo_) std::copy(init_Xo_.begin(), init_Xo_.end(), Xo_.begin());
 }
 
 void ion_state::zero_current() {
+    util::fill(gX_, 0);
     util::fill(iX_, 0);
 }
 
 void ion_state::reset() {
     zero_current();
+    std::copy(reset_Xi_.begin(), reset_Xi_.end(), Xd_.begin());
     if (write_Xi_) std::copy(reset_Xi_.begin(), reset_Xi_.end(), Xi_.begin());
     if (write_Xo_) std::copy(reset_Xo_.begin(), reset_Xo_.end(), Xo_.begin());
     if (write_eX_) std::copy(init_eX_.begin(), init_eX_.end(), eX_.begin());
@@ -239,13 +244,33 @@ shared_state::shared_state(
     }
 }
 
+void shared_state::integrate_voltage() {
+    solver.assemble(dt_intdom, voltage, current_density, conductivity);
+    solver.solve(voltage);
+}
+
+void shared_state::integrate_diffusion() {
+    for (auto& [ion, data]: ion_data) {
+        if (data.solver) {
+            data.solver->assemble(dt_intdom,
+                                  data.Xd_,
+                                  voltage,
+                                  data.iX_,
+                                  data.gX_,
+                                  data.charge[0]);
+            data.solver->solve(data.Xd_);
+        }
+    }
+}
+
 void shared_state::add_ion(
     const std::string& ion_name,
     int charge,
-    const fvm_ion_config& ion_info) {
+    const fvm_ion_config& ion_info,
+    ion_state::solver_ptr ptr) {
     ion_data.emplace(std::piecewise_construct,
-        std::forward_as_tuple(ion_name),
-        std::forward_as_tuple(charge, ion_info, alignment));
+                     std::forward_as_tuple(ion_name),
+                     std::forward_as_tuple(charge, ion_info, alignment, std::move(ptr)));
 }
 
 void shared_state::configure_stimulus(const fvm_stimulus_config& stims) {
@@ -503,9 +528,18 @@ void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_o
     for (auto idx: make_span(m.mech_.n_ions)) {
         auto ion = m.mech_.ions[idx].name;
         auto ion_binding = value_by_key(overrides.ion_rebind, ion).value_or(ion);
-        ion_state* oion = ptr_by_key(ion_data, ion_binding);
+        auto* oion = ptr_by_key(ion_data, ion_binding);
         if (!oion) throw arbor_internal_error(util::pprintf("multicore/mechanism: mechanism holds ion '{}' with no corresponding shared state", ion));
-        m.ppack_.ion_states[idx] = { oion->iX_.data(), oion->eX_.data(), oion->Xi_.data(), oion->Xo_.data(), oion->charge.data() };
+
+        auto& ion_state = m.ppack_.ion_states[idx];
+        ion_state = {0};
+        ion_state.current_density         = oion->iX_.data();
+        ion_state.reversal_potential      = oion->eX_.data();
+        ion_state.internal_concentration  = oion->Xi_.data();
+        ion_state.external_concentration  = oion->Xo_.data();
+        ion_state.diffusive_concentration = oion->Xd_.data();
+        ion_state.ionic_charge            = oion->charge.data();
+        ion_state.conductivity            = oion->gX_.data();
     }
 
     // Initialize state and parameter vectors with default values.
diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
index 8aa8d4155141efb78fae60a14d660eddc6565cd5..149cc152da4fc8875504ddcd62d7a3798896c686 100644
--- a/arbor/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -17,12 +17,13 @@
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
 
-#include "matrix_state.hpp"
 #include "multi_event_stream.hpp"
 #include "threshold_watcher.hpp"
 #include "fvm_layout.hpp"
 #include "multicore_common.hpp"
 #include "partition_by_constraint.hpp"
+#include "backends/multicore/cable_solver.hpp"
+#include "backends/multicore/diffusion_solver.hpp"
 
 namespace arb {
 namespace multicore {
@@ -39,6 +40,9 @@ namespace multicore {
  *     Xo_     cao              external calcium concentration
  */
 struct ARB_ARBOR_API ion_state {
+    using solver_type = diffusion_solver;
+    using solver_ptr  = std::unique_ptr<solver_type>;
+
     unsigned alignment = 1; // Alignment and padding multiple.
 
     bool write_eX_;          // is eX written?
@@ -46,10 +50,12 @@ struct ARB_ARBOR_API ion_state {
     bool write_Xi_;          // is Xi written?
 
     iarray node_index_;     // Instance to CV map.
-    array iX_;              // (A/mÂ²) current density
-    array eX_;              // (mV) reversal potential
-    array Xi_;              // (mM) internal concentration
-    array Xo_;              // (mM) external concentration
+    array iX_;              // (A/mÂ²)  current density
+    array eX_;              // (mV)    reversal potential
+    array Xi_;              // (mM)    internal concentration
+    array Xd_;              // (mM)    diffusive internal concentration
+    array Xo_;              // (mM)    external concentration
+    array gX_;              // (kS/mÂ²) per-species conductivity
 
     array init_Xi_;         // (mM) area-weighted initial internal concentration
     array init_Xo_;         // (mM) area-weighted initial external concentration
@@ -59,12 +65,15 @@ struct ARB_ARBOR_API ion_state {
 
     array charge;           // charge of ionic species (global value, length 1)
 
+    solver_ptr solver = nullptr;
+
     ion_state() = default;
 
     ion_state(
         int charge,
         const fvm_ion_config& ion_data,
-        unsigned align
+        unsigned align,
+        solver_ptr ptr
     );
 
     // Set ion concentrations to weighted proportion of default concentrations.
@@ -120,6 +129,8 @@ struct ARB_ARBOR_API shared_state {
         std::vector<arb_ion_state>   ion_states_;
     };
 
+    cable_solver solver;
+
     unsigned alignment = 1;   // Alignment and padding multiple.
     util::padded_allocator<> alloc;  // Allocator with corresponging alignment/padding.
 
@@ -175,7 +186,8 @@ struct ARB_ARBOR_API shared_state {
     void add_ion(
         const std::string& ion_name,
         int charge,
-        const fvm_ion_config& ion_data);
+        const fvm_ion_config& ion_data,
+        ion_state::solver_ptr solver=nullptr);
 
     void configure_stimulus(const fvm_stimulus_config&);
 
@@ -194,6 +206,10 @@ struct ARB_ARBOR_API shared_state {
     // Update stimulus state and add current contributions.
     void add_stimulus_current();
 
+    // Integrate by matrix solve.
+    void integrate_voltage();
+    void integrate_diffusion();
+
     // Return minimum and maximum time value [ms] across cells.
     std::pair<fvm_value_type, fvm_value_type> time_bounds() const;
 
diff --git a/arbor/cable_cell.cpp b/arbor/cable_cell.cpp
index 704b28cc947895d63c9444455ad72384096ee407..a8a3bcf6ef7cb2d848527eabdf184fff77f7d6c8 100644
--- a/arbor/cable_cell.cpp
+++ b/arbor/cable_cell.cpp
@@ -146,6 +146,10 @@ struct cable_cell_impl {
         return region_map.get<init_int_concentration>()[init.ion];
     }
 
+    mcable_map<ion_diffusivity>& get_region_map(const ion_diffusivity& init) {
+        return region_map.get<ion_diffusivity>()[init.ion];
+    }
+
     mcable_map<init_ext_concentration>& get_region_map(const init_ext_concentration& init) {
         return region_map.get<init_ext_concentration>()[init.ion];
     }
diff --git a/arbor/cable_cell_param.cpp b/arbor/cable_cell_param.cpp
index 36b17d9d70962d8baba60b65c7765f754d67d6f5..1c2a91e4a5a8aaeeea4042e5bb107941e3161843 100644
--- a/arbor/cable_cell_param.cpp
+++ b/arbor/cable_cell_param.cpp
@@ -37,15 +37,16 @@ ARB_ARBOR_API void check_global_properties(const cable_cell_global_properties& G
         }
     }
 
-    for (const auto& kv: param.ion_data) {
-        auto& ion = kv.first;
-        const cable_cell_ion_data& data = kv.second;
+    for (const auto& [ion, data]: param.ion_data) {
         if (!data.init_int_concentration) {
             throw cable_cell_error("missing init_int_concentration for ion "+ion);
         }
         if (!data.init_ext_concentration) {
             throw cable_cell_error("missing init_ext_concentration for ion "+ion);
         }
+        if (data.diffusivity && *data.diffusivity < 0.0) {
+            throw cable_cell_error("negative diffusivity for ion "+ion);
+        }
         if (!data.init_reversal_potential && !param.reversal_potential_method.count(ion)) {
             throw cable_cell_error("missing init_reversal_potential or reversal_potential_method for ion "+ion);
         }
@@ -62,11 +63,10 @@ cable_cell_parameter_set neuron_parameter_defaults = {
     // membrane capacitance [F/mÂ²]
     0.01,
     // ion defaults:
-    // internal concentration [mM], external concentration [mM], reversal potential [mV]
-    {
-        {"na", {10.0,  140.0,  115 - 65.}},
-        {"k",  {54.4,    2.5,  -12 - 65.}},
-        {"ca", {5e-5,    2.0,  12.5*std::log(2.0/5e-5)}}
+    // internal concentration [mM], external concentration [mM], reversal potential [mV], diffusivity [m^2/s]
+    {{"na", {10.0,  140.0,  115 - 65.,               0.0}},
+     {"k",  {54.4,    2.5,  -12 - 65.,               0.0}},
+     {"ca", {5e-5,    2.0,  12.5*std::log(2.0/5e-5), 0.0}}
     },
 };
 
@@ -96,6 +96,9 @@ std::vector<defaultable> cable_cell_parameter_set::serialize() const {
         if (data.init_reversal_potential) {
             D.push_back(init_reversal_potential{name, *data.init_reversal_potential});
         }
+        if (data.init_reversal_potential) {
+            D.push_back(ion_diffusivity{name, *data.diffusivity});
+        }
     }
 
     for (const auto& [name, mech]: reversal_potential_method) {
@@ -148,6 +151,9 @@ void decor::set_default(defaultable what) {
                 else if constexpr (std::is_same_v<cv_policy, T>) {
                     defaults_.discretization = std::forward<cv_policy>(p);
                 }
+                else if constexpr (std::is_same_v<ion_diffusivity, T>) {
+                    defaults_.ion_data[p.ion].diffusivity = p.value;
+                }
             },
             what);
 }
diff --git a/arbor/fvm_layout.cpp b/arbor/fvm_layout.cpp
index 5f7dd6bc0f02aaade3de42bf6816a9c26aa71713..e862adad778e61e41e939f6bc8d0a19f8adf1e85 100644
--- a/arbor/fvm_layout.cpp
+++ b/arbor/fvm_layout.cpp
@@ -3,6 +3,7 @@
 #include <set>
 #include <stdexcept>
 #include <unordered_set>
+#include <unordered_map>
 #include <vector>
 
 #include <arbor/arbexcept.hpp>
@@ -12,6 +13,8 @@
 #include <arbor/morph/mprovider.hpp>
 #include <arbor/morph/morphology.hpp>
 
+#include <iostream>
+
 #include "fvm_layout.hpp"
 #include "threading/threading.hpp"
 #include "util/maputil.hpp"
@@ -22,6 +25,7 @@
 #include "util/rangeutil.hpp"
 #include "util/transform.hpp"
 #include "util/unique.hpp"
+#include "util/strprintf.hpp"
 
 namespace arb {
 
@@ -209,6 +213,23 @@ ARB_ARBOR_API fvm_cv_discretization& append(fvm_cv_discretization& dczn, const f
 
     append(dczn.geometry, right.geometry);
 
+    // Those in L and R: merge
+    for (auto& [ion, data]: dczn.diffusive_ions) {
+        const auto& rhs = right.diffusive_ions.find(ion);
+        if (rhs != right.diffusive_ions.end()) {
+            append(data.axial_inv_diffusivity, rhs->second.axial_inv_diffusivity);
+            append(data.face_diffusivity, rhs->second.face_diffusivity);
+        }
+    }
+    // Those only in R: add to L
+    for (auto& [ion, data]: right.diffusive_ions) {
+        const auto& lhs = dczn.diffusive_ions.find(ion);
+        if (lhs == dczn.diffusive_ions.end()) {
+            dczn.diffusive_ions[ion].axial_inv_diffusivity = data.axial_inv_diffusivity;
+            dczn.diffusive_ions[ion].face_diffusivity      = data.face_diffusivity;
+        }
+    }
+
     append(dczn.face_conductance, right.face_conductance);
     append(dczn.cv_area, right.cv_area);
     append(dczn.cv_capacitance, right.cv_capacitance);
@@ -253,6 +274,63 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
     const auto& capacitance = assignments.get<membrane_capacitance>();
     const auto& potential   = assignments.get<init_membrane_potential>();
     const auto& temperature = assignments.get<temperature_K>();
+    const auto& diffusivity = assignments.get<ion_diffusivity>();
+
+    // Set up for ion diffusivity
+    std::unordered_map<std::string, mcable_map<double>> inverse_diffusivity;
+    std::unordered_map<std::string, fvm_diffusion_info> diffusive_ions;
+
+    // Collect all eglible ions: those where any cable has finite diffusivity
+    for (const auto& [ion, data]: global_dflt.ion_data) {
+        if (data.diffusivity.value_or(0.0) != 0.0) {
+            diffusive_ions[ion] = {};
+        }
+    }
+    for (const auto& [ion, data]: dflt.ion_data) {
+        if (data.diffusivity.value_or(0.0) != 0.0) {
+            diffusive_ions[ion] = {};
+        }
+    }
+    for (const auto& [ion, data]: diffusivity) {
+        auto diffusive = std::any_of(data.begin(),
+                                     data.end(),
+                                     [](const auto& kv) { return kv.second.value != 0.0; });
+        if (diffusive) {
+            diffusive_ions[ion] = {};
+        }
+    }
+
+    // Remap diffusivity to resistivity
+    for (auto& [ion, data]: diffusive_ions) {
+        auto& id_map = inverse_diffusivity[ion];
+        // Treat specific assignments
+        if (auto map = diffusivity.find(ion); map != diffusivity.end()) {
+            for (const auto& [k, v]: map->second) {
+                if (v.value <= 0.0) {
+                    throw cable_cell_error{util::pprintf("Illegal diffusivity '{}' for ion '{}' at '{}'.", v.value, ion, k)};
+                }
+                id_map.insert(k, 1.0/v.value);
+            }
+        }
+        // Fetch defaults, either global or per cell
+        auto gd = *(value_by_key(dflt.ion_data,        ion).value().diffusivity
+                  | value_by_key(global_dflt.ion_data, ion).value().diffusivity);
+        if (gd <= 0.0) {
+            throw cable_cell_error{util::pprintf("Illegal global diffusivity '{}' for ion '{}'.", gd, ion)};
+        }
+
+        // Write inverse diffusivity / diffuse resistivity map
+        auto& id = data.axial_inv_diffusivity;
+        id.resize(1);
+        msize_t n_branch = D.geometry.n_branch(0);
+        id.reserve(n_branch);
+        for (msize_t i = 0; i<n_branch; ++i) {
+            auto pw = pw_over_cable(id_map, mcable{i, 0., 1.}, 1.0/gd);
+            id[0].push_back(pw);
+        }
+        // Prepare conductivity map
+        data.face_diffusivity.resize(n_cv);
+    }
 
     D.axial_resistivity.resize(1);
     msize_t n_branch = D.geometry.n_branch(0);
@@ -265,7 +343,7 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
     const auto& embedding = cell.embedding();
     for (auto i: count_along(D.geometry.cv_parent)) {
         auto cv_cables = D.geometry.cables(i);
-        // Computing face_conductance:
+        // Computing face_conductance and face_diffusivity
         //
         // Flux between adjacent CVs is computed as if there were no membrane currents, and with the CV voltage
         // values taken to be exact at a reference point in each CV:
@@ -273,6 +351,10 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
         //     * If the CV is branched, the reference point is taken to be closest branch point to
         //       the interface between the two CVs.
         D.face_conductance[i] = 0;
+        for (auto& [ion, info]: diffusive_ions) {
+            info.face_diffusivity[i] = 0.0;
+        }
+
         fvm_index_type p = D.geometry.cv_parent[i];
         if (p!=-1) {
             auto parent_cables = D.geometry.cables(p);
@@ -296,6 +378,10 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
             mcable span{bid, parent_refpt, cv_refpt};
             double resistance = embedding.integrate_ixa(span, D.axial_resistivity[0].at(bid));
             D.face_conductance[i] = 100/resistance; // 100 scales to ÂµS.
+            for (auto& [ion, info]: diffusive_ions) {
+                double resistance = embedding.integrate_ixa(span, info.axial_inv_diffusivity[0].at(bid));
+                info.face_diffusivity[i] = 1.0/resistance; // scale to m^2/s
+            }
         }
 
         D.cv_area[i] = 0;
@@ -313,8 +399,13 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
         }
 
         if (D.cv_area[i]>0) {
-            D.init_membrane_potential[i] /= D.cv_area[i];
-            D.temperature_K[i] /= D.cv_area[i];
+            auto A = D.cv_area[i];
+            D.init_membrane_potential[i] /= A;
+            D.temperature_K[i] /= A;
+
+            for (auto& [ion, info]: diffusive_ions) {
+                info.face_diffusivity[i] /= A;
+            }
             // If parent is trivial, and there is no grandparent, then we can use values from this CV
             // to get initial values for the parent. (The other case, when there is a grandparent, is
             // caught below.)
@@ -333,6 +424,8 @@ ARB_ARBOR_API fvm_cv_discretization fvm_cv_discretize(const cable_cell& cell, co
             D.diam_um[i] = D.cv_area[i]/(cv_length*math::pi<double>);
         }
     }
+
+    D.diffusive_ions = std::move(diffusive_ions);
     return D;
 }
 
@@ -595,6 +688,8 @@ fvm_mechanism_data& append(fvm_mechanism_data& left, const fvm_mechanism_data& r
         append(L.reset_iconc, R.reset_iconc);
         append(L.reset_econc, R.reset_econc);
         append(L.init_revpot, R.init_revpot);
+        append(L.face_diffusivity, R.face_diffusivity);
+        L.is_diffusive |= R.is_diffusive;
         L.econc_written  |= R.econc_written;
         L.iconc_written  |= R.iconc_written;
         L.revpot_written |= R.revpot_written;
@@ -745,7 +840,7 @@ fvm_mechanism_data fvm_build_mechanism_data(
     const auto& assignments = cell.region_assignments();
 
     // Verify mechanism ion usage, parameter values.
-    auto verify_mechanism = [&gprop](const mechanism_info& info, const mechanism_desc& desc) {
+    auto verify_mechanism = [&gprop, &D](const mechanism_info& info, const mechanism_desc& desc) {
         const auto& global_ions = gprop.ion_species;
 
         for (const auto& pv: desc.values()) {
@@ -757,30 +852,40 @@ fvm_mechanism_data fvm_build_mechanism_data(
             }
         }
 
-        for (const auto& ion: info.ions) {
-            const auto& ion_name = ion.first;
-            const auto& ion_dep = ion.second;
-
-            if (!global_ions.count(ion_name)) {
+        for (const auto& [ion, dep]: info.ions) {
+            if (!global_ions.count(ion)) {
                 throw cable_cell_error(
-                    "mechanism "+desc.name()+" uses ion "+ion_name+ " which is missing in global properties");
+                    "mechanism "+desc.name()+" uses ion "+ion+ " which is missing in global properties");
             }
 
-            if (ion_dep.verify_ion_charge) {
-                if (ion_dep.expected_ion_charge!=global_ions.at(ion_name)) {
+            if (dep.verify_ion_charge) {
+                if (dep.expected_ion_charge!=global_ions.at(ion)) {
                     throw cable_cell_error(
-                        "mechanism "+desc.name()+" uses ion "+ion_name+ " expecting a different valence");
+                        "mechanism "+desc.name()+" uses ion "+ion+ " expecting a different valence");
                 }
             }
 
-            if (ion_dep.write_reversal_potential && (ion_dep.write_concentration_int || ion_dep.write_concentration_ext)) {
+            if (dep.write_reversal_potential && (dep.write_concentration_int || dep.write_concentration_ext)) {
                 throw cable_cell_error("mechanism "+desc.name()+" writes both reversal potential and concentration");
             }
+
+            auto is_diffusive = D.diffusive_ions.count(ion);
+            if (dep.access_concentration_diff && !is_diffusive) {
+                throw illegal_diffusive_mechanism(desc.name(), ion);
+            }
         }
     };
 
     // Track ion usage of mechanisms so that ions are only instantiated where required.
     std::unordered_map<std::string, std::vector<index_type>> ion_support;
+
+    // add diffusive ions to support: If diffusive, it's everywhere.
+    for (const auto& [ion, data]: D.diffusive_ions) {
+        auto& s = ion_support[ion];
+        s.resize(D.geometry.size());
+        std::iota(s.begin(), s.end(), 0);
+    }
+
     auto update_ion_support = [&ion_support](const mechanism_info& info, const std::vector<index_type>& cvs) {
         arb_assert(util::is_sorted(cvs));
 
@@ -1193,9 +1298,9 @@ fvm_mechanism_data fvm_build_mechanism_data(
         auto n_cv = config.cv.size();
         config.init_iconc.resize(n_cv);
         config.init_econc.resize(n_cv);
+        config.init_revpot.resize(n_cv);
         config.reset_iconc.resize(n_cv);
         config.reset_econc.resize(n_cv);
-        config.init_revpot.resize(n_cv);
 
         auto global_ion_data = value_by_key(global_dflt.ion_data, ion).value();
         auto dflt_iconc = global_ion_data.init_int_concentration.value();
@@ -1221,7 +1326,8 @@ fvm_mechanism_data fvm_build_mechanism_data(
             auto cv = config.cv[i];
             if (D.cv_area[cv]==0) continue;
 
-            for (mcable c: D.geometry.cables(cv)) {
+            const auto& cv_cables = D.geometry.cables(cv);
+            for (const mcable c: cv_cables) {
                 auto iconc = pw_over_cable(iconc_on_cable, c, dflt_iconc);
                 auto econc = pw_over_cable(econc_on_cable, c, dflt_econc);
                 auto rvpot = pw_over_cable(rvpot_on_cable, c, dflt_rvpot);
@@ -1237,12 +1343,18 @@ fvm_mechanism_data fvm_build_mechanism_data(
                 config.init_econc[i] += embedding.integrate_area(c.branch, econc_masked);
             }
 
+            // Scale all by area
             double oo_cv_area = 1./D.cv_area[cv];
             config.reset_iconc[i] *= oo_cv_area;
             config.reset_econc[i] *= oo_cv_area;
             config.init_revpot[i] *= oo_cv_area;
-            config.init_iconc[i] *= oo_cv_area;
-            config.init_econc[i] *= oo_cv_area;
+            config.init_iconc[i]  *= oo_cv_area;
+            config.init_econc[i]  *= oo_cv_area;
+        }
+
+        if (auto di = D.diffusive_ions.find(ion); di != D.diffusive_ions.end()) {
+            config.is_diffusive          = true;
+            config.face_diffusivity      = di->second.face_diffusivity;
         }
 
         config.econc_written  = write_xo.count(ion);
@@ -1325,7 +1437,6 @@ fvm_mechanism_data fvm_build_mechanism_data(
             }
         }
     }
-
     // Confirm that all ions written to by a revpot have a corresponding entry in a reversal_potential_method table.
     for (auto& kv: revpot_tbl) {
         if (!revpot_specified.count(kv.first)) {
diff --git a/arbor/fvm_layout.hpp b/arbor/fvm_layout.hpp
index 9306c78f3d4e240acaf15a20a22612770b03e0db..2c26a398c5eee500f0794dfdfc97fae5791ddf91 100644
--- a/arbor/fvm_layout.hpp
+++ b/arbor/fvm_layout.hpp
@@ -137,6 +137,12 @@ ARB_ARBOR_API cv_geometry& append(cv_geometry&, const cv_geometry&);
 // to the CV, or in the absence of any internal forks, is exact at the
 // midpoint of an unbranched CV.
 
+struct fvm_diffusion_info {
+    using value_type = fvm_value_type;
+    std::vector<value_type> face_diffusivity;
+    std::vector<std::vector<pw_constant_fn>> axial_inv_diffusivity;
+};
+    
 struct fvm_cv_discretization {
     using size_type = fvm_size_type;
     using index_type = fvm_index_type;
@@ -158,6 +164,9 @@ struct fvm_cv_discretization {
 
     // For each cell, one piece-wise constant value per branch.
     std::vector<std::vector<pw_constant_fn>> axial_resistivity; // [Î©Â·cm]
+
+    // For each diffusive ion species, their properties
+    std::unordered_map<std::string, fvm_diffusion_info> diffusive_ions;
 };
 
 // Combine two fvm_cv_geometry groups in-place.
@@ -242,6 +251,10 @@ struct fvm_ion_config {
 
     // Ion-specific (initial) reversal potential per CV.
     std::vector<value_type> init_revpot;
+
+    // diffusivity
+    bool is_diffusive = false;
+    std::vector<value_type> face_diffusivity;
 };
 
 struct fvm_stimulus_config {
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index 400df4d827edfc280958c7e410fe8e43e150c543..8621ef7eec14d455ba0ffed83f6ffb65db951121 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -27,7 +27,6 @@
 #include "fvm_layout.hpp"
 #include "fvm_lowered_cell.hpp"
 #include "label_resolution.hpp"
-#include "matrix.hpp"
 #include "profile/profiler_macro.hpp"
 #include "sampler_map.hpp"
 #include "util/maputil.hpp"
@@ -77,10 +76,10 @@ public:
 
 private:
     // Host or GPU-side back-end dependent storage.
-    using array = typename backend::array;
-    using shared_state = typename backend::shared_state;
+    using array               = typename backend::array;
+    using shared_state        = typename backend::shared_state;
     using sample_event_stream = typename backend::sample_event_stream;
-    using threshold_watcher = typename backend::threshold_watcher;
+    using threshold_watcher   = typename backend::threshold_watcher;
 
     execution_context context_;
 
@@ -90,7 +89,6 @@ private:
     sample_event_stream sample_events_;
     array sample_time_;
     array sample_value_;
-    matrix<backend> matrix_;
     threshold_watcher threshold_watcher_;
 
     value_type tmin_ = 0;
@@ -273,13 +271,13 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         sample_events_.drop_marked_events();
         PL();
 
-        // Integrate voltage by matrix solve.
-
-        PE(advance:integrate:matrix:build);
-        matrix_.assemble(state_->dt_intdom, state_->voltage, state_->current_density, state_->conductivity);
+        // Integrate voltage / solve cable eq
+        PE(advance:integrate:voltage);
+        state_->integrate_voltage();
         PL();
-        PE(advance:integrate:matrix:solve);
-        matrix_.solve(state_->voltage);
+        // Compute ionic diffusion effects
+        PE(advance:integrate:diffusion);
+        state_->integrate_diffusion();
         PL();
 
         // Integrate mechanism state.
@@ -458,8 +456,6 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
                    [&fvm_info](index_type i){ return fvm_info.cell_to_intdom[i]; });
 
     arb_assert(D.n_cell() == ncell);
-    matrix_ = matrix<backend>(D.geometry.cv_parent, D.geometry.cell_cv_divs,
-                              D.cv_capacitance, D.face_conductance, D.cv_area, fvm_info.cell_to_intdom);
     sample_events_ = sample_event_stream(nintdom);
 
     // Discretize and build gap junction info.
@@ -502,11 +498,26 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
                 D.init_membrane_potential, D.temperature_K, D.diam_um, std::move(src_to_spike),
                 data_alignment? data_alignment: 1u);
 
+    state_->solver =
+        {D.geometry.cv_parent, D.geometry.cell_cv_divs, D.cv_capacitance, D.face_conductance, D.cv_area, fvm_info.cell_to_intdom};
+
     // Instantiate mechanisms, ions, and stimuli.
+    auto mk_diff_solver = [&](const auto& fd) {
+        // TODO(TH) _all_ solvers should share their RO data, ie everything below D here.
+        return std::make_unique<typename backend::ion_state::solver_type>(D.geometry.cv_parent,
+                                                                          D.geometry.cell_cv_divs,
+                                                                          fd,
+                                                                          D.cv_area,
+                                                                          fvm_info.cell_to_intdom);
+    };
 
     for (const auto& [ion, data]: mech_data.ions) {
         if (auto charge = value_by_key(global_props.ion_species, ion)) {
-            state_->add_ion(ion, *charge, data);
+            if (data.is_diffusive) {
+                state_->add_ion(ion, *charge, data, mk_diff_solver(data.face_diffusivity));
+            } else {
+                state_->add_ion(ion, *charge, data, nullptr);
+            }
         }
         else {
             throw cable_cell_error("unrecognized ion '"+ion+"' in mechanism");
@@ -758,6 +769,8 @@ void fvm_lowered_cell_impl<Backend>::resolve_probe_address(
         cable_probe_ion_current_cell,
         cable_probe_ion_int_concentration,
         cable_probe_ion_int_concentration_cell,
+        cable_probe_ion_diff_concentration,
+        cable_probe_ion_diff_concentration_cell,
         cable_probe_ion_ext_concentration,
         cable_probe_ion_ext_concentration_cell>;
 
@@ -1086,6 +1099,16 @@ void resolve_probe(const cable_probe_ion_ext_concentration& p, probe_resolution_
     }
 }
 
+template <typename B>
+void resolve_probe(const cable_probe_ion_diff_concentration& p, probe_resolution_data<B>& R) {
+    for (mlocation loc: thingify(p.locations, R.cell.provider())) {
+        auto opt_i = R.ion_location_index(p.ion, loc);
+        if (!opt_i) continue;
+
+        R.result.push_back(fvm_probe_scalar{{R.state->ion_data.at(p.ion).Xd_.data()+*opt_i}, loc});
+    }
+}
+
 // Common implementation for int and ext concentrations across whole cell:
 template <typename B>
 void resolve_ion_conc_common(const std::vector<fvm_index_type>& ion_cvs, const fvm_value_type* src, probe_resolution_data<B>& R) {
@@ -1117,4 +1140,10 @@ void resolve_probe(const cable_probe_ion_ext_concentration_cell& p, probe_resolu
     resolve_ion_conc_common<B>(R.M.ions.at(p.ion).cv, R.state->ion_data.at(p.ion).Xo_.data(), R);
 }
 
+template <typename B>
+void resolve_probe(const cable_probe_ion_diff_concentration_cell& p, probe_resolution_data<B>& R) {
+    if (!R.state->ion_data.count(p.ion)) return;
+    resolve_ion_conc_common<B>(R.M.ions.at(p.ion).cv, R.state->ion_data.at(p.ion).Xd_.data(), R);
+}
+
 } // namespace arb
diff --git a/arbor/include/arbor/arbexcept.hpp b/arbor/include/arbor/arbexcept.hpp
index dee6b57e990d48ddd95afc23ba26ca33ecc5cdab..1a5679a1f042b42edbff5aa2255c4729f215b591 100644
--- a/arbor/include/arbor/arbexcept.hpp
+++ b/arbor/include/arbor/arbexcept.hpp
@@ -128,6 +128,12 @@ struct ARB_SYMBOL_VISIBLE no_such_parameter: arbor_exception {
     std::string param_name;
 };
 
+struct ARB_SYMBOL_VISIBLE illegal_diffusive_mechanism: arbor_exception {
+    explicit illegal_diffusive_mechanism(const std::string& mech, const std::string& ion);
+    std::string mech;
+    std::string ion;
+};
+
 struct ARB_SYMBOL_VISIBLE invalid_parameter_value: arbor_exception {
     invalid_parameter_value(const std::string& mech_name, const std::string& param_name, const std::string& value_str);
     invalid_parameter_value(const std::string& mech_name, const std::string& param_name, double value);
diff --git a/arbor/include/arbor/cable_cell.hpp b/arbor/include/arbor/cable_cell.hpp
index 5b8ef76bacaf143045d3beafe3da3355b5a0393c..e2927c8f504fbf844bb63f6949602d6922be2d9c 100644
--- a/arbor/include/arbor/cable_cell.hpp
+++ b/arbor/include/arbor/cable_cell.hpp
@@ -162,6 +162,21 @@ struct ARB_SYMBOL_VISIBLE cable_probe_ion_int_concentration_cell {
     std::string ion;
 };
 
+// Ionic diffusive concentration [mmol/L] of ion `source` at `location`.
+// Sample value type: `double`
+// Sample metadata type: `mlocation`
+struct ARB_SYMBOL_VISIBLE cable_probe_ion_diff_concentration {
+    locset locations;
+    std::string ion;
+};
+
+// Ionic diffusiev concentration [mmol/L] of ion `source` across components of the cell.
+// Sample value type: `cable_sample_range`
+// Sample metadata type: `mcable_list`
+struct ARB_SYMBOL_VISIBLE cable_probe_ion_diff_concentration_cell {
+    std::string ion;
+};
+
 // Ionic external concentration [mmol/L] of ion `source` at `location`.
 // Sample value type: `double`
 // Sample metadata type: `mlocation`
@@ -190,7 +205,8 @@ template <typename T>
 using region_assignment =
     std::conditional_t<
         std::is_same<T, density>::value || std::is_same<T, init_int_concentration>::value ||
-        std::is_same<T, init_ext_concentration>::value || std::is_same<T, init_reversal_potential>::value,
+        std::is_same<T, init_ext_concentration>::value || std::is_same<T, init_reversal_potential>::value ||
+        std::is_same<T, ion_diffusivity>::value,
         std::unordered_map<std::string, mcable_map<T>>,
         mcable_map<T>>;
 
@@ -215,7 +231,7 @@ using location_assignment =
 using cable_cell_region_map = static_typed_map<region_assignment,
     density, init_membrane_potential, axial_resistivity,
     temperature_K, membrane_capacitance, init_int_concentration,
-    init_ext_concentration, init_reversal_potential>;
+    ion_diffusivity, init_ext_concentration, init_reversal_potential>;
 
 using cable_cell_location_map = static_typed_map<location_assignment,
     synapse, junction, i_clamp, threshold_detector>;
diff --git a/arbor/include/arbor/cable_cell_param.hpp b/arbor/include/arbor/cable_cell_param.hpp
index 7ab612f6b47750a0cf4987c1950d3a6784695fd8..0e8a78ef07babb2381eb0c25fad6fc4b168ad866 100644
--- a/arbor/include/arbor/cable_cell_param.hpp
+++ b/arbor/include/arbor/cable_cell_param.hpp
@@ -32,6 +32,7 @@ struct cable_cell_ion_data {
     std::optional<double> init_int_concentration;
     std::optional<double> init_ext_concentration;
     std::optional<double> init_reversal_potential;
+    std::optional<double> diffusivity;
 };
 
 // Clamp current is described by a sine wave with amplitude governed by a
@@ -112,6 +113,12 @@ struct ARB_SYMBOL_VISIBLE init_int_concentration {
     double value = NAN; // [mM]
 };
 
+
+struct ARB_SYMBOL_VISIBLE ion_diffusivity {
+    std::string ion = "";
+    double value = NAN; // [m^2/s]
+};
+
 struct ARB_SYMBOL_VISIBLE init_ext_concentration {
     std::string ion = "";
     double value = NAN; // [mM]
@@ -226,6 +233,7 @@ using paintable =
                  axial_resistivity,
                  temperature_K,
                  membrane_capacitance,
+                 ion_diffusivity,
                  init_int_concentration,
                  init_ext_concentration,
                  init_reversal_potential,
@@ -242,6 +250,7 @@ using defaultable =
                  axial_resistivity,
                  temperature_K,
                  membrane_capacitance,
+                 ion_diffusivity,
                  init_int_concentration,
                  init_ext_concentration,
                  init_reversal_potential,
@@ -313,17 +322,18 @@ struct ARB_SYMBOL_VISIBLE cable_cell_global_properties {
     cable_cell_parameter_set default_parameters;
 
     // Convenience methods for adding a new ion together with default ion values.
-    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, double init_revpot) {
+    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, double init_revpot, double diffusivity=0.0) {
         ion_species[ion_name] = charge;
 
         auto &ion_data = default_parameters.ion_data[ion_name];
-        ion_data.init_int_concentration = init_iconc;
-        ion_data.init_ext_concentration = init_econc;
+        ion_data.init_int_concentration  = init_iconc;
+        ion_data.init_ext_concentration  = init_econc;
         ion_data.init_reversal_potential = init_revpot;
+        ion_data.diffusivity             = diffusivity;
     }
 
-    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, mechanism_desc revpot_mechanism) {
-        add_ion(ion_name, charge, init_iconc, init_econc, 0);
+    void add_ion(const std::string& ion_name, int charge, double init_iconc, double init_econc, mechanism_desc revpot_mechanism, double diffusivity=0.0) {
+        add_ion(ion_name, charge, init_iconc, init_econc, 0, diffusivity);
         default_parameters.reversal_potential_method[ion_name] = std::move(revpot_mechanism);
     }
 };
diff --git a/arbor/include/arbor/mechanism_abi.h b/arbor/include/arbor/mechanism_abi.h
index 2998bc765d90f6d54957a4e83c1038dd43a39db7..5c4f2a7891d014fa6bb6003bcbeb67f93325267c 100644
--- a/arbor/include/arbor/mechanism_abi.h
+++ b/arbor/include/arbor/mechanism_abi.h
@@ -9,8 +9,8 @@ extern "C" {
 
 // Version
 #define ARB_MECH_ABI_VERSION_MAJOR 0
-#define ARB_MECH_ABI_VERSION_MINOR 1
-#define ARB_MECH_ABI_VERSION_PATCH 0
+#define ARB_MECH_ABI_VERSION_MINOR 2
+#define ARB_MECH_ABI_VERSION_PATCH 1
 #define ARB_MECH_ABI_VERSION ((ARB_MECH_ABI_VERSION_MAJOR * 10000L * 10000L) + (ARB_MECH_ABI_VERSION_MAJOR * 10000L) + ARB_MECH_ABI_VERSION_PATCH)
 
 typedef const char* arb_mechanism_fingerprint;
@@ -40,9 +40,11 @@ inline const char* arb_mechanism_kind_str(const arb_mechanism_kind& mech) {
 // Ion state variables; view into shared_state
 typedef struct arb_ion_state {
     arb_value_type* current_density;
+    arb_value_type* conductivity;
     arb_value_type* reversal_potential;
     arb_value_type* internal_concentration;
     arb_value_type* external_concentration;
+    arb_value_type* diffusive_concentration;
     arb_value_type* ionic_charge;
     arb_index_type* index;
 } arb_ion_state;
@@ -181,6 +183,7 @@ typedef struct arb_ion_info {
     const char* name;
     bool write_int_concentration;
     bool write_ext_concentration;
+    bool use_diff_concentration;
     bool write_rev_potential;
     bool read_rev_potential;
     bool read_valence;
diff --git a/arbor/include/arbor/mechinfo.hpp b/arbor/include/arbor/mechinfo.hpp
index 39678e4c4964a49243c77c30ee708098a6da1be1..859337bd7c7a640a50c4ad4997486d3fc6eaa232 100644
--- a/arbor/include/arbor/mechinfo.hpp
+++ b/arbor/include/arbor/mechinfo.hpp
@@ -36,6 +36,8 @@ struct ion_dependency {
     bool write_concentration_int = false;
     bool write_concentration_ext = false;
 
+    bool access_concentration_diff = false;
+
     bool read_reversal_potential = false;
     bool write_reversal_potential = false;
 
diff --git a/arbor/include/arbor/simd/avx.hpp b/arbor/include/arbor/simd/avx.hpp
index 2aeaed9f19e534b16ab58338270366169ad72274..1724021d70e9cbf560db10f9018dbaf441bdc887 100644
--- a/arbor/include/arbor/simd/avx.hpp
+++ b/arbor/include/arbor/simd/avx.hpp
@@ -3,7 +3,6 @@
 // AVX/AVX2 SIMD intrinsics implementation.
 
 #ifdef __AVX__
-
 #include <cmath>
 #include <cstdint>
 #include <immintrin.h>
diff --git a/arbor/include/arbor/simd/implbase.hpp b/arbor/include/arbor/simd/implbase.hpp
index 5461108838a0d0d80e8071e6077f9ce036355863..4c3766b210e8861dc3b49d4979f81af87e38c722 100644
--- a/arbor/include/arbor/simd/implbase.hpp
+++ b/arbor/include/arbor/simd/implbase.hpp
@@ -414,7 +414,6 @@ struct implbase {
     static void scatter(tag<ImplIndex>, const vector_type& s, scalar_type* p, const typename ImplIndex::vector_type& index) {
         typename ImplIndex::scalar_type o[width];
         ImplIndex::copy_to(index, o);
-
         store a;
         I::copy_to(s, a);
 
diff --git a/arbor/include/arbor/simple_sampler.hpp b/arbor/include/arbor/simple_sampler.hpp
index ca227fee8d341cb5e7d1c8b02d2e403dd24d8867..196f1f111056ce05d5a63a1f2f45bf6c32c32fce 100644
--- a/arbor/include/arbor/simple_sampler.hpp
+++ b/arbor/include/arbor/simple_sampler.hpp
@@ -24,7 +24,7 @@ struct trace_entry {
 // `trace_data` wraps a std::vector of trace_entry with
 // a copy of the probe-specific metadata associated with a probe.
 //
-// If `Meta` is void, ignore any metadta.
+// If `Meta` is void, ignore any metadata.
 
 template <typename V, typename Meta = void>
 struct trace_data: private std::vector<trace_entry<V>> {
diff --git a/arbor/matrix.hpp b/arbor/matrix.hpp
index 3bc7a54b8591f2befcd41264048056190ce818d5..3b69db5048d72c868b12fad2f820780dfad10bda 100644
--- a/arbor/matrix.hpp
+++ b/arbor/matrix.hpp
@@ -7,6 +7,8 @@
 #include <memory/memory.hpp>
 #include <util/span.hpp>
 
+#include <arbor/arb_types.hpp>
+
 namespace arb {
 
 /// Hines matrix
@@ -15,76 +17,46 @@ namespace arb {
 template<class Backend, class State=typename Backend::matrix_state>
 class matrix {
 public:
-    using backend = Backend;
-
-    // define basic types
-    using value_type = typename backend::value_type;
-    using index_type = typename backend::index_type;
-    using size_type = typename backend::size_type;
-
-    // define storage types
-    using array = typename backend::array;
-    using iarray = typename backend::iarray;
-
-    // back end specific storage for matrix state
-    using state = State;
+    using backend    = Backend;
+    using array      = typename backend::array;
+    using iarray     = typename backend::iarray;
+    using const_view = const array&;
+    using state      = State;                         // backend specific storage for matrix state
 
     matrix() = default;
 
-    matrix(const std::vector<index_type>& pi,
-           const std::vector<index_type>& ci,
-           const std::vector<value_type>& cv_capacitance,
-           const std::vector<value_type>& face_conductance,
-           const std::vector<value_type>& cv_area,
-           const std::vector<index_type>& cell_to_intdom):
-        parent_index_(pi.begin(), pi.end()),
-        cell_index_(ci.begin(), ci.end()),
-        cell_to_intdom_(cell_to_intdom.begin(), cell_to_intdom.end()),
+    matrix(const std::vector<arb_index_type>& pi,
+           const std::vector<arb_index_type>& ci,
+           const std::vector<arb_value_type>& cv_capacitance,
+           const std::vector<arb_value_type>& face_conductance,
+           const std::vector<arb_value_type>& cv_area,
+           const std::vector<arb_index_type>& cell_to_intdom):
+        num_cells_{ci.size() - 1},
         state_(pi, ci, cv_capacitance, face_conductance, cv_area, cell_to_intdom)
     {
-        arb_assert(cell_index_[num_cells()] == index_type(parent_index_.size()));
+        arb_assert(cell_index()[num_cells()] == arb_index_type(parent_index().size()));
     }
 
     /// the dimension of the matrix (i.e. the number of rows or colums)
-    std::size_t size() const {
-        return parent_index_.size();
-    }
-
+    std::size_t size() const { return state_.size(); }
     /// the number of cell matrices that have been packed together
-    size_type num_cells() const {
-        return cell_index_.size() - 1;
-    }
-
+    std::size_t num_cells() const { return num_cells_; }
     /// the vector holding the parent index
-    const iarray& p() const { return parent_index_; }
-
+    const iarray& parent_index() const { return state_.parent_index; }
     /// the partition of the parent index over the cells
-    const iarray& cell_index() const { return cell_index_; }
-
+    const iarray& cell_index() const { return state_.cell_cv_divs; }
     /// Solve the linear system into a given solution storage.
-    void solve(array& to) {
-        state_.solve(to);
-    }
-
+    void solve(array& to) { state_.solve(to); }
     /// Assemble the matrix for given dt
-    void assemble(const array& dt_cell, const array& voltage, const array& current, const array& conductivity) {
-        state_.assemble(dt_cell, voltage, current, conductivity);
-    }
+    void assemble(const_view& dt, const_view& U, const_view& I, const_view& g) { state_.assemble(dt, U, I, g); }
 
 private:
-    /// the parent indice that describe matrix structure
-    iarray parent_index_;
-
-    /// indexes that point to the start of each cell in the index
-    iarray cell_index_;
-
-    /// indexes that point to the index of the integration domain
-    iarray cell_to_intdom_;
+   std::size_t num_cells_ = 0;
 
 public:
-    // Provide via public interface to make testing much easier.
-    // If you modify this directly without knowing what you are doing,
-    // you get what you deserve.
+    // Provide via public interface to make testing much easier. If you modify
+    // this directly without knowing what you are doing, you get what you
+    // deserve.
     state state_;
 };
 
diff --git a/arbor/mechinfo.cpp b/arbor/mechinfo.cpp
index b2dbd7d0c202788033373175168a6c23c918014e..abb54ba72da8e40b210f2363c7f36a2fb0af00eb 100644
--- a/arbor/mechinfo.cpp
+++ b/arbor/mechinfo.cpp
@@ -24,6 +24,7 @@ mechanism_info::mechanism_info(const arb_mechanism_type& m) {
         const auto& v = m.ions[idx];
         ions[v.name] = { v.write_int_concentration,
         v.write_ext_concentration,
+        v.use_diff_concentration,
         v.read_rev_potential,
         v.write_rev_potential,
         v.read_valence,
diff --git a/arborio/cableio.cpp b/arborio/cableio.cpp
index e06d81c15e8ed12391611275e37f9c0fc6bd9655..2727788b3f19129e82820ca2ce7375b853566da6 100644
--- a/arborio/cableio.cpp
+++ b/arborio/cableio.cpp
@@ -57,6 +57,9 @@ s_expr mksexp(const init_ext_concentration& c) {
 s_expr mksexp(const init_reversal_potential& c) {
     return slist("ion-reversal-potential"_symbol, s_expr(c.ion), c.value);
 }
+s_expr mksexp(const ion_diffusivity& c) {
+    return slist("ion-diffusivity"_symbol, s_expr(c.ion), c.value);
+}
 s_expr mksexp(const i_clamp& c) {
     std::vector<s_expr> evlps;
     std::transform(c.envelope.begin(), c.envelope.end(), std::back_inserter(evlps),
@@ -201,7 +204,7 @@ using version_tuple  = std::tuple<std::string>;
 #define ARBIO_DEFINE_DOUBLE_ARG(name) arb::name make_##name(const std::string& ion, double val) { return arb::name{ion, val};}
 
 ARB_PP_FOREACH(ARBIO_DEFINE_SINGLE_ARG, init_membrane_potential, temperature_K, axial_resistivity, membrane_capacitance, threshold_detector)
-ARB_PP_FOREACH(ARBIO_DEFINE_DOUBLE_ARG, init_int_concentration, init_ext_concentration, init_reversal_potential)
+ARB_PP_FOREACH(ARBIO_DEFINE_DOUBLE_ARG, init_int_concentration, init_ext_concentration, init_reversal_potential, ion_diffusivity)
 
 std::vector<arb::i_clamp::envelope_point> make_envelope(const std::vector<std::variant<envelope_tuple>>& vec) {
     std::vector<arb::i_clamp::envelope_point> envlp;
@@ -582,6 +585,8 @@ eval_map named_evals{
         "'ion_internal_concentration' with 2 arguments (ion:string val:real)")},
     {"ion-external-concentration", make_call<std::string, double>(make_init_ext_concentration,
         "'ion_external_concentration' with 2 arguments (ion:string val:real)")},
+    {"ion-diffusivity", make_call<std::string, double>(make_ion_diffusivity,
+        "'ion_diffusivity' with 2 arguments (ion:string val:real)")},
     {"ion-reversal-potential", make_call<std::string, double>(make_init_reversal_potential,
         "'ion_reversal_potential' with 2 arguments (ion:string val:real)")},
     {"envelope", make_arg_vec_call<envelope_tuple>(make_envelope,
@@ -614,6 +619,7 @@ eval_map named_evals{
     {"paint", make_call<region, axial_resistivity>(make_paint, "'paint' with 2 arguments (reg:region v:axial-resistivity)")},
     {"paint", make_call<region, init_int_concentration>(make_paint, "'paint' with 2 arguments (reg:region v:ion-internal-concentration)")},
     {"paint", make_call<region, init_ext_concentration>(make_paint, "'paint' with 2 arguments (reg:region v:ion-external-concentration)")},
+    {"paint", make_call<region, ion_diffusivity>(make_paint, "'paint' with 2 arguments (reg:region v:ion-diffusivity)")},
     {"paint", make_call<region, init_reversal_potential>(make_paint, "'paint' with 2 arguments (reg:region v:ion-reversal-potential)")},
     {"paint", make_call<region, density>(make_paint, "'paint' with 2 arguments (reg:region v:density)")},
 
@@ -623,6 +629,7 @@ eval_map named_evals{
     {"default", make_call<axial_resistivity>(make_default, "'default' with 1 argument (v:axial-resistivity)")},
     {"default", make_call<init_int_concentration>(make_default, "'default' with 1 argument (v:ion-internal-concentration)")},
     {"default", make_call<init_ext_concentration>(make_default, "'default' with 1 argument (v:ion-external-concentration)")},
+    {"default", make_call<ion_diffusivity>(make_default, "'default' with 1 argument (v:ion-diffusivity)")},
     {"default", make_call<init_reversal_potential>(make_default, "'default' with 1 argument (v:ion-reversal-potential)")},
     {"default", make_call<ion_reversal_potential_method>(make_default, "'default' with 1 argument (v:ion-reversal-potential-method)")},
     {"default", make_call<cv_policy>(make_default, "'default' with 1 argument (v:cv-policy)")},
diff --git a/doc/concepts/decor.rst b/doc/concepts/decor.rst
index 3b15a827b85ab09472166384482fb5f0bcd230d3..f817193a8349be246b0e23919a628f0b56042ae2 100644
--- a/doc/concepts/decor.rst
+++ b/doc/concepts/decor.rst
@@ -188,6 +188,7 @@ Each ion species has the following properties:
 2. *external concentration*: concentration on exterior of the membrane [mM].
 3. *reversal potential*: reversal potential [mV].
 4. *reversal potential mechanism*:  method for calculating reversal potential.
+5. *diffusivity*: diffusion coefficient for marker concentration, defaults to zero [m^2/s].
 
 Properties 1, 2 and 3 must be defined, and are used as the initial values for
 each quantity at the start of the simulation. They are specified globally,
@@ -256,6 +257,29 @@ using the *paint* interface:
     # Alternatively, one can selectively overwrite the global defaults.
     decor.paint('(tag 2)', arbor.ion('ca', rev_pot=126)
 
+To enable diffusion of ion species along the morphology (axial diffusion) one
+sets the per-species diffusivity to a positive value. It can be changed per
+region and defaults to zero. This is strictly passive transport according to the
+diffusion equation ``X' = ÃŸ âˆ†X`` where ``X`` is the species' diffusive
+concentration and ``ÃŸ`` the diffusivity constant.
+
+.. code-block:: Python
+
+    decor = arbor.decor()
+    decor.set_ion('ca', diff=23.0)
+    decor.paint('"region"', 'ca', diff=42.0)
+
+Be aware of the consequences of setting ``ÃŸ > 0`` only in some places, namely
+pile-up effects similar to reflective bounds.
+
+The diffusive concentration is *separate* from the internal concentration for
+reasons innate to the cable model, which require resetting it to its starting
+point at every timestep. It can be accessed from NMODL density and point
+mechanisms as an independent quantity, see :ref:`NMODL mechanism <nmodl>`. It is
+present on the full morphology if its associated diffusivity is set to a
+non-zero value on any subset of the morphology, ie ``region``. It is initialised
+to the value of the internal concentration at time zero.
+
 .. _cablecell-place:
 
 Placed dynamics
diff --git a/doc/dev/axial-diff.tex b/doc/dev/axial-diff.tex
new file mode 100644
index 0000000000000000000000000000000000000000..de4d178fa4ea11efe68b28656ee7e7a0a1677bac
--- /dev/null
+++ b/doc/dev/axial-diff.tex
@@ -0,0 +1,203 @@
+% Created 2022-05-04 Wed 13:36
+% Intended LaTeX compiler: pdflatex
+\documentclass[a4paper]{article}
+\usepackage[utf8]{inputenc}
+\usepackage[T1]{fontenc}
+\usepackage{graphicx}
+\usepackage{longtable}
+\usepackage{wrapfig}
+\usepackage{rotating}
+\usepackage[normalem]{ulem}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{capt-of}
+\usepackage{hyperref}
+\author{Thorsten Hater}
+\date{\today}
+\title{Axial Diffusion of Ionic Species in Arbor}
+\hypersetup{
+ pdfauthor={Thorsten Hater},
+ pdftitle={Axial Diffusion of Ionic Species in Arbor},
+ pdfkeywords={},
+ pdfsubject={},
+ pdfcreator={Emacs 28.1.50 (Org mode 9.6)}, 
+ pdflang={English}}
+\usepackage{biblatex}
+
+\begin{document}
+
+\maketitle
+\tableofcontents
+
+
+\section{Introduction}
+\label{sec:org8cd0584}
+
+Arbor's model of the neurite -- and similarly that of cable theory in general --
+is valid in a narrow region around the membrane, ie \(r_{m} - \epsilon \leq r
+\leq r_{m} - \epsilon\). Therefore, the state variables of the model -- in
+particular ionic concentrations and currents -- are defined only in said small
+region.
+
+For treating diffusion of ionic species along the axial direction of the neuron,
+however, we will either need to assume a radially homogeneous distribution of
+ions inside the cell plasma or resort to a more detailed model of the general
+dynamics.
+
+Therefore, in the axial diffusion model adopted by Arbor, we are going to assume
+that the concentration \(M_s\) of an ionic species \(s\) behaves like \(M_s(x, r,
+\theta) = M_s(x)\) and is defined on the full inside of the neuron \(0 \leq r <
+r_m\). Furthermore, in contrast to traditional cable theory, we will take into
+account the effect trans-membrane currents on the internal concentration when
+modelling diffusion, ie
+
+\begin{align*}
+\partial_{t} M_{s} = \nabla \cdot (D_{s} \nabla M_{s}) + I_{m,s}
+\end{align*}
+
+This makes the axial diffusion model richer than the plain cable theory approach
+as implemented in Arbor so far. In particular the cable model assumes internal
+(and external) concentrations to be defined in a thin shell around the membrane
+and to be buffered by a far larger volume away from that shell. Thus, these
+values are reset to the buffer value infinitely fast, ie every time step in
+Arbor, unless managed by a concentration model density mechanism. However, this
+makes diffusion impossible as mechanisms cannot not exchange data across CVs.
+Therefore, finally!, we chose to add a new quantity \texttt{Xd} that is not buffered
+and serves as a diffusing proxy concentration that can be coupled by a mechanism
+to the internal concentration if needed. This begets an awkward, but workable,
+interface with ODE models of \texttt{Xd} in NMODL, see the relevant documentation.
+
+This proxy will be initialised as \texttt{Xd = Xi} and present on the full morphology
+iff \(\D_{s} > 0\) on any non-vanishing subset of the morphology.
+
+\section{Derivation and FVM}
+\label{sec:org06e2271}
+
+The cable equation is the foundation of Arbor's model of the neuron
+\begin{align*}
+    c_{m} \partial_{t} V = \frac{1}{2 \pi a} \partial_{x} \left(\frac{\pi a^{2}}{R_{L}} \partial_{x} V\right) + i_{m}
+\end{align*}
+We want to solve the diffusion equation, excluding currents for now,
+\begin{align*}
+\partial_{t} M = \nabla \cdot (D \nabla M)
+\end{align*}
+by application of the finite volume method (as for the cable equation). That is,
+we average over the control volume (CV) \(V\) to rewrite in terms of the average
+concentration \(\bar{M}\) and apply Stoke's theorem on the right hand side. Note
+that we suppress the species subscript \(s\) for clarity where obvious from
+context or not relevant to the discussion. Thus
+\begin{align*}
+\partial_{t} \bar M_{i} = \frac{1}{\Delta V_{i}}\int\limits_{\partial V} \mathrm{d}a \cdot D \nabla M
+\end{align*}
+For linking to NMODL, note that \(M\) is identified with \texttt{Xd} in this discussion.
+Note that \(\nabla \sim \partial_{x}\) in our 1.5d model.
+
+Next, we define the fluxes
+\begin{align*}
+F_{ij} = \sigma_{ij}(\bar M_{i} - \bar M_{j})
+\end{align*}
+where \(\sigma_{ij}\) is the contact area of segments \(i\) and \(j\).
+
+Then we apply an implicit Euler time-step
+\begin{align*}
+\frac{1}{\Delta t} \left(M^{k+1} - M^{k}\right) = \sum_{j}\frac{\sigma_{ij}}{P_{L}\Delta x_{ij}}\eta_{ij} \left(M_{i}^{k+1} - M_{j}^{k+1}\right)
+\end{align*}
+where \(\eta_{ij} = 1\) if \(i\) and \(j\) are neighbours and zero otherwise. We introduced
+\(P_L = P \cdot A/l\) where \(P = 1/D\) in analogy with \(R_{L}\) ('\emph{specific diffusive
+resistivity}').
+
+This can be rewritten to expose the structure of the linear system more clearly
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k}= \frac{1}{\Delta t}M_{i}^{k+1} + \sum\limits_{j}\beta_{ij} \left(M_{i}^{k+1} - M_{j}^{k+1} \right)
+\end{align*}
+where
+\begin{align*}
+\beta_{ij} = \frac{\sigma_{ij}}{P_{L} \Delta x_{ij}}\eta_{ij}
+\end{align*}
+
+\section{Treatment of Trans-Membrane Ion Flux}
+\label{sec:org0eeed52}
+
+Finally, we add back the ionic currents to the concentration changes to arrive at
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k} = \left(\frac{1}{\Delta t}
+ + \sum\limits_{j}\beta_{ij}\right) M_{i}^{k+1} - \sum\limits_{j}\beta_{ij}M_{j}^{k+1} + \frac{\sigma_{i}}{q I_m(V^{k+1
+})
+\end{align*}
+where we used the CV surface \(\sigma_{i}\), the ion species' current \(I_m\) through
+\(\sigma_{i}\) and its charge \(q\).
+
+We follow Arbor's model and write Ohm's law
+\begin{align*}
+g = \frac{\partial I_m(V)}{\partial V} = \frac{\partial t}{\partial V}\frac{\partial I_m}{\partial t}
+\end{align*}
+where \(g\) is the \emph{per species conductivity} and approximately we have
+\begin{align*}
+I_m^{k+1} - I_m^{k} = g\left(V^{k+1} - V^{k}\right)
+\end{align*}
+Inserting this into the diffusion equation and rearranging to separate
+\(\cdot^{k}\) and \(\cdot^{k+1}\) terms
+\begin{align*}
+\frac{1}{\Delta t}M_{i}^{k} - \frac{\sigma_{i}}{q}\left(I_m^k - gV^k\right)= \left(\frac{1}{\Delta t}
+ + \sum\limits_{j}\beta_{ij}\right) M_{i}^{k+1} - \sum\limits_{j}\beta_{ij}M_{j}^{k+1} + \frac{\sigma_{i}g}{q} V^{k+1}
+\end{align*}
+
+In \texttt{fvm\_discretize} we compute \(\alpha_{ij}\) -- the analogue for \(\beta_{ij}\) in the cable
+equation -- called \texttt{face\_conductance} in code
+\begin{verbatim}
+mcable span{bid, parent_refpt, cv_refpt};
+double resistance =
+    embedding.integrate_ixa(span,
+                            D.axial_resistivity[0].at(bid));
+D.face_conductance[i] = 100/resistance; // 100 scales to ÂµS.
+\end{verbatim}
+
+\section{Units}
+\label{sec:org4ff2b42}
+
+We have the following units preset for us
+
+\begin{center}
+\begin{tabular}{llll}
+\hline
+Quantity & Symbol & Unit & SI\\
+\hline
+Concentration & \(M\) & \(m\mathrm{mol}/l\) & \(10^{-6} \mathrm{mol}/m^{3}\)\\
+Length & \(x\) & \(\mu m\) & \(10^{-6} m\)\\
+Area & \(A\) & \((\mu m)^{2}\) & \(10^{-12} m^{2}\)\\
+Time & \(t\) & \(ms\) & \(10^{-3} s\)\\
+\hline
+\end{tabular}
+\end{center}
+
+From this we need to fix units for
+\begin{itemize}
+\item fluxes \(F\)
+\item diffusivity \(D\) and \(D_{L}\)
+\item diffusive resistivity \(P\) and \(P_{L}\)
+\end{itemize}
+
+We start with \(D\) and apply dimensional analysis to the diffusion equation to note
+\begin{align*}
+[M]/[T] &= [M][D]/[L^{2}]\\
+\end{align*}
+which results in
+\begin{align*}
+[D] &= [L^{2}][T]^{-1}\\
+&= (\mu m)^{2}/ms = 10^{-9} m^{2}/ s
+\end{align*}
+in Arbor's internal units.
+Similarly, we rewrite the diffusion equation in its abstract form
+\begin{align*}
+\partial_{t} M = \nabla \cdot F
+\end{align*}
+to arrive at
+\begin{align*}
+[M]/[T] &= [F]/[L]\\
+\Leftrightarrow [F] &= [L][M][T]^{-1}\\
+\Rightarrow [F] &= 10^{-9} \mathrm{mol}/(m^{2} s)
+\end{align*}
+
+From Yasuda et al we have \([D_{eff}] = 10^{-12}m^{2}/s\) which fits our derivation
+for \(D\), expect for their use of \([T] = s\) as opposed to \([T]=ms\).
+\end{document}
diff --git a/doc/fileformat/nmodl.rst b/doc/fileformat/nmodl.rst
index 5175f0dd566105a918a3f3d4f5cf6d8e86b943f9..2829521f293e0cda671c4107a296f9032716f4d4 100644
--- a/doc/fileformat/nmodl.rst
+++ b/doc/fileformat/nmodl.rst
@@ -56,6 +56,7 @@ ion X current (point and junction mechanisms)    iX
 ion X reversal potential                         eX                                                   mV
 ion X internal concentration                     Xi                                                   mmol/L
 ion X external concentration                     Xo                                                   mmol/L
+ion X diffusive concentration                    Xd                                                   mmol/L
 ===============================================  ===================================================  ==========
 
 Ions
@@ -72,12 +73,22 @@ Ions
   ``PARAMETER``, ``ASSIGNED`` or ``CONSTANT``.
 * ``READ`` and ``WRITE`` permissions of ``Xi``, ``Xo``, ``eX`` and ``iX`` can be set
   in NMODL in the ``NEURON`` block. If a parameter is writable it is automatically
-  readable and doesn't need to be specified as both.
-* If ``Xi``, ``Xo``, ``eX``, ``iX`` are used in a ``PROCEDURE`` or ``FUNCTION``,
+  readable and must not be specified as both.
+* If ``Xi``, ``Xo``, ``eX``, ``iX``, ``Xd`` are used in a ``PROCEDURE`` or ``FUNCTION``,
   they need to be passed as arguments.
 * If ``Xi`` or ``Xo`` (internal and external concentrations) are written in the
-  NMODL mechanism they need to be declared as ``STATE`` variables and their initial
-  values have to be set in the ``INITIAL`` block in the mechanism.
+  NMODL mechanism they need to be declared as ``STATE`` variables and their
+  initial values have to be set in the ``INITIAL`` block in the mechanism. This
+  transfers **all** responsibility for handling ``Xi`` / ``Xo`` to the mechanism
+  and will lead to painted initial values to be ignored. If these quantities are
+  not made ``STATE`` they may be written to, but their values will be reset to
+  their initial values every time step.
+* The diffusive concentration ``Xd`` does not share this semantics. It will not
+  be reset, even if not in ``STATE``, and may freely be written. This comes at the
+  cost of awkward treatment of ODEs for ``Xd``, see the included ``decay.mod`` for
+  an example.
+* ``Xd`` is present on all cables iff its associated diffusivity is set to a
+  non-zero value.
 
 Special variables
 -----------------
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
index 7a654b4a3d309735e448ce44a15c944d5b6cef69..752737ec64ef94096dd29339f797f928d18cdb19 100644
--- a/example/CMakeLists.txt
+++ b/example/CMakeLists.txt
@@ -12,3 +12,4 @@ add_subdirectory(gap_junctions)
 add_subdirectory(single)
 add_subdirectory(probe-demo)
 add_subdirectory(lfp)
+add_subdirectory(diffusion)
diff --git a/example/diffusion/CMakeLists.txt b/example/diffusion/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..687914b75276077ceb81023a2d6ca52e15d163d4
--- /dev/null
+++ b/example/diffusion/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_executable(diffusion EXCLUDE_FROM_ALL diffusion.cpp)
+add_dependencies(examples diffusion)
+target_link_libraries(diffusion PRIVATE arbor arborio arborenv arbor-sup ext-tinyopt)
diff --git a/example/diffusion/README.md b/example/diffusion/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f570e33ffec2ca0415783da64559be881858ed8e
--- /dev/null
+++ b/example/diffusion/README.md
@@ -0,0 +1,18 @@
+# Diffusion example.
+
+Example of simulating a simple linear neuron with a diffusing sodium
+concentration. An event injecting more Na into the centre is fired at t=0. The
+measured concentrations will be written to disk further analysis.
+
+ |    |  Option  | Meaning                         | Default     |
+ |----|----------|---------------------------------|-------------|
+ | -t | --tfinal | Length of the simulation period | 1 ms        |
+ | -d | --dt     | Simulation time step            | 0.01 ms     |
+ | -s | --ds     | Sampling interval               | 0.1 ms      |
+ | -g | --gpu    | Use GPU id, enabled if >=0      | -1          |
+ | -l | --length | Length of stick                 | 30 um       |
+ | -x | --dx     | Discretisation                  | 1 um        |
+ | -i | --Xi     | Initial Na concentration        | 0 mM        |
+ | -b | --beta   | Na diffusivity                  | 0.005 m^2/s |
+ | -o | --output | Save samples                    | log.csv     |
+ 
diff --git a/example/diffusion/diffusion.cpp b/example/diffusion/diffusion.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8f94651b1029ca7bba049b51cf4e032bce48699
--- /dev/null
+++ b/example/diffusion/diffusion.cpp
@@ -0,0 +1,126 @@
+#include <any>
+#include <iomanip>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include <tinyopt/tinyopt.h>
+
+#include <arborio/label_parse.hpp>
+
+#include <arbor/load_balance.hpp>
+#include <arbor/cable_cell.hpp>
+#include <arbor/morph/morphology.hpp>
+#include <arbor/morph/segment_tree.hpp>
+#include <arbor/simulation.hpp>
+#include <arbor/sampling.hpp>
+#include <arbor/util/any_cast.hpp>
+#include <arbor/util/any_ptr.hpp>
+
+using namespace arborio::literals;
+using namespace arb;
+
+struct linear: public recipe {
+     linear(double ext, double dx, double Xi, double beta): l{ext}, d{dx}, i{Xi}, b{beta} {
+        gprop.default_parameters = neuron_parameter_defaults;
+        gprop.default_parameters.discretization = cv_policy_max_extent{d};
+    }
+
+    cell_size_type num_cells()                                   const override { return 1; }
+    cell_kind get_cell_kind(cell_gid_type)                       const override { return cell_kind::cable; }
+    std::any get_global_properties(cell_kind)                    const override { return gprop; }
+    std::vector<probe_info> get_probes(cell_gid_type)            const override { return {cable_probe_ion_diff_concentration_cell{"na"}}; }
+    std::vector<event_generator> event_generators(cell_gid_type) const override { return {explicit_generator({{{"Zap"}, 0.0, 0.005}})}; }
+    util::unique_any get_cell_description(cell_gid_type)         const override {
+        // Stick morphology
+        // -----|-----
+        segment_tree tree;
+        tree.append(mnpos, { -l, 0, 0, 3}, {l, 0, 0, 3}, 1);
+        // Setup
+        decor decor;
+        decor.set_default(init_int_concentration{"na", i});
+        decor.set_default(ion_diffusivity{"na", b});
+        decor.place("(location 0 0.5)"_ls, synapse("inject/x=na", {{"alpha", 200.0*l}}), "Zap");
+        decor.paint("(all)"_reg, density("decay/x=na"));
+        return cable_cell({tree}, {}, decor);
+    }
+
+    cable_cell_global_properties gprop;
+    double l, d, i, b;
+};
+
+std::ofstream out;
+
+void sampler(probe_metadata pm, std::size_t n, const sample_record* samples) {
+    auto ptr = util::any_cast<const mcable_list*>(pm.meta);
+    assert(ptr);
+    auto n_cable = ptr->size();
+    out << "time,prox,dist,Xd\n"
+        << std::fixed << std::setprecision(4);
+    for (std::size_t i = 0; i<n; ++i) {
+        const auto& [val, _ig] = *util::any_cast<const cable_sample_range*>(samples[i].data);
+        for (unsigned j = 0; j<n_cable; ++j) {
+            mcable loc = (*ptr)[j];
+            out << samples[i].time << ',' << loc.prox_pos << ',' << loc.dist_pos << ',' << val[j] << '\n';
+        }
+    }
+    out << '\n';
+}
+
+struct opt_t {
+    double L  = 30.0;
+    double dx = 1.0;
+    double T  = 1.0;
+    double dt = 0.01;
+    double ds = 0.1;
+    double Xi = 0.0;
+    double dX = 0.005;
+    std::string out = "log.csv";
+    int gpu = -1;
+};
+
+opt_t read_options(int argc, char** argv) {
+    auto usage = "\n"
+                 "  -t|--tfinal     [Length of the simulation period (1 ms)]\n"
+                 "  -d|--dt         [Simulation time step (0.01 ms)]\n"
+                 "  -s|--ds         [Sampling interval (0.1 ms)]\n"
+                 "  -g|--gpu        [Use GPU id (-1); enabled if >=0]\n"
+                 "  -l|--length     [Length of stick (30 um)]\n"
+                 "  -x|--dx         [Discretisation (1 um)]\n"
+                 "  -i|--Xi         [Initial Na concentration (0 mM)]\n"
+                 "  -b|--beta       [Na diffusivity (0.005 m^2/s)]\n"
+                 "  -o|--output     [Save samples (log.csv)]\n";
+    auto help = [argv, &usage] { to::usage(argv[0], usage); };
+    opt_t opt;
+    to::option options[] = {{opt.T,                                "-t", "--tfinal"},
+                            {opt.dt,                               "-d", "--dt"},
+                            {opt.ds,                               "-s", "--ds"},
+                            {opt.L,                                "-l", "--length"},
+                            {opt.dx,                               "-x", "--dx"},
+                            {opt.Xi,                               "-i", "--Xi"},
+                            {opt.dX,                               "-b", "--beta"},
+                            {opt.gpu,                              "-g", "--gpu"},
+                            {opt.out,                              "-o", "--out"},
+                            {to::action(help), to::flag, to::exit, "-h", "--help"}};
+    if (!to::run(options, argc, argv+1)) return opt_t{};
+    if (argv[1])          throw to::option_error("Unrecognized argument", argv[1]);
+    if (opt.dt <= 0.0)    throw std::runtime_error("Time step must be positive!");
+    if (opt.ds <= 0.0)    throw std::runtime_error("Sampling interval must be positive!");
+    if (opt.ds <  opt.dt) throw std::runtime_error("Time step is greater than a sampling interval!");
+    if (opt.T  <= opt.ds) throw std::runtime_error("Runtime is less than a sampling interval!");
+    if (opt.dX <= 0.0)    throw std::runtime_error("Diffusivity must be positive!");
+    return opt;
+}
+
+int main(int argc, char** argv) {
+    auto O = read_options(argc, argv);
+    out = std::ofstream{O.out};
+    if (!out.good()) throw std::runtime_error("Could not open output file for writing.");
+    auto C = make_context({1, O.gpu});
+    auto R = linear{O.L, O.dx, O.Xi, O.dX};
+    simulation S(R, partition_load_balance(R, C), C);
+    S.add_sampler(all_probes, regular_schedule(O.ds), sampler);
+    S.run(O.T, O.dt);
+    out.close();
+}
diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index 976304bc63295afa065417b66e33a6a2168d612b..97dbf7e336ee25ed968498199e4001c720c3aa43 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -27,7 +27,7 @@ make_catalogue(
   NAME default
   SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/default"
   OUTPUT "CAT_DEFAULT_SOURCES"
-  MOD exp2syn expsyn expsyn_curr expsyn_stdp hh kamt kdrmt nax nernst pas gj
+  MOD exp2syn expsyn expsyn_curr expsyn_stdp hh kamt kdrmt nax nernst pas gj decay inject
   CXX
   PREFIX "${PROJECT_SOURCE_DIR}/mechanisms"
   CXX_FLAGS_TARGET "${ARB_CXX_FLAGS_TARGET_FULL}"
diff --git a/mechanisms/default/decay.mod b/mechanisms/default/decay.mod
new file mode 100644
index 0000000000000000000000000000000000000000..a380d5fd3625457830998ab40a24c8e214c69da5
--- /dev/null
+++ b/mechanisms/default/decay.mod
@@ -0,0 +1,18 @@
+NEURON {
+    SUFFIX decay
+    USEION x WRITE xd, ix
+}
+
+INITIAL { F = xd }
+
+STATE { F }
+
+BREAKPOINT {
+   SOLVE dF METHOD cnexp
+   xd = F
+}
+
+DERIVATIVE dF {
+   F = xd
+   F' = -5*F
+}
diff --git a/mechanisms/default/inject.mod b/mechanisms/default/inject.mod
new file mode 100644
index 0000000000000000000000000000000000000000..392895764704805a3bde53a1fd0ca5bedc92332b
--- /dev/null
+++ b/mechanisms/default/inject.mod
@@ -0,0 +1,20 @@
+NEURON {
+  POINT_PROCESS inject
+  USEION x WRITE xd, ix
+  RANGE alpha, beta
+}
+
+ASSIGNED { beta }
+
+PARAMETER { alpha = 200 }
+
+INITIAL { beta = 0 }
+
+BREAKPOINT {
+    xd = xd + beta
+    beta = 0
+}
+
+NET_RECEIVE(weight) {
+    beta = alpha*weight
+}
diff --git a/modcc/blocks.hpp b/modcc/blocks.hpp
index 495a47ac0337b02a2b3f38b748bf8ccca7f66758..ff0f0b138fdd360239ef090e0865f0577b22fae9 100644
--- a/modcc/blocks.hpp
+++ b/modcc/blocks.hpp
@@ -32,6 +32,9 @@ struct IonDep {
     bool uses_concentration_int() const {
         return has_variable(name + "i");
     };
+    bool uses_concentration_diff() const {
+        return has_variable(name + "d");
+    };
     bool uses_concentration_ext() const {
         return has_variable(name + "o");
     };
diff --git a/modcc/identifier.hpp b/modcc/identifier.hpp
index dd31222f1e361ce5d3f16d1790e03795f5521f81..47c835f0f977a4c7201ec6039779e28ae09fc79a 100644
--- a/modcc/identifier.hpp
+++ b/modcc/identifier.hpp
@@ -49,7 +49,9 @@ enum class sourceKind {
     dt,
     time,
     ion_current,
+    ion_conductivity,
     ion_current_density,
+    ion_diffusive,
     ion_revpot,
     ion_iconc,
     ion_econc,
@@ -95,6 +97,7 @@ inline std::string to_string(sourceKind v) {
     case sourceKind::dt:                  return "dt";
     case sourceKind::ion_current:         return "ion_current";
     case sourceKind::ion_current_density: return "ion_current_density";
+    case sourceKind::ion_diffusive:       return "ion_diffusive";
     case sourceKind::ion_revpot:          return "ion_revpot";
     case sourceKind::ion_iconc:           return "ion_iconc";
     case sourceKind::ion_econc:           return "ion_econc";
@@ -122,6 +125,7 @@ inline sourceKind ion_source(const std::string& ion, const std::string& var, mod
     else if (var=="i"+ion) return mkind==moduleKind::density? sourceKind::ion_current_density: sourceKind::ion_current;
     else if (var=="e"+ion) return sourceKind::ion_revpot;
     else if (var==ion+"i") return sourceKind::ion_iconc;
+    else if (var==ion+"d") return sourceKind::ion_diffusive;
     else if (var==ion+"o") return sourceKind::ion_econc;
     else return sourceKind::no_source;
 }
diff --git a/modcc/module.cpp b/modcc/module.cpp
index 56d22f0326aad8df5b80736d76950a2a71864688..c167b0537f9d347da8d2f13f8b80a8c358effe3b 100644
--- a/modcc/module.cpp
+++ b/modcc/module.cpp
@@ -36,6 +36,7 @@ class NrnCurrentRewriter: public BlockRewriterBase {
                         if (src==sourceKind::current_density ||
                             src==sourceKind::current ||
                             src==sourceKind::ion_current_density ||
+                            src==sourceKind::ion_conductivity ||
                             src==sourceKind::ion_current)
                         {
                             return src;
@@ -49,12 +50,16 @@ class NrnCurrentRewriter: public BlockRewriterBase {
 
     bool has_current_update_ = false;
     std::set<std::string> current_vars_;
-    std::set<expression_ptr> conductivity_exps_;
+    std::map<std::string, expression_ptr> conductivity_exps_;
 
 public:
+
+    std::string non_specific_current = "";
     using BlockRewriterBase::visit;
 
     virtual void finalize() override {
+        // Take current name 'iX' and strip off leading 'i' to get ion name.
+        auto i2ion = [this](const auto& name) { return id("conductivity_" + name.substr(1) + "_"); };
         if (has_current_update_) {
             expression_ptr current_sum, conductivity_sum;
             for (auto& curr: current_vars_) {
@@ -66,13 +71,18 @@ public:
                             Location{}, std::move(current_sum), std::move(curr_id));
                 }
             }
-            for (auto& cond: conductivity_exps_) {
+            for (auto& [name, cond]: conductivity_exps_) {
                 if (!conductivity_sum) {
                     conductivity_sum = cond->clone();
                 } else {
                     conductivity_sum = make_expression<AddBinaryExpression>(
                             Location{}, std::move(conductivity_sum), cond->clone());
                 }
+                if (name != non_specific_current) {
+                    statements_.push_back(make_expression<AssignmentExpression>(loc_,
+                                                                                i2ion(name),
+                                                                                cond->clone()));
+                }
             }
             if (current_sum) {
                 statements_.push_back(make_expression<AssignmentExpression>(loc_,
@@ -93,13 +103,13 @@ public:
         sourceKind current_source = current_update(e);
         if (current_source != sourceKind::no_source) {
             has_current_update_ = true;
-
-            auto visited_current = current_vars_.count(e->lhs()->is_identifier()->name());
-            current_vars_.insert(e->lhs()->is_identifier()->name());
+            auto name = e->lhs()->is_identifier()->name();
+            auto visited_current = current_vars_.count(name);
+            current_vars_.insert(name);
 
             linear_test_result L = linear_test(e->rhs(), {"v"});
             if (L.coef.count("v") && !visited_current) {
-                conductivity_exps_.insert(L.coef.at("v")->clone());
+                conductivity_exps_[name] = L.coef.at("v")->clone();
             }
         }
     }
@@ -480,6 +490,9 @@ bool Module::semantic() {
     // compute_currents : update contributions to currents
     //..........................................................
     NrnCurrentRewriter compute_currents_rewriter;
+    // Register non-specific current name
+    if (neuron_block_.has_nonspecific_current()) compute_currents_rewriter.non_specific_current = neuron_block_.nonspecific_current.spelling;
+
     breakpoint->accept(&compute_currents_rewriter);
 
     for (auto& s: breakpoint->body()->statements()) {
@@ -677,7 +690,6 @@ void Module::add_variables_to_symbols() {
         // Otherwise create an indexed variable and associate it
         // with the state variable if present (via a different name)
         // for ion state updates.
-
         VariableExpression* state = nullptr;
         if (has_symbol(name)) {
             state = symbols_[name].get()->is_variable();
@@ -710,12 +722,18 @@ void Module::add_variables_to_symbols() {
         create_indexed_variable(i.spelling, current_kind, accessKind::noaccess, "", i.location);
     }
 
+    std::set<std::string> cond;
     for(auto const& ion : neuron_block_.ions) {
         for(auto const& var : ion.read) {
             update_ion_symbols(var, accessKind::read, ion.name);
         }
         for(auto const& var : ion.write) {
             update_ion_symbols(var, accessKind::write, ion.name);
+            auto name = "conductivity_" + ion.name + "_";
+            if (cond.find(name) == cond.end()) {
+                create_indexed_variable(name, sourceKind::ion_conductivity, accessKind::write, ion.name, var.location);
+                cond.insert(name);
+            }
         }
 
         if(ion.uses_valence()) {
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index 2758e43cfc116d95c9dba2e05a632d03dabb4e01..967ad1c63157b36c6d71cdf048d0c58916ef5b0a 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -52,21 +52,23 @@ struct index_prop {
 void emit_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
 void emit_masked_simd_procedure_proto(std::ostream&, ProcedureExpression*, const std::string&, const std::string& qualified = "");
-void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true, bool ppack_iface=true);
-void emit_simd_api_body(std::ostream&, APIMethod*, const std::vector<VariableExpression*>& scalars);
+void emit_api_body(std::ostream&, APIMethod*, bool cv_loop = true, bool ppack_iface=true, bool use_additive=false);
+void emit_simd_api_body(std::ostream&, APIMethod*, const std::vector<VariableExpression*>& scalars, bool use_additive);
 void emit_simd_index_initialize(std::ostream& out, const std::list<index_prop>& indices, simd_expr_constraint constraint);
 
 void emit_simd_body_for_loop(std::ostream& out,
                              BlockExpression* body,
                              const std::vector<LocalVariable*>& indexed_vars,
                              const std::list<index_prop>& indices,
-                             const simd_expr_constraint& constraint);
+                             const simd_expr_constraint& constraint,
+                             bool use_additive);
 
 void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
                                        const std::vector<LocalVariable*>& indexed_vars,
                                        const std::list<index_prop>& indices,
                                        const simd_expr_constraint& constraint,
-                                       std::string constraint_name);
+                                       std::string constraint_name,
+                                       bool use_additive);
 
 struct cprint {
     Expression* expr_;
@@ -230,11 +232,11 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
     }
 
     // Make implementations
-    auto emit_body = [&](APIMethod *p) {
+    auto emit_body = [&](APIMethod *p, bool add=false) {
         if (with_simd) {
-            emit_simd_api_body(out, p, vars.scalars);
+            emit_simd_api_body(out, p, vars.scalars, add);
         } else {
-            emit_api_body(out, p);
+            emit_api_body(out, p, true, true, add);
         }
     };
 
@@ -322,7 +324,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
     out << popindent << "}\n\n";
 
     out << "static void compute_currents(arb_mechanism_ppack* pp) {\n" << indent;
-    emit_body(current_api);
+    emit_body(current_api, true);
     out << popindent << "}\n\n";
 
     out << "static void write_ions(arb_mechanism_ppack* pp) {\n" << indent;
@@ -343,7 +345,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
                            pp_var_pfx,
                            net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name());
         out << indent << indent << indent << indent;
-        emit_api_body(out, net_receive_api, false, false);
+        emit_api_body(out, net_receive_api, false, false, true);
         out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n\n";
     } else {
         out << "static void apply_events(arb_mechanism_ppack*, arb_deliverable_event_stream*) {}\n\n";
@@ -563,7 +565,7 @@ void emit_state_read(std::ostream& out, LocalVariable* local) {
     ENTER(out);
     out << "arb_value_type " << cprint(local) << " = ";
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         out << scaled(d.scale) << deref(d) << ";\n";
     }
@@ -573,17 +575,23 @@ void emit_state_read(std::ostream& out, LocalVariable* local) {
     EXIT(out);
 }
 
-void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* external) {
+void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, bool use_additive) {
     if (!external->is_write()) return;
     ENTER(out);
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
-    if (d.readonly) {
-        throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
+    if (d.readonly) throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
+    std::string var, weight = pp_var_pfx + "weight[i_]", scale = scaled(1.0/d.scale), name = from->name();
+    double coeff = 1.0/d.scale;
+    {
+        std::stringstream v, s, w;
+        v << deref(d); var = v.str();
     }
-
-    if (d.accumulate) {
+    if (d.additive && use_additive) {
+            out << fmt::format("{3} -= {0};\n"
+                               "{0} = fma({1}{2}, {3}, {0});\n",
+                               var, scale, weight, name);
+    }
+    else if (d.accumulate) {
         out << deref(d) << " = fma("
             << scaled(coeff) << pp_var_pfx << "weight[i_], "
             << from->name() << ", " << deref(d) << ");\n";
@@ -594,7 +602,7 @@ void emit_state_update(std::ostream& out, Symbol* from, IndexedVariable* externa
     EXIT(out);
 }
 
-void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppack_iface) {
+void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppack_iface, bool use_additive) {
     ENTER(out);
     auto body = method->body();
     auto indexed_vars = indexed_locals(method->scope());
@@ -614,7 +622,7 @@ void emit_api_body(std::ostream& out, APIMethod* method, bool cv_loop, bool ppac
         out << cprint(body);
 
         for (auto& sym: indexed_vars) {
-            emit_state_update(out, sym, sym->external_variable());
+            emit_state_update(out, sym, sym->external_variable(), use_additive);
         }
         cv_loop && out << popindent << "}\n";
     }
@@ -768,7 +776,7 @@ void emit_simd_state_read(std::ostream& out, LocalVariable* local, simd_expr_con
     ENTER(out);
     out << "simd_value " << local->name();
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         if (d.scalar()) {
             out << " = simd_cast<simd_value>(" << pp_var_pfx << d.data_var
@@ -813,97 +821,88 @@ void emit_simd_state_read(std::ostream& out, LocalVariable* local, simd_expr_con
     EXIT(out);
 }
 
-void emit_simd_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, simd_expr_constraint constraint) {
+void emit_simd_state_update(std::ostream& out, Symbol* from, IndexedVariable* external, simd_expr_constraint constraint, bool use_additive) {
     if (!external->is_write()) return;
+    ENTER(out);
 
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
     if (d.readonly) {
         throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
     }
 
-    ENTER(out);
+    auto ext = external->name();
+    auto name = from->name();
+    auto data = data_via_ppack(d);
+    auto node = node_index_i_name(d);
+    auto index = index_i_name(d.outer_index_var());
+
+    std::string scaled = name;
+    if (d.scale != 1.0) {
+        std::stringstream ss;
+        ss << "S::mul(" << name << ", simd_cast<simd_value>(" << as_c_double(1/d.scale) << "))";
+        scaled = ss.str();
+    }
 
-    if (d.accumulate) {
-        switch (d.index_var_kind) {
-            case index_kind::node: {
-                switch (constraint) {
-                case simd_expr_constraint::contiguous:
-                {
-                    std::string tempvar = "t_" + external->name();
-                    out << "simd_value " << tempvar << ";\n"
-                        << "assign(" << tempvar << ", indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_));\n";
-                    if (coeff != 1) {
-                        out << tempvar << " = S::fma(S::mul(w_, simd_cast<simd_value>(" << as_c_double(coeff) << "))," << from->name() << ", " << tempvar << ");\n";
-                    } else {
-                        out << tempvar << " = S::fma(w_, " << from->name() << ", " << tempvar << ");\n";
-                    }
-                    out << "indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_) = " << tempvar << ";\n";
-                    break;
-                }
-                case simd_expr_constraint::constant:
-                {
-                    out << "indirect(" << data_via_ppack(d) << ", simd_cast<simd_index>(" << node_index_i_name(d) << "), simd_width_, constraint_category_)";
-                    if (coeff != 1) {
-                        out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                    } else {
-                        out << " += S::mul(w_, " << from->name() << ");\n";
-                    }
-                    break;
-                }
-                default :
-                {
-                    out << "indirect(" << data_via_ppack(d) << ", " << node_index_i_name(d) << ", simd_width_, constraint_category_)";
-                    if (coeff != 1) {
-                        out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                    } else {
-                        out << " += S::mul(w_, " << from->name() << ");\n";
-                    }
-                }
-                }
-                break;
+    if (d.additive && use_additive) {
+        if (d.index_var_kind == index_kind::node) {
+            if (constraint == simd_expr_constraint::contiguous) {
+                    out << fmt::format("indirect({} + {}, simd_width_) = S::mul(w_, {});\n",
+                                       data, node, scaled);
             }
-            default: {
-                out << "indirect(" << data_via_ppack(d) << ", " << index_i_name(d.outer_index_var()) << ", simd_width_, index_constraint::none)";
-                if (coeff != 1) {
-                    out << " += S::mul(w_, S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << "), " << from->name() << "));\n";
-                } else {
-                    out << " += S::mul(w_, " << from->name() << ");\n";
-                }
-                break;
+            else {
+                    // We need this instead of simple assignment!
+                    out << fmt::format("{{\n"
+                                       "  simd_value t_{}0_ = simd_cast<simd_value>(0.0);\n"
+                                       "  assign(t_{}0_, indirect({}, simd_cast<simd_index>({}), simd_width_, constraint_category_));\n"
+                                       "  {} -= t_{}0_;\n"
+                                       "  indirect({}, simd_cast<simd_index>({}), simd_width_, constraint_category_) += S::mul(w_, {});\n"
+                                       "}}\n",
+                                       name,
+                                       name, data, node,
+                                       scaled, name,
+                                       data, node, scaled);
             }
         }
+        else {
+            out << fmt::format("indirect({}, {}, simd_width_, index_constraint::none) = S::mul(w_, {});\n",
+                               data, index, scaled);
+        }
     }
-    else {
-        switch (d.index_var_kind) {
-            case index_kind::node: {
-                switch (constraint) {
+    else if (d.accumulate) {
+        if (d.index_var_kind == index_kind::node) {
+            std::string tempvar = "t_" + external->name();
+            switch (constraint) {
                 case simd_expr_constraint::contiguous:
-                    out << "indirect(" << data_via_ppack(d) << " + " << node_index_i_name(d) << ", simd_width_) = ";
+                    out << "simd_value " << tempvar << ";\n"
+                        << "assign(" << tempvar << ", indirect(" << data << " + " << node << ", simd_width_));\n"
+                        << tempvar << " = S::fma(w_, " << scaled << ", " << tempvar << ");\n"
+                        << "indirect(" << data << " + " << node << ", simd_width_) = " << tempvar << ";\n";
                     break;
                 case simd_expr_constraint::constant:
-                    out << "indirect(" << data_via_ppack(d) << ", simd_cast<simd_index>(" << node_index_i_name(d) << "), simd_width_, constraint_category_) = ";
+                    out << "indirect(" << data << ", simd_cast<simd_index>(" << node << "), simd_width_, constraint_category_) += S::mul(w_, " << scaled << ");\n";
                     break;
                 default:
-                    out << "indirect(" << data_via_ppack(d) << ", " << node_index_i_name(d) << ", simd_width_, constraint_category_) = ";
-                }
-                break;
-            }
-            default: {
-                out << "indirect(" << data_via_ppack(d)
-                    << ", " << index_i_name(d.outer_index_var()) << ", simd_width_, index_constraint::none) = ";
-                break;
+                    out << "indirect(" << data << ", " << node << ", simd_width_, constraint_category_) += S::mul(w_, " << scaled << ");\n";
             }
-        }
-
-        if (coeff != 1) {
-            out << "(S::mul(simd_cast<simd_value>(" << as_c_double(coeff) << ")," << from->name() << "));\n";
         } else {
-            out << from->name() << ";\n";
+            out << "indirect(" << data << ", " << index << ", simd_width_, index_constraint::none) += S::mul(w_, " << scaled << ");\n";
         }
     }
-
+    else if (d.index_var_kind == index_kind::node) {
+        switch (constraint) {
+            case simd_expr_constraint::contiguous:
+                out << "indirect(" << data << " + " << node << ", simd_width_) = " << scaled << ";\n";
+                break;
+            case simd_expr_constraint::constant:
+                out << "indirect(" << data << ", simd_cast<simd_index>(" << node << "), simd_width_, constraint_category_) = " << scaled << ";\n";
+                break;
+            default:
+                out << "indirect(" << data << ", " << node << ", simd_width_, constraint_category_) = " << scaled << ";\n";
+        }
+    }
+    else {
+        out << "indirect(" << data << ", " << index << ", simd_width_, index_constraint::none) = " << scaled << ";\n";
+    }
     EXIT(out);
 }
 
@@ -959,7 +958,8 @@ void emit_simd_body_for_loop(
         const std::vector<LocalVariable*>& indexed_vars,
         const std::vector<VariableExpression*>& scalars,
         const std::list<index_prop>& indices,
-        const simd_expr_constraint& constraint) {
+        const simd_expr_constraint& constraint,
+        bool use_additive) {
     ENTER(out);
     emit_simd_index_initialize(out, indices, constraint);
 
@@ -973,17 +973,18 @@ void emit_simd_body_for_loop(
     out << printer;
 
     for (auto& sym: indexed_vars) {
-        emit_simd_state_update(out, sym, sym->external_variable(), constraint);
+        emit_simd_state_update(out, sym, sym->external_variable(), constraint, use_additive);
     }
     EXIT(out);
 }
 
 void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
-                                  const std::vector<LocalVariable*>& indexed_vars,
-                                  const std::vector<VariableExpression*>& scalars,
-                                  const std::list<index_prop>& indices,
-                                  const simd_expr_constraint& constraint,
-                                  std::string underlying_constraint_name) {
+                                       const std::vector<LocalVariable*>& indexed_vars,
+                                       const std::vector<VariableExpression*>& scalars,
+                                       const std::list<index_prop>& indices,
+                                       const simd_expr_constraint& constraint,
+                                       std::string underlying_constraint_name,
+                                       bool use_additive) {
     ENTER(out);
     out << fmt::format("constraint_category_ = index_constraint::{1};\n"
                        "for (auto i_ = 0ul; i_ < {0}index_constraints.n_{1}; i_++) {{\n"
@@ -995,13 +996,13 @@ void emit_simd_for_loop_per_constraint(std::ostream& out, BlockExpression* body,
                        "assign(w_, indirect(({}weight+index_), simd_width_));\n",
                        pp_var_pfx);
 
-    emit_simd_body_for_loop(out, body, indexed_vars, scalars, indices, constraint);
+    emit_simd_body_for_loop(out, body, indexed_vars, scalars, indices, constraint, use_additive);
 
     out << popindent << "}\n";
     EXIT(out);
 }
 
-void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<VariableExpression*>& scalars) {
+void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<VariableExpression*>& scalars, bool use_additive) {
     auto body = method->body();
     auto indexed_vars = indexed_locals(method->scope());
 
@@ -1023,25 +1024,25 @@ void emit_simd_api_body(std::ostream& out, APIMethod* method, const std::vector<
             simd_expr_constraint constraint = simd_expr_constraint::contiguous;
             std::string underlying_constraint = "contiguous";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all independent simd_vectors
             constraint = simd_expr_constraint::other;
             underlying_constraint = "independent";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all simd_vectors that have no optimizing constraints
             constraint = simd_expr_constraint::other;
             underlying_constraint = "none";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
             //Generate for loop for all constant simd_vectors
             constraint = simd_expr_constraint::constant;
             underlying_constraint = "constant";
 
-            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint);
+            emit_simd_for_loop_per_constraint(out, body, indexed_vars, scalars, indices, constraint, underlying_constraint, use_additive);
 
         }
         else {
diff --git a/modcc/printer/gpuprinter.cpp b/modcc/printer/gpuprinter.cpp
index 63c04d5cd1db3ed795b00cd957aaa816523adfce..aa3c76e221a370cf4f4b0eefc3c99f6297fe21d3 100644
--- a/modcc/printer/gpuprinter.cpp
+++ b/modcc/printer/gpuprinter.cpp
@@ -19,10 +19,19 @@ using io::indent;
 using io::popindent;
 using io::quote;
 
-void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true, bool ppack=true);
+static std::string scaled(double coeff) {
+    std::stringstream ss;
+    if (coeff != 1) {
+        ss << as_c_double(coeff) << '*';
+    }
+    return ss.str();
+}
+
+
+void emit_api_body_cu(std::ostream& out, APIMethod* method, bool is_point_proc, bool cv_loop = true, bool ppack=true, bool additive=false);
 void emit_procedure_body_cu(std::ostream& out, ProcedureExpression* proc);
 void emit_state_read_cu(std::ostream& out, LocalVariable* local);
-void emit_state_update_cu(std::ostream& out, Symbol* from, IndexedVariable* external, bool is_point_proc);
+void emit_state_update_cu(std::ostream& out, Symbol* from, IndexedVariable* external, bool is_point_proc, bool use_additive);
 
 const char* index_id(Symbol *s);
 
@@ -211,14 +220,14 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
     }
 
     // API methods as __global__ kernels.
-    auto emit_api_kernel = [&] (APIMethod* e) {
+    auto emit_api_kernel = [&] (APIMethod* e, bool additive=false) {
         // Only print the kernel if the method is not empty.
         if (!e->body()->statements().empty()) {
             out << "__global__\n"
                 << "void " << e->name() << "(arb_mechanism_ppack params_) {\n" << indent
                 << "int n_ = params_.width;\n"
                 << "int tid_ = threadIdx.x + blockDim.x*blockIdx.x;\n";
-            emit_api_body_cu(out, e, is_point_proc);
+            emit_api_body_cu(out, e, is_point_proc, true, true, additive);
             out << popindent << "}\n\n";
         }
     };
@@ -237,7 +246,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
                            pp_var_pfx);
     }
     emit_api_kernel(state_api);
-    emit_api_kernel(current_api);
+    emit_api_kernel(current_api, true);
     emit_api_kernel(write_ions_api);
 
     // event delivery
@@ -256,7 +265,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
                            net_receive_api->args().empty() ? "weight" : net_receive_api->args().front()->is_argument()->name(),
                            pp_var_pfx);
         out << indent << indent << indent << indent;
-        emit_api_body_cu(out, net_receive_api, is_point_proc, false, false);
+        emit_api_body_cu(out, net_receive_api, is_point_proc, false, false, false);
         out << popindent << "}\n" << popindent << "}\n" << popindent << "}\n" << popindent << "}\n";
     }
 
@@ -357,7 +366,7 @@ static std::string index_i_name(const std::string& index_var) {
     return index_var+"i_";
 }
 
-void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop, bool ppack) {
+void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool cv_loop, bool ppack, bool additive) {
     auto body = e->body();
     auto indexed_vars = indexed_locals(e->scope());
 
@@ -435,7 +444,7 @@ void emit_api_body_cu(std::ostream& out, APIMethod* e, bool is_point_proc, bool
         out << cuprint(body);
 
         for (auto& sym: indexed_vars) {
-            emit_state_update_cu(out, sym, sym->external_variable(), is_point_proc);
+            emit_state_update_cu(out, sym, sym->external_variable(), is_point_proc, additive);
         }
         cv_loop && out << popindent << "}\n";
     }
@@ -463,7 +472,7 @@ namespace {
 void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
     out << "arb_value_type " << cuprint(local) << " = ";
 
-    if (local->is_read()) {
+    if (local->is_read() || (local->is_write() && decode_indexed_variable(local->external_variable()).additive)) {
         auto d = decode_indexed_variable(local->external_variable());
         if (d.scale != 1) {
             out << as_c_double(d.scale) << "*";
@@ -477,36 +486,39 @@ void emit_state_read_cu(std::ostream& out, LocalVariable* local) {
 
 
 void emit_state_update_cu(std::ostream& out, Symbol* from,
-                          IndexedVariable* external, bool is_point_proc) {
+                          IndexedVariable* external, bool is_point_proc, bool use_additive) {
     if (!external->is_write()) return;
-
     auto d = decode_indexed_variable(external);
-    double coeff = 1./d.scale;
-
     if (d.readonly) {
         throw compiler_exception("Cannot assign to read-only external state: "+external->to_string());
     }
 
-    if (is_point_proc && d.accumulate) {
-        out << "::arb::gpu::reduce_by_key(";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << pp_var_pfx << "weight[tid_]*" << from->name() << ',';
+    auto name   = from->name();
+    auto scale  = scaled(1.0/d.scale);
+    auto data   = pp_var_pfx + d.data_var;
+    auto index  = index_i_name(d.outer_index_var());
+    auto var    = deref(d);
+    auto weight = scale + pp_var_pfx + "weight[tid_]";
 
-        auto index_var = d.outer_index_var();
-        out << pp_var_pfx << d.data_var << ", " << index_i_name(index_var) << ", lane_mask_);\n";
+    if (d.additive && use_additive) {
+        out << name << " -= " << var << ";\n";
+        if (is_point_proc) {
+            out << fmt::format("::arb::gpu::reduce_by_key({}*{}, {}, {}, lane_mask_);\n", weight, name, data, index);
+        }
+        else {
+            out << var << " = fma(" << weight << ", " << name << ", " << var << ");\n";
+        }
     }
     else if (d.accumulate) {
-        out << deref(d) << " = fma(";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << pp_var_pfx << "weight[tid_], " << from->name() << ", " << deref(d) << ");\n";
+        if (is_point_proc) {
+            out << "::arb::gpu::reduce_by_key(" << weight << "*" << name << ',' << data << ", " << index << ", lane_mask_);\n";
+        }
+        else {
+            out << var << " = fma(" << weight << ", " << name << ", " << var << ");\n";
+        }
     }
     else {
-        out << deref(d) << " = ";
-        if (coeff != 1) out << as_c_double(coeff) << '*';
-
-        out << from->name() << ";\n";
+        out << var << " = " << scale << name << ";\n";
     }
 }
 
diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp
index c796b513097ac3ae9b9b3ba001efa0f297efefe8..c4211001e5cb73fdb17b518f00a3880c39f37178 100644
--- a/modcc/printer/infoprinter.cpp
+++ b/modcc/printer/infoprinter.cpp
@@ -52,9 +52,10 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
                 id.unit_string(), val, lo, hi);
     };
     auto fmt_ion = [&](const auto& ion) {
-        return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {} }}"),
+        return fmt::format(FMT_COMPILE("{{ \"{}\", {}, {}, {}, {}, {}, {}, {}, {} }}"),
            ion.name,
            ion.writes_concentration_int(), ion.writes_concentration_ext(),
+           ion.uses_concentration_diff(),
            ion.writes_rev_potential(), ion.uses_rev_potential(),
            ion.uses_valence(), ion.verifies_valence(), ion.expected_valence);
     };
diff --git a/modcc/printer/printerutil.cpp b/modcc/printer/printerutil.cpp
index 72f208e99899d79d1de0ee6b489f36f8ebd4c62f..64914baca7f51a23188ed47ccc39dfc9e51f6ebd 100644
--- a/modcc/printer/printerutil.cpp
+++ b/modcc/printer/printerutil.cpp
@@ -141,6 +141,7 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
     v.index_var_kind = index_kind::node;
     v.scale = 1;
     v.accumulate = true;
+    v.additive = false;
     v.readonly = true;
 
     std::string ion_pfx;
@@ -196,6 +197,11 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
         v.scale = 0.1;
         v.readonly = false;
         break;
+    case sourceKind::ion_conductivity:
+        v.data_var = ion_pfx+".conductivity";
+        v.scale = 0.1;
+        v.readonly = false;
+        break;
     case sourceKind::ion_current:
         // unit scale; sourceKind for point processes updating an ionic current variable.
         v.data_var = ion_pfx+".current_density";
@@ -210,6 +216,12 @@ ARB_LIBMODCC_API indexed_variable_info decode_indexed_variable(IndexedVariable*
         v.data_var = ion_pfx+".internal_concentration";
         v.readonly = false;
         break;
+    case sourceKind::ion_diffusive:
+        v.data_var = ion_pfx+".diffusive_concentration";
+        v.readonly = false;
+        v.accumulate = false;
+        v.additive = true;
+        break;
     case sourceKind::ion_econc:
         v.data_var = ion_pfx+".external_concentration";
         v.readonly = false;
diff --git a/modcc/printer/printerutil.hpp b/modcc/printer/printerutil.hpp
index 608a231ad8afd5027bb9de8334868c64806da2be..bf3e406c877c4f53db77757339c4708ce33a937a 100644
--- a/modcc/printer/printerutil.hpp
+++ b/modcc/printer/printerutil.hpp
@@ -149,6 +149,7 @@ struct ARB_LIBMODCC_API indexed_variable_info {
 
     bool accumulate = true; // true => add with weight_ factor on assignment
     bool readonly = false;  // true => can never be assigned to by a mechanism
+    bool additive = false;  // only additive contributions allowed?
 
     // Scale is the conversion factor from the data variable
     // to the NMODL value.
diff --git a/python/cable_probes.cpp b/python/cable_probes.cpp
index 4ba583e322fd236b7f4297dc172875c1bce1b98c..a309578ad772965a3fbc65b82a91ec380a24089a 100644
--- a/python/cable_probes.cpp
+++ b/python/cable_probes.cpp
@@ -195,6 +195,14 @@ arb::probe_info cable_probe_ion_int_concentration_cell(const char* ion) {
     return arb::cable_probe_ion_int_concentration_cell{ion};
 }
 
+arb::probe_info cable_probe_ion_diff_concentration(const char* where, const char* ion) {
+    return arb::cable_probe_ion_diff_concentration{arborio::parse_locset_expression(where).unwrap(), ion};
+}
+
+arb::probe_info cable_probe_ion_diff_concentration_cell(const char* ion) {
+    return arb::cable_probe_ion_diff_concentration_cell{ion};
+}
+
 arb::probe_info cable_probe_ion_ext_concentration(const char* where, const char* ion) {
     return arb::cable_probe_ion_ext_concentration{arborio::parse_locset_expression(where).unwrap(), ion};
 }
@@ -284,6 +292,14 @@ void register_cable_probes(pybind11::module& m, pyarb_global_ptr global_ptr) {
         "Probe specification for cable cell internal ionic concentration for each cable in each CV.",
         "ion"_a);
 
+    m.def("cable_probe_ion_diff_concentration", &cable_probe_ion_diff_concentration,
+        "Probe specification for cable cell diffusive ionic concentration at points in a location set.",
+        "where"_a, "ion"_a);
+
+    m.def("cable_probe_ion_diff_concentration_cell", &cable_probe_ion_diff_concentration_cell,
+        "Probe specification for cable cell diffusive ionic concentration for each cable in each CV.",
+        "ion"_a);
+
     m.def("cable_probe_ion_ext_concentration", &cable_probe_ion_ext_concentration,
         "Probe specification for cable cell external ionic concentration at points in a location set.",
         "where"_a, "ion"_a);
diff --git a/python/cells.cpp b/python/cells.cpp
index f103764a2bc4aff51af23c8a98b8d5e2dc52e956..bd4f1ba91653f0cfc63c9949a4802322f2d0df1d 100644
--- a/python/cells.cpp
+++ b/python/cells.cpp
@@ -549,7 +549,7 @@ void register_cells(pybind11::module& m) {
             [](arb::cable_cell_global_properties& props, const char* ion,
                optional<double> valence, optional<double> int_con,
                optional<double> ext_con, optional<double> rev_pot,
-               pybind11::object method)
+               pybind11::object method, optional<double> diff)
             {
                 if (!props.ion_species.count(ion) && !valence) {
                     throw std::runtime_error(util::pprintf("New ion species: '{}', missing valence", ion));
@@ -560,6 +560,7 @@ void register_cells(pybind11::module& m) {
                 if (int_con) data.init_int_concentration  = *int_con;
                 if (ext_con) data.init_ext_concentration  = *ext_con;
                 if (rev_pot) data.init_reversal_potential = *rev_pot;
+                if (diff)    data.diffusivity             = *diff;
 
                 if (auto m = maybe_method(method)) {
                     props.default_parameters.reversal_potential_method[ion] = *m;
@@ -571,6 +572,7 @@ void register_cells(pybind11::module& m) {
             pybind11::arg_v("ext_con", pybind11::none(), "initial external concentration [mM]."),
             pybind11::arg_v("rev_pot", pybind11::none(), "reversal potential [mV]."),
             pybind11::arg_v("method",  pybind11::none(), "method for calculating reversal potential."),
+            pybind11::arg_v("diff",    pybind11::none(), "diffusivity [m^2/s]."),
             "Set the global default properties of ion species named 'ion'.\n"
             "There are 3 ion species predefined in arbor: 'ca', 'na' and 'k'.\n"
             "If 'ion' in not one of these ions it will be added to the list, making it\n"
@@ -620,11 +622,13 @@ void register_cells(pybind11::module& m) {
         .def("set_ion",
             [](arb::decor& d, const char* ion,
                optional<double> int_con, optional<double> ext_con,
-               optional<double> rev_pot, pybind11::object method)
+               optional<double> rev_pot, pybind11::object method,
+               optional<double> diff)
             {
                 if (int_con) d.set_default(arb::init_int_concentration{ion, *int_con});
                 if (ext_con) d.set_default(arb::init_ext_concentration{ion, *ext_con});
                 if (rev_pot) d.set_default(arb::init_reversal_potential{ion, *rev_pot});
+                if (diff)    d.set_default(arb::ion_diffusivity{ion, *diff});
                 if (auto m = maybe_method(method)) {
                     d.set_default(arb::ion_reversal_potential_method{ion, *m});
                 }
@@ -634,6 +638,7 @@ void register_cells(pybind11::module& m) {
             pybind11::arg_v("ext_con", pybind11::none(), "initial external concentration [mM]."),
             pybind11::arg_v("rev_pot", pybind11::none(), "reversal potential [mV]."),
             pybind11::arg_v("method",  pybind11::none(), "method for calculating reversal potential."),
+            pybind11::arg_v("diff",    pybind11::none(), "diffusivity [m^2/s]."),
             "Set the properties of ion species named 'ion' that will be applied\n"
             "by default everywhere on the cell. Species concentrations and reversal\n"
             "potential can be overridden on specific regions using the paint interface, \n"
@@ -668,16 +673,19 @@ void register_cells(pybind11::module& m) {
         // Paint ion species initial conditions on a region.
         .def("paint",
             [](arb::decor& dec, const char* region, const char* name,
-               optional<double> int_con, optional<double> ext_con, optional<double> rev_pot) {
+               optional<double> int_con, optional<double> ext_con,
+               optional<double> rev_pot, optional<double> diff) {
                 auto r = arborio::parse_region_expression(region).unwrap();
                 if (int_con) dec.paint(r, arb::init_int_concentration{name, *int_con});
                 if (ext_con) dec.paint(r, arb::init_ext_concentration{name, *ext_con});
                 if (rev_pot) dec.paint(r, arb::init_reversal_potential{name, *rev_pot});
+                if (diff)    dec.paint(r, arb::ion_diffusivity{name, *diff});
             },
             "region"_a, pybind11::kw_only(), "ion_name"_a,
             pybind11::arg_v("int_con", pybind11::none(), "Initial internal concentration [mM]"),
             pybind11::arg_v("ext_con", pybind11::none(), "Initial external concentration [mM]"),
             pybind11::arg_v("rev_pot", pybind11::none(), "Initial reversal potential [mV]"),
+            pybind11::arg_v("diff",    pybind11::none(), "Diffusivity [m^2/s]"),
             "Set ion species properties conditions on a region.")
         // Place synapses
         .def("place",
diff --git a/python/example/diffusion.py b/python/example/diffusion.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d9f06bd834049ed6b3e9212532bf815fc506d78
--- /dev/null
+++ b/python/example/diffusion.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+import arbor as A
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+
+
+class recipe(A.recipe):
+    def __init__(self, cell, probes):
+        A.recipe.__init__(self)
+        self.the_cell = cell
+        self.the_probes = probes
+        self.the_props = A.neuron_cable_properties()
+        self.the_props.catalogue = A.default_catalogue()
+
+    def num_cells(self):
+        return 1
+
+    def cell_kind(self, gid):
+        return A.cell_kind.cable
+
+    def cell_description(self, gid):
+        return self.the_cell
+
+    def probes(self, gid):
+        return self.the_probes
+
+    def global_properties(self, kind):
+        return self.the_props
+
+
+tree = A.segment_tree()
+s = tree.append(A.mnpos, A.mpoint(-3, 0, 0, 3), A.mpoint(3, 0, 0, 3), tag=1)
+_ = tree.append(s, A.mpoint(3, 0, 0, 1), A.mpoint(33, 0, 0, 1), tag=3)
+
+dec = A.decor()
+dec.set_property(Vm=-40)
+# dec.paint('(tag 1)', A.density('hh'))
+dec.discretization(A.cv_policy("(max-extent 5)"))
+
+# Set up ion diffusion
+dec.set_ion("na", int_con=1.0, ext_con=140, rev_pot=50, diff=0.005)
+dec.paint("(tag 1)", ion_name="na", int_con=100.0, diff=0.01)
+
+prb = [
+    A.cable_probe_ion_diff_concentration_cell("na"),
+]
+cel = A.cable_cell(tree, A.label_dict(), dec)
+rec = recipe(cel, prb)
+ctx = A.context()
+dom = A.partition_load_balance(rec, ctx)
+sim = A.simulation(rec, dom, ctx)
+hdl = (sim.sample((0, 0), A.regular_schedule(0.1)),)
+
+sim.run(tfinal=0.5)
+
+sns.set_theme()
+fg, ax = plt.subplots()
+for h in hdl:
+    for d, m in sim.samples(h):
+        xs = d[:, 0]
+        for lbl, ix in zip(m, range(1, d.shape[1])):
+            ys = d[:, ix]
+            print(lbl, ys.min(), ys.max())
+            ax.plot(xs, ys, label=lbl)
+ax.legend()
+fg.savefig("results.pdf")
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 98b5eeed9341e0c95ecc4b48424c196910be49dc..2fe2ff90d8ddeab4283d4bfe2879fab079be09f6 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -112,6 +112,7 @@ set(unit_sources
     test_forest.cpp
     test_fvm_layout.cpp
     test_fvm_lowered.cpp
+    test_diffusion.cpp
     test_index.cpp
     test_kinetic_linear.cpp
     test_lexcmp.cpp
diff --git a/test/unit/test_diffusion.cpp b/test/unit/test_diffusion.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..37e29222567d99f0be7d84d48d9af4e71c467dcd
--- /dev/null
+++ b/test/unit/test_diffusion.cpp
@@ -0,0 +1,288 @@
+#include <cmath>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "../gtest.h"
+
+#include <arborio/label_parse.hpp>
+
+#include <arbor/common_types.hpp>
+#include <arbor/domain_decomposition.hpp>
+#include <arbor/fvm_types.hpp>
+#include <arbor/load_balance.hpp>
+#include <arbor/math.hpp>
+#include <arbor/cable_cell.hpp>
+#include <arbor/recipe.hpp>
+#include <arbor/sampling.hpp>
+#include <arbor/simulation.hpp>
+#include <arbor/schedule.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/util/any_ptr.hpp>
+
+#include <arborenv/default_env.hpp>
+
+using namespace std::string_literals;
+using namespace arborio::literals;
+
+using namespace arb;
+
+constexpr double epsilon  = 1e-6;
+#ifdef ARB_GPU_ENABLED
+constexpr int    with_gpu = 0;
+#else
+constexpr int    with_gpu = -1;
+#endif
+
+struct linear: public recipe {
+    linear(double x, double d, double c): extent{x}, diameter{d}, cv_length{c} {
+        gprop.default_parameters = arb::neuron_parameter_defaults;
+        gprop.default_parameters.discretization = arb::cv_policy_max_extent{cv_length};
+        // Stick morphology
+        // -----x-----
+        segment_tree tree;
+        auto p = mnpos;
+        p = tree.append(p, { -extent, 0, 0, diameter}, {extent, 0, 0, diameter}, 1);
+        morph = {tree};
+    }
+
+    arb::cell_size_type num_cells()                             const override { return 1; }
+    arb::cell_kind get_cell_kind(arb::cell_gid_type)            const override { return arb::cell_kind::cable; }
+    std::any get_global_properties(arb::cell_kind)              const override { return gprop; }
+    std::vector<arb::probe_info> get_probes(arb::cell_gid_type) const override { return {arb::cable_probe_ion_diff_concentration_cell{"na"}}; }
+    util::unique_any get_cell_description(arb::cell_gid_type)   const override { return arb::cable_cell(morph, {}, decor); }
+
+    std::vector<arb::event_generator> event_generators(arb::cell_gid_type gid) const override {
+        std::vector<arb::event_generator> result;
+        for (const auto& [t, w]: inject_at) result.push_back(arb::explicit_generator({{{"Zap"}, t, w}}));
+        return result;
+    }
+
+    arb::cable_cell_global_properties gprop;
+    double extent = 1.0,
+           diameter = 1.0,
+           cv_length = 1.0;
+    std::vector<std::tuple<double, float>> inject_at;
+    morphology morph;
+    arb::decor decor;
+
+    linear& add_decay()  { decor.paint("(all)"_reg, arb::density("decay/x=na")); return *this; }
+    linear& add_inject() { decor.place("(location 0 0.5)"_ls, arb::synapse("inject/x=na", {{"alpha", 200.0*cv_length}}), "Zap"); return *this; }
+    linear& add_event(double t, float w) { inject_at.push_back({t, w}); return *this; }
+    linear& set_diffusivity(double d, std::optional<region> rg = {}) {
+        if (rg) decor.paint(*rg, ion_diffusivity{"na", d});
+        else    decor.set_default(ion_diffusivity{"na", d});
+        return *this;
+    }
+    linear& set_concentration(double d, std::optional<region> rg = {}) {
+        if (rg) decor.paint(*rg, init_int_concentration{"na", d});
+        else    decor.set_default(init_int_concentration{"na", d});
+        return *this;
+    }
+};
+
+using result_t = std::vector<std::tuple<double, double, double>>;
+
+testing::AssertionResult all_near(const result_t& a, const result_t& b, double eps) {
+    if (a.size() != b.size()) return testing::AssertionFailure() << "sequences differ in length";
+    std::stringstream res;
+    for (size_t ix = 0; ix < a.size(); ++ix) {
+        const auto&[ax, ay, az] = a[ix];
+        const auto&[bx, by, bz] = b[ix];
+        if (fabs(ax - bx) > eps) res << " X elements " << ax << " and " << bx << " differ at index " << ix << ".";
+        if (fabs(ay - by) > eps) res << " Y elements " << ay << " and " << by << " differ at index " << ix << ".";
+        if (fabs(az - bz) > eps) res << " Z elements " << az << " and " << bz << " differ at index " << ix << ".";
+    }
+    std::string str = res.str();
+    if (str.empty()) return testing::AssertionSuccess();
+    else             return testing::AssertionFailure() << str;
+}
+
+testing::AssertionResult run(const linear& rec, const result_t exp) {
+    result_t sample_values;
+    auto sampler = [&sample_values](arb::probe_metadata pm, std::size_t n, const arb::sample_record* samples) {
+        sample_values.clear();
+        auto ptr = arb::util::any_cast<const arb::mcable_list*>(pm.meta);
+        ASSERT_NE(ptr, nullptr);
+        auto n_cable = ptr->size();
+        for (std::size_t i = 0; i<n; ++i) {
+            const auto& [val, _ig] = *arb::util::any_cast<const arb::cable_sample_range*>(samples[i].data);
+            for (unsigned j = 0; j<n_cable; ++j) {
+                arb::mcable loc = (*ptr)[j];
+                sample_values.push_back({samples[i].time, loc.prox_pos, val[j]});
+            }
+        }
+    };
+    auto ctx = make_context({arbenv::default_concurrency(), with_gpu});
+    auto sim = simulation{rec, partition_load_balance(rec, ctx), ctx};
+    sim.add_sampler(arb::all_probes, arb::regular_schedule(0.1), sampler);
+    sim.run(0.11, 0.01);
+    return all_near(sample_values, exp, epsilon);
+}
+
+TEST(diffusion, errors) {
+    {
+        // Cannot R/W Xd w/o setting diffusivity
+        auto rec = linear{30, 3, 1}.add_decay();
+        ASSERT_THROW(run(rec, {}), illegal_diffusive_mechanism);
+    }
+    {
+        // Cannot R/W Xd w/o setting diffusivity
+        auto rec = linear{30, 3, 1}.add_inject();
+        ASSERT_THROW(run(rec, {}), illegal_diffusive_mechanism);
+    }
+    {
+        // No negative diffusivity
+        auto rec = linear{30, 3, 1}.set_diffusivity(-42.0, "(all)"_reg);
+        ASSERT_THROW(run(rec, {}), cable_cell_error);
+    }
+    {
+        // No negative diffusivity
+        auto rec = linear{30, 3, 1}.set_diffusivity(-42.0);
+        ASSERT_THROW(run(rec, {}), cable_cell_error);
+    }
+}
+
+TEST(diffusion, by_initial_concentration) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .set_concentration(0.1, "(cable 0 0.5 0.6)"_reg);
+    result_t exp = {{0.000000, 0.000000, 0.000000},
+                    {0.000000, 0.100000, 0.000000},
+                    {0.000000, 0.200000, 0.000000},
+                    {0.000000, 0.300000, 0.000000},
+                    {0.000000, 0.400000, 0.000000},
+                    {0.000000, 0.500000, 0.100000},
+                    {0.000000, 0.600000, 0.000000},
+                    {0.000000, 0.700000, 0.000000},
+                    {0.000000, 0.800000, 0.000000},
+                    {0.000000, 0.900000, 0.000000},
+                    {0.100000, 0.000000, 0.000000},
+                    {0.100000, 0.100000, 0.000000},
+                    {0.100000, 0.200000, 0.000000},
+                    {0.100000, 0.300000, 0.000023},
+                    {0.100000, 0.400000, 0.001991},
+                    {0.100000, 0.500000, 0.095973},
+                    {0.100000, 0.600000, 0.001991},
+                    {0.100000, 0.700000, 0.000023},
+                    {0.100000, 0.800000, 0.000000},
+                    {0.100000, 0.900000, 0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, by_event) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .add_inject()
+        .add_event(0, 0.005);
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000, 53.051647},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000001},
+                    { 0.100000,  0.200000,  0.000100},
+                    { 0.100000,  0.300000,  0.012051},
+                    { 0.100000,  0.400000,  1.056130},
+                    { 0.100000,  0.500000, 50.915085},
+                    { 0.100000,  0.600000,  1.056130},
+                    { 0.100000,  0.700000,  0.012051},
+                    { 0.100000,  0.800000,  0.000100},
+                    { 0.100000,  0.900000,  0.000001}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(1e-300)
+        .set_concentration(0.1)
+        .add_decay();
+    result_t exp = {{ 0.000000,  0.000000,  0.100000},
+                    { 0.000000,  0.100000,  0.100000},
+                    { 0.000000,  0.200000,  0.100000},
+                    { 0.000000,  0.300000,  0.100000},
+                    { 0.000000,  0.400000,  0.100000},
+                    { 0.000000,  0.500000,  0.100000},
+                    { 0.000000,  0.600000,  0.100000},
+                    { 0.000000,  0.700000,  0.100000},
+                    { 0.000000,  0.800000,  0.100000},
+                    { 0.000000,  0.900000,  0.100000},
+                    { 0.100000,  0.000000,  0.060647},
+                    { 0.100000,  0.100000,  0.060647},
+                    { 0.100000,  0.200000,  0.060647},
+                    { 0.100000,  0.300000,  0.060647},
+                    { 0.100000,  0.400000,  0.060647},
+                    { 0.100000,  0.500000,  0.060647},
+                    { 0.100000,  0.600000,  0.060647},
+                    { 0.100000,  0.700000,  0.060647},
+                    { 0.100000,  0.800000,  0.060647},
+                    { 0.100000,  0.900000,  0.060647}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay_by_initial_concentration) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .set_concentration(0.1, "(cable 0 0.5 0.6)"_reg)
+        .add_decay();
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000,  0.100000},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000000},
+                    { 0.100000,  0.200000,  0.000000},
+                    { 0.100000,  0.300000,  0.000014},
+                    { 0.100000,  0.400000,  0.001207},
+                    { 0.100000,  0.500000,  0.058204},
+                    { 0.100000,  0.600000,  0.001207},
+                    { 0.100000,  0.700000,  0.000014},
+                    { 0.100000,  0.800000,  0.000000},
+                    { 0.100000,  0.900000,  0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
+
+TEST(diffusion, decay_by_event) {
+    auto rec = linear{30, 3, 6}
+        .set_diffusivity(0.005)
+        .set_concentration(0.0)
+        .add_decay()
+        .add_inject()
+        .add_event(0, 0.005);
+    result_t exp = {{ 0.000000,  0.000000,  0.000000},
+                    { 0.000000,  0.100000,  0.000000},
+                    { 0.000000,  0.200000,  0.000000},
+                    { 0.000000,  0.300000,  0.000000},
+                    { 0.000000,  0.400000,  0.000000},
+                    { 0.000000,  0.500000, 53.051647},
+                    { 0.000000,  0.600000,  0.000000},
+                    { 0.000000,  0.700000,  0.000000},
+                    { 0.000000,  0.800000,  0.000000},
+                    { 0.000000,  0.900000,  0.000000},
+                    { 0.100000,  0.000000,  0.000000},
+                    { 0.100000,  0.100000,  0.000000},
+                    { 0.100000,  0.200000,  0.000061},
+                    { 0.100000,  0.300000,  0.007308},
+                    { 0.100000,  0.400000,  0.640508},
+                    { 0.100000,  0.500000, 30.878342},
+                    { 0.100000,  0.600000,  0.640508},
+                    { 0.100000,  0.700000,  0.007308},
+                    { 0.100000,  0.800000,  0.000061},
+                    { 0.100000,  0.900000,  0.000000}};
+    EXPECT_TRUE(run(rec, exp));
+}
diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp
index e26c8db8a8f765ff0ea8f9774ab89f7f4a445c49..9bdd9bab6d0d96abd26b85b0afad6b53de84197a 100644
--- a/test/unit/test_fvm_layout.cpp
+++ b/test/unit/test_fvm_layout.cpp
@@ -1474,7 +1474,7 @@ TEST(fvm_layout, valence_verify) {
     EXPECT_THROW(fvm_build_mechanism_data(gprop, cells, gids, gj_conns, D), cable_cell_error);
 
     // Adding ion, should be fine now:
-    gprop.default_parameters.ion_data["cl"] = { 1., 1., 0. };
+    gprop.default_parameters.ion_data["cl"] = { 1., 1., 0., 0. };
     gprop.ion_species["cl"] = -1;
     EXPECT_NO_THROW(fvm_build_mechanism_data(gprop, cells, gids, gj_conns, D));
 
diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp
index b07208fdcf85c9445239d685027621936b4274e4..d9d1aa76c7ceebc0eb73f2560b4d51cbda281bae 100644
--- a/test/unit/test_fvm_lowered.cpp
+++ b/test/unit/test_fvm_lowered.cpp
@@ -48,8 +48,7 @@ using fvm_cell = arb::fvm_lowered_cell_impl<backend>;
 using shared_state = backend::shared_state;
 ACCESS_BIND(std::unique_ptr<shared_state> fvm_cell::*, private_state_ptr, &fvm_cell::state_)
 
-using matrix = arb::matrix<arb::multicore::backend>;
-ACCESS_BIND(matrix fvm_cell::*, private_matrix_ptr, &fvm_cell::matrix_)
+using matrix = arb::multicore::cable_solver;
 
 ACCESS_BIND(std::vector<arb::mechanism_ptr> fvm_cell::*, private_mechanisms_ptr, &fvm_cell::mechanisms_)
 
@@ -202,8 +201,8 @@ TEST(fvm_lowered, matrix_init)
     fvm_cell fvcell(*context);
     fvcell.initialize({0}, cable1d_recipe(cell));
 
-    auto& J = fvcell.*private_matrix_ptr;
     auto& S = fvcell.*private_state_ptr;
+    auto& J = S->solver;
     EXPECT_EQ(J.size(), 12u);
 
     // Test that the matrix is initialized with sensible values
@@ -211,14 +210,13 @@ TEST(fvm_lowered, matrix_init)
     fvcell.integrate(0.01, 0.01, {}, {});
 
     auto n = J.size();
-    auto& mat = J.state_;
 
-    EXPECT_FALSE(arb::util::any_of(util::subrange_view(mat.u, 1, n), isnan));
-    EXPECT_FALSE(arb::util::any_of(mat.d, isnan));
+    EXPECT_FALSE(arb::util::any_of(util::subrange_view(J.u, 1, n), isnan));
+    EXPECT_FALSE(arb::util::any_of(J.d, isnan));
     EXPECT_FALSE(arb::util::any_of(S->voltage, isnan));
 
-    EXPECT_FALSE(arb::util::any_of(util::subrange_view(mat.u, 1, n), ispos));
-    EXPECT_FALSE(arb::util::any_of(mat.d, isneg));
+    EXPECT_FALSE(arb::util::any_of(util::subrange_view(J.u, 1, n), ispos));
+    EXPECT_FALSE(arb::util::any_of(J.d, isneg));
 }
 
 TEST(fvm_lowered, target_handles) {
diff --git a/test/unit/test_matrix.cpp b/test/unit/test_matrix.cpp
index f63bf86fe9473cb4cbb63fe415fa5ce60a9cc664..12959eb63e11fea524b452f9b94d4de67b488f8e 100644
--- a/test/unit/test_matrix.cpp
+++ b/test/unit/test_matrix.cpp
@@ -5,7 +5,6 @@
 
 #include <arbor/math.hpp>
 
-#include "matrix.hpp"
 #include "backends/multicore/fvm.hpp"
 #include "util/rangeutil.hpp"
 #include "util/span.hpp"
@@ -16,21 +15,21 @@ using namespace arb;
 
 using backend     = multicore::backend;
 using array       = backend::array;
-using matrix_type = matrix<backend>;
-using index_type  = matrix_type::index_type;
-using value_type  = matrix_type::value_type;
+using solver_type = backend::cable_solver;
+using index_type  = arb_index_type;
+using value_type  = arb_value_type;
 
 using vvec = std::vector<value_type>;
 
 TEST(matrix, construct_from_parent_only)
 {
     std::vector<index_type> p = {0,0,1};
-    matrix_type m(p, {0, 3}, vvec(3), vvec(3), vvec(3), {0});
+    solver_type m(p, {0, 3}, vvec(3), vvec(3), vvec(3), {0});
     EXPECT_EQ(m.num_cells(), 1u);
     EXPECT_EQ(m.size(), 3u);
     EXPECT_EQ(p.size(), 3u);
 
-    auto mp = m.p();
+    auto mp = m.parent_index;
     EXPECT_EQ(mp[0], index_type(0));
     EXPECT_EQ(mp[1], index_type(0));
     EXPECT_EQ(mp[2], index_type(1));
@@ -43,11 +42,10 @@ TEST(matrix, solve_host)
 
     // trivial case : 1x1 matrix
     {
-        matrix_type m({0}, {0,1}, vvec(1), vvec(1), vvec(1), {0});
-        auto& state = m.state_;
-        fill(state.d,  2);
-        fill(state.u, -1);
-        fill(state.rhs,1);
+        solver_type m({0}, {0,1}, vvec(1), vvec(1), vvec(1), {0});
+        fill(m.d,  2);
+        fill(m.u, -1);
+        fill(m.rhs,1);
 
         auto x = array({0});
         m.solve(x);
@@ -60,16 +58,14 @@ TEST(matrix, solve_host)
         for(auto n : make_span(2, 1001)) {
             auto p = std::vector<index_type>(n);
             std::iota(p.begin()+1, p.end(), 0);
-            matrix_type m(p, {0, n}, vvec(n), vvec(n), vvec(n), {0});
+            solver_type m(p, {0, n}, vvec(n), vvec(n), vvec(n), {0});
 
             EXPECT_EQ(m.size(), (unsigned)n);
             EXPECT_EQ(m.num_cells(), 1u);
 
-            auto& A = m.state_;
-
-            fill(A.d,  2);
-            fill(A.u, -1);
-            fill(A.rhs,1);
+            fill(m.d,  2);
+            fill(m.u, -1);
+            fill(m.rhs,1);
 
 
             auto x = array();
@@ -100,15 +96,14 @@ TEST(matrix, zero_diagonal)
     std::vector<index_type> p = {0, 0, 1, 3, 3, 5, 5};
     std::vector<index_type> c = {0, 3, 5, 7};
     std::vector<index_type> i = {0, 1, 2};
-    matrix_type m(p, c, vvec(7), vvec(7), vvec(7), i);
+    solver_type m(p, c, vvec(7), vvec(7), vvec(7), i);
 
     EXPECT_EQ(7u, m.size());
     EXPECT_EQ(3u, m.num_cells());
 
-    auto& A = m.state_;
-    assign(A.d,   vvec({2,  3,  2, 0,  0,  4,  5}));
-    assign(A.u,   vvec({0, -1, -1, 0, -1,  0, -2}));
-    assign(A.rhs, vvec({3,  5,  7, 7,  8, 16, 32}));
+    assign(m.d,   vvec({2,  3,  2, 0,  0,  4,  5}));
+    assign(m.u,   vvec({0, -1, -1, 0, -1,  0, -2}));
+    assign(m.rhs, vvec({3,  5,  7, 7,  8, 16, 32}));
 
     // Expected solution:
     std::vector<value_type> expected = {4, 5, 6, 7, 8, 9, 10};
@@ -125,7 +120,7 @@ TEST(matrix, zero_diagonal_assembled)
     // test case from CV data.
 
     using util::assign;
-    using array = matrix_type::array;
+    using array = solver_type::array;
 
     // Combined matrix may have zero-blocks, corresponding to a zero dt.
     // Zero-blocks are indicated by zero value in the diagonal (the off-diagonal
@@ -162,7 +157,7 @@ TEST(matrix, zero_diagonal_assembled)
     // Expected solution:
     // x = [ 4 5 6 7 8 9 10 ]
 
-    matrix_type m(p, c, Cm, g, area, s);
+    solver_type m(p, c, Cm, g, area, s);
     m.assemble(dt, v, i, mg);
 
     auto x = array({0, 0, 0, 0, 0, 0, 0});
@@ -184,4 +179,3 @@ TEST(matrix, zero_diagonal_assembled)
 
     EXPECT_TRUE(testing::seq_almost_eq<double>(expected, x));
 }
-
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index 9212f3b0c95a968d4eac054065d15c64a3ae47e3..8a00ddf4e8e27cdca67f36d9850ac08a1bcec860 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -36,8 +36,8 @@ using std::end;
 //  * matrices all have same size and structure
 TEST(matrix, assemble)
 {
-    using gpu_state = gpu::backend::matrix_state;
-    using mc_state  = multicore::backend::matrix_state;
+    using gpu_state = gpu::backend::cable_solver;
+    using mc_state  = multicore::backend::cable_solver;
 
     using T = fvm_value_type;
     using I = fvm_index_type;
diff --git a/test/unit/test_s_expr.cpp b/test/unit/test_s_expr.cpp
index f8aa256bfdfb612752b6ea03405a4b5612671cb0..be436e78e924610e5619b7a345c7eda422a210a1 100644
--- a/test/unit/test_s_expr.cpp
+++ b/test/unit/test_s_expr.cpp
@@ -479,6 +479,9 @@ std::ostream& operator<<(std::ostream& o, const membrane_capacitance& p) {
 std::ostream& operator<<(std::ostream& o, const init_int_concentration& p) {
     return o << "(ion-internal-concentration \"" << p.ion << "\" " << p.value << ')';
 }
+std::ostream& operator<<(std::ostream& o, const ion_diffusivity& p) {
+    return o << "(ion-diffusivity \"" << p.ion << "\" " << p.value << ')';
+}
 std::ostream& operator<<(std::ostream& o, const init_ext_concentration& p) {
     return o << "(ion-external-concentration \"" << p.ion << "\" " << p.value << ')';
 }