From c976c6660c9320c68b8f6916baf77eb61b564029 Mon Sep 17 00:00:00 2001
From: boeschf <48126478+boeschf@users.noreply.github.com>
Date: Wed, 5 Oct 2022 10:04:01 +0200
Subject: [PATCH] SDE (#1884)

Main changes
- uncorrelated and independently distributed white noise generation for point and density mechanisms
- enabled by extending nmodl dialect and adjusting modcc (WHITE_NOISE block, stochastic solver method)
- SDEs are solved by Euler-Maruyama method (synapse collapsing disabled in this case)
- CPU and GPU backends responsible for creating random numbers using random123
- simulation takes a seed value
- bumped ABI due to addition of random numbers in ppack
Incidental changes
- builder pattern for simulation
- pimpl idiom supported by util classes
---
 CMakeLists.txt                                |  11 +
 arbor/CMakeLists.txt                          |   8 +
 arbor/backends/gpu/chunk_writer.hpp           |  54 +
 arbor/backends/gpu/gpu_store_types.hpp        |   1 +
 arbor/backends/gpu/rand.cpp                   |  62 ++
 arbor/backends/gpu/rand.cu                    |  64 ++
 arbor/backends/gpu/rand.hpp                   |  37 +
 arbor/backends/gpu/rand_on_cpu.hpp            |  47 +
 arbor/backends/gpu/rand_on_gpu.hpp            |  47 +
 arbor/backends/gpu/shared_state.cpp           |  61 +-
 arbor/backends/gpu/shared_state.hpp           |   9 +-
 arbor/backends/gpu/threshold_watcher.hpp      |   1 -
 arbor/backends/multicore/rand.cpp             |  33 +
 arbor/backends/multicore/shared_state.cpp     |  48 +-
 arbor/backends/multicore/shared_state.hpp     |  14 +-
 .../backends/multicore/threshold_watcher.hpp  |   3 +-
 arbor/backends/rand_fwd.hpp                   |  34 +
 arbor/backends/rand_impl.hpp                  |  35 +
 arbor/cell_group_factory.cpp                  |   6 +-
 arbor/cell_group_factory.hpp                  |   2 +-
 arbor/fvm_layout.cpp                          |   2 +-
 arbor/fvm_lowered_cell.hpp                    |   3 +-
 arbor/fvm_lowered_cell_impl.cpp               |   7 +-
 arbor/fvm_lowered_cell_impl.hpp               |  29 +-
 arbor/include/arbor/arb_types.inc             |   1 +
 arbor/include/arbor/mechanism.hpp             |   3 +
 arbor/include/arbor/mechanism_abi.h           |  13 +-
 arbor/include/arbor/mechinfo.hpp              |   7 +
 arbor/include/arbor/simulation.hpp            |  82 +-
 arbor/include/git-source-id                   |   2 +
 arbor/mechinfo.cpp                            |   4 +
 arbor/simulation.cpp                          |  16 +-
 arbor/util/pimpl.hpp                          |  40 +
 arbor/util/pimpl_src.hpp                      |  58 ++
 arbor/util/unwind.hpp                         |   4 +-
 doc/concepts/interconnectivity.rst            |   2 +-
 doc/concepts/mechanisms.rst                   |  54 +
 doc/concepts/simulation.rst                   |   9 +-
 doc/cpp/simulation.rst                        |  23 +-
 doc/dev/index.rst                             |   1 +
 doc/dev/sde.rst                               | 132 +++
 doc/fileformat/nmodl.rst                      |  39 +
 doc/python/morphology.rst                     |   3 +-
 doc/python/simulation.rst                     |  11 +-
 modcc/blocks.cpp                              |   7 +
 modcc/blocks.hpp                              |  18 +-
 modcc/expression.cpp                          |  14 +
 modcc/expression.hpp                          |  33 +-
 modcc/module.cpp                              |  82 +-
 modcc/module.hpp                              |   5 +
 modcc/parser.cpp                              |  66 ++
 modcc/parser.hpp                              |   1 +
 modcc/printer/cprinter.cpp                    |  13 +-
 modcc/printer/cprinter.hpp                    |   2 +
 modcc/printer/gpuprinter.cpp                  |   7 +-
 modcc/printer/gpuprinter.hpp                  |   1 +
 modcc/printer/infoprinter.cpp                 |   8 +-
 modcc/printer/printerutil.cpp                 |   7 +
 modcc/printer/printerutil.hpp                 |   1 +
 modcc/solvers.cpp                             | 163 +++
 modcc/solvers.hpp                             |  40 +
 modcc/symdiff.cpp                             |   6 +-
 modcc/symdiff.hpp                             |   1 +
 modcc/token.cpp                               |   3 +
 modcc/token.hpp                               |   3 +-
 modcc/visitor.hpp                             |   7 +-
 python/cells.cpp                              |   2 +
 python/simulation.cpp                         |  12 +-
 test/CMakeLists.txt                           |   5 +-
 test/unit-modcc/mod_files/test10.mod          |  35 +
 test/unit-modcc/mod_files/test9.mod           |  31 +
 test/unit-modcc/test_module.cpp               |  48 +-
 test/unit-modcc/test_parser.cpp               |  26 +
 test/unit/CMakeLists.txt                      |   6 +
 test/unit/test_matrix_cpuvsgpu.cpp            |   5 +-
 test/unit/test_sde.cpp                        | 973 ++++++++++++++++++
 test/unit/test_simulation.cpp                 |  24 +
 ...n_reverting_stochastic_density_process.mod |  26 +
 ..._reverting_stochastic_density_process2.mod |  29 +
 .../mean_reverting_stochastic_process.mod     |  42 +
 .../mean_reverting_stochastic_process2.mod    |  36 +
 test/unit/testing/stochastic_volatility.mod   |  42 +
 82 files changed, 2830 insertions(+), 122 deletions(-)
 create mode 100644 arbor/backends/gpu/chunk_writer.hpp
 create mode 100644 arbor/backends/gpu/rand.cpp
 create mode 100644 arbor/backends/gpu/rand.cu
 create mode 100644 arbor/backends/gpu/rand.hpp
 create mode 100644 arbor/backends/gpu/rand_on_cpu.hpp
 create mode 100644 arbor/backends/gpu/rand_on_gpu.hpp
 create mode 100644 arbor/backends/multicore/rand.cpp
 create mode 100644 arbor/backends/rand_fwd.hpp
 create mode 100644 arbor/backends/rand_impl.hpp
 create mode 100644 arbor/util/pimpl.hpp
 create mode 100644 arbor/util/pimpl_src.hpp
 create mode 100644 doc/dev/sde.rst
 create mode 100644 test/unit-modcc/mod_files/test10.mod
 create mode 100644 test/unit-modcc/mod_files/test9.mod
 create mode 100644 test/unit/test_sde.cpp
 create mode 100644 test/unit/testing/mean_reverting_stochastic_density_process.mod
 create mode 100644 test/unit/testing/mean_reverting_stochastic_density_process2.mod
 create mode 100644 test/unit/testing/mean_reverting_stochastic_process.mod
 create mode 100644 test/unit/testing/mean_reverting_stochastic_process2.mod
 create mode 100644 test/unit/testing/stochastic_volatility.mod

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c3c1c85e..3c76befd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -46,6 +46,8 @@ option(ARB_BACKTRACE "Enable stacktraces on assertion and exceptions (requires B
 
 set(ARB_GPU "none" CACHE STRING "GPU backend and compiler configuration")
 set_property(CACHE PROPERTY STRINGS "none" "cuda" "cuda-clang" "hip")
+option(ARB_USE_GPU_RNG
+    "Use GPU generated random numbers (only cuda, not bitwise equal to CPU version)" OFF)
 
 # Use bundled 3rd party libraries
 
@@ -405,6 +407,15 @@ if(ARB_VECTORIZE)
     list(APPEND ARB_MODCC_FLAGS "--simd")
 endif()
 
+# Random number creation
+# -----------------------------------------------
+
+if(ARB_USE_GPU_RNG AND (ARB_WITH_NVCC OR ARB_WITH_CUDA_CLANG))
+    set(ARB_USE_GPU_RNG_IMPL TRUE)
+else()
+    set(ARB_USE_GPU_RNG_IMPL FALSE)
+endif()
+
 #----------------------------------------------------------
 # Set up install paths, permissions.
 #----------------------------------------------------------
diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt
index 0e79aefc..b58ebff4 100644
--- a/arbor/CMakeLists.txt
+++ b/arbor/CMakeLists.txt
@@ -4,6 +4,7 @@ set(arbor_sources
     arbexcept.cpp
     assert.cpp
     backends/multicore/shared_state.cpp
+    backends/multicore/rand.cpp
     communication/communicator.cpp
     communication/dry_run_context.cpp
     benchmark_cell_group.cpp
@@ -81,8 +82,15 @@ if(ARB_WITH_GPU)
         backends/gpu/forest.cpp
         backends/gpu/stimulus.cu
         backends/gpu/threshold_watcher.cu
+        backends/gpu/rand.cpp
         memory/fill.cu
     )
+    if (ARB_USE_GPU_RNG_IMPL)
+        list(APPEND arbor_sources backends/gpu/rand.cu)
+    else()
+        set_source_files_properties(backends/gpu/rand.cpp PROPERTIES
+            COMPILE_DEFINITIONS ARB_ARBOR_NO_GPU_RAND)
+    endif()
 endif()
 
 if(ARB_WITH_MPI)
diff --git a/arbor/backends/gpu/chunk_writer.hpp b/arbor/backends/gpu/chunk_writer.hpp
new file mode 100644
index 00000000..59647e34
--- /dev/null
+++ b/arbor/backends/gpu/chunk_writer.hpp
@@ -0,0 +1,54 @@
+#include <cstddef>
+
+#include "memory/memory.hpp"
+#include "memory/copy.hpp"
+#include "util/meta.hpp"
+
+#pragma once
+
+namespace arb {
+namespace gpu {
+namespace {
+template <typename T>
+struct chunk_writer {
+    T* end; // device ptr
+    const std::size_t stride;
+
+    chunk_writer(T* data, std::size_t stride): end(data), stride(stride) {}
+
+    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
+    T* append(Seq&& seq) {
+        arb_assert(std::size(seq)==stride);
+        return append_freely(std::forward<Seq>(seq));
+    }
+
+    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
+    T* append_freely(Seq&& seq) {
+        std::size_t n = std::size(seq);
+        memory::copy(memory::host_view<T>(const_cast<T*>(std::data(seq)), n), memory::device_view<T>(end, n));
+        auto p = end;
+        end += n;
+        return p;
+    }
+
+    T* fill(T value) {
+        memory::fill(memory::device_view<T>(end, stride), value);
+        auto p = end;
+        end += stride;
+        return p;
+    }
+
+    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
+    T* append_with_padding(Seq&& seq, typename util::sequence_traits<Seq>::value_type value) {
+        std::size_t n = std::size(seq);
+        arb_assert(n <= stride);
+        std::size_t r = stride - n;
+        auto p = append_freely(std::forward<Seq>(seq));
+        memory::fill(memory::device_view<typename util::sequence_traits<Seq>::value_type>(end, r), value);
+        end += r;
+        return p;
+    }
+};
+} // anonymous namespace
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/gpu_store_types.hpp b/arbor/backends/gpu/gpu_store_types.hpp
index 51073c18..c20c086f 100644
--- a/arbor/backends/gpu/gpu_store_types.hpp
+++ b/arbor/backends/gpu/gpu_store_types.hpp
@@ -17,6 +17,7 @@ namespace gpu {
 
 using array  = memory::device_vector<arb_value_type>;
 using iarray = memory::device_vector<arb_index_type>;
+using sarray = memory::device_vector<arb_size_type>;
 
 using deliverable_event_stream = arb::gpu::multi_event_stream<deliverable_event>;
 using sample_event_stream = arb::gpu::multi_event_stream<sample_event>;
diff --git a/arbor/backends/gpu/rand.cpp b/arbor/backends/gpu/rand.cpp
new file mode 100644
index 00000000..513394f7
--- /dev/null
+++ b/arbor/backends/gpu/rand.cpp
@@ -0,0 +1,62 @@
+#include <util/span.hpp>
+#include <backends/gpu/chunk_writer.hpp>
+#include <backends/gpu/rand.hpp>
+
+#ifdef ARB_ARBOR_NO_GPU_RAND
+#include <backends/gpu/rand_on_cpu.hpp>
+#else
+#include <backends/gpu/rand_on_gpu.hpp>
+#endif
+
+ARB_INSTANTIATE_PIMPL(arb::gpu::random_numbers::impl)
+
+namespace arb {
+namespace gpu {
+
+void random_numbers::instantiate(mechanism& m, std::size_t value_width_padded,
+    const mechanism_layout& pos_data, arb_seed_type seed) {
+
+    using util::make_span;
+
+    // Allocate view pointers for random nubers
+    std::size_t num_random_numbers_per_cv = m.mech_.n_random_variables;
+    std::size_t random_number_storage = num_random_numbers_per_cv*cbprng::cache_size();
+    for (auto& v : random_numbers_) v.resize(num_random_numbers_per_cv);
+
+    // Allocate bulk storage
+    std::size_t count = random_number_storage*value_width_padded;
+    data_ = array(count, NAN);
+
+    // Set random numbers
+    chunk_writer writer(data_.data(), value_width_padded);
+    for (auto idx_v: make_span(num_random_numbers_per_cv)) {
+        for (auto idx_c: make_span(cbprng::cache_size())) {
+            random_numbers_[idx_c][idx_v] = writer.fill(0);
+        }
+    }
+
+    // Shift data to GPU
+    for (auto idx_c: make_span(cbprng::cache_size()))
+        random_numbers_d_[idx_c] = memory::on_gpu(random_numbers_[idx_c]);
+
+    // Instantiate implementation details
+    impl_ = util::make_pimpl<impl>(random_numbers_[0][0], m, value_width_padded, pos_data, seed);
+}
+
+void random_numbers::update(mechanism& m) {
+    // Assign new random numbers by selecting the next cache
+    const auto counter = random_number_update_counter_++;
+    const auto cache_idx = cbprng::cache_index(counter);
+    m.ppack_.random_numbers = random_numbers_d_[cache_idx].data();
+
+    // Generate random numbers every cbprng::cache_size() iterations:
+    // For each random variable we will generate cbprng::cache_size() values per site
+    // and there are width sites.
+    // The RNG will be seeded by a global seed, the mechanism id, the variable index, the
+    // current site's global cell, the site index within its cell and a counter representing
+    // time.
+    if (cache_idx == 0) impl_->update(m, counter);
+}
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/rand.cu b/arbor/backends/gpu/rand.cu
new file mode 100644
index 00000000..e31c211e
--- /dev/null
+++ b/arbor/backends/gpu/rand.cu
@@ -0,0 +1,64 @@
+#include <arbor/gpu/gpu_api.hpp>
+#include <arbor/gpu/gpu_common.hpp>
+
+#include "backends/rand_impl.hpp"
+
+namespace arb {
+namespace gpu {
+
+namespace kernel {
+__global__
+void generate_random_numbers(
+    arb_value_type* __restrict__ dst,
+    std::size_t width,
+    std::size_t width_padded,
+    std::size_t num_rv,
+    arb::cbprng::value_type seed,
+    arb::cbprng::value_type mech_id,
+    arb::cbprng::value_type counter,
+    arb_size_type const * __restrict__ gids,
+    arb_size_type const * __restrict__ idxs,
+    unsigned cache_size) {
+    // location and variable number extracted from thread block
+    const int i = threadIdx.x + blockDim.x*blockIdx.x;
+    const arb::cbprng::value_type n = blockIdx.y;
+
+    if (i < width) {
+        const arb::cbprng::value_type gid = gids[i];
+        const arb::cbprng::value_type idx = idxs[i];
+        const auto r = arb::cbprng::generator{}(arb::cbprng::array_type{seed, mech_id, n, counter},
+            arb::cbprng::array_type{gid, idx, 0xdeadf00dull, 0xdeadbeefull});
+        const auto a = r123::boxmuller(r[0], r[1]);
+        const auto b = r123::boxmuller(r[2], r[3]);
+        dst[i + width_padded*(0 + cache_size*n)] = a.x;
+        dst[i + width_padded*(1 + cache_size*n)] = a.y;
+        dst[i + width_padded*(2 + cache_size*n)] = b.x;
+        dst[i + width_padded*(3 + cache_size*n)] = b.y;
+    }
+}
+} // namespace kernel
+
+void generate_random_numbers(
+    arb_value_type* dst,        // points to random number storage
+    std::size_t width,          // number of sites
+    std::size_t width_padded,   // padded number of sites
+    std::size_t num_rv,         // number of random variables
+    cbprng::value_type seed,    // simulation seed value
+    cbprng::value_type mech_id, // mechanism id
+    cbprng::value_type counter, // step counter
+    arb_size_type const * gid,  // global cell ids (size = width)
+    arb_size_type const * idx   // per-cell location index (size = width)
+    ) {
+    using impl::block_count;
+
+    unsigned const block_dim = 128;
+    unsigned const grid_dim_x = block_count(width, block_dim);
+    unsigned const grid_dim_y = num_rv;
+
+    kernel::generate_random_numbers<<<dim3{grid_dim_x, grid_dim_y, 1}, block_dim>>>(
+        dst, width, width_padded, num_rv, seed, mech_id, counter, gid, idx, cbprng::cache_size());
+}
+
+} // namespace gpu
+} // namespace arb
+
diff --git a/arbor/backends/gpu/rand.hpp b/arbor/backends/gpu/rand.hpp
new file mode 100644
index 00000000..a8e06ad8
--- /dev/null
+++ b/arbor/backends/gpu/rand.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include <arbor/mechanism.hpp>
+
+#include <util/pimpl.hpp>
+#include <backends/rand_fwd.hpp>
+#include <backends/gpu/gpu_store_types.hpp>
+
+namespace arb {
+namespace gpu {
+
+class random_numbers {
+public:
+    void instantiate(mechanism& m, std::size_t value_width_padded, const mechanism_layout& pos_data,
+        arb_seed_type seed);
+
+    void update(mechanism& m);
+
+  private:
+    // random number device storage
+    array data_;
+    std::array<std::vector<arb_value_type*>, cbprng::cache_size()>           random_numbers_;
+    std::array<memory::device_vector<arb_value_type*>, cbprng::cache_size()> random_numbers_d_;
+
+    // time step counter
+    cbprng::counter_type random_number_update_counter_ = 0u;
+
+    // pimpl
+    struct impl;
+    util::pimpl<impl> impl_;
+};
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/rand_on_cpu.hpp b/arbor/backends/gpu/rand_on_cpu.hpp
new file mode 100644
index 00000000..e0022cea
--- /dev/null
+++ b/arbor/backends/gpu/rand_on_cpu.hpp
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <util/pimpl_src.hpp>
+#include <memory/gpu_wrappers.hpp>
+#include <backends/gpu/rand.hpp>
+
+namespace arb {
+namespace gpu {
+
+struct random_numbers::impl {
+    // pointer to random number device storage
+    arb_value_type* random_numbers_;
+
+    // general parameters
+    std::size_t value_width_padded_;
+    arb_seed_type seed_;
+
+    // auxillary random number device storage
+    std::vector<arb_value_type> random_numbers_h_;
+    std::vector<arb_size_type> gid_;
+    std::vector<arb_size_type> idx_;
+
+    impl(arb_value_type* data, mechanism& m, std::size_t value_width_padded,
+        const mechanism_layout& pos_data, arb_seed_type seed) :
+        random_numbers_{data},
+        value_width_padded_{value_width_padded},
+        seed_{seed},
+        random_numbers_h_(m.mech_.n_random_variables * cbprng::cache_size() * value_width_padded),
+        gid_{pos_data.gid},
+        idx_{pos_data.idx} {}
+
+    void update(mechanism& m, cbprng::value_type counter) {
+        const auto num_rv = m.mech_.n_random_variables;
+        const auto width = m.ppack_.width;
+        const cbprng::value_type mech_id = m.mechanism_id();
+        // generate random numbers on the host
+        arb_value_type* dst = random_numbers_h_.data();
+        arb::multicore::generate_random_numbers(dst, width, value_width_padded_, num_rv, seed_,
+            mech_id, counter, gid_.data(), idx_.data());
+        // transfer values to device
+        memory::gpu_memcpy_h2d(random_numbers_, dst,
+            (num_rv*cbprng::cache_size()*value_width_padded_)*sizeof(arb_value_type));
+    }
+};
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/rand_on_gpu.hpp b/arbor/backends/gpu/rand_on_gpu.hpp
new file mode 100644
index 00000000..9239ee02
--- /dev/null
+++ b/arbor/backends/gpu/rand_on_gpu.hpp
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <util/pimpl_src.hpp>
+#include <backends/gpu/rand.hpp>
+
+namespace arb {
+namespace gpu {
+
+struct random_numbers::impl {
+    // pointer to random number device storage
+    arb_value_type* random_numbers_;
+
+    // general parameters
+    std::size_t value_width_padded_;
+    arb_seed_type seed_;
+
+    // auxillary random number device storage
+    sarray sindices_;
+    std::vector<arb_size_type*> prng_indices_;
+    memory::device_vector<arb_size_type*> prng_indices_d_;
+
+    impl(arb_value_type* data, mechanism& m, std::size_t value_width_padded,
+        const mechanism_layout& pos_data, arb_seed_type seed) :
+        random_numbers_{data},
+        value_width_padded_{value_width_padded},
+        seed_{seed} {
+
+        sindices_ = sarray(2*value_width_padded);
+        chunk_writer writer(sindices_.data(), value_width_padded);
+        prng_indices_.resize(2);
+        prng_indices_[0] = writer.append_with_padding(pos_data.gid, 0);
+        prng_indices_[1] = writer.append_with_padding(pos_data.idx, 0);
+        prng_indices_d_ = memory::on_gpu(prng_indices_);
+    }
+
+    void update(mechanism& m, cbprng::value_type counter) {
+        const auto num_rv = m.mech_.n_random_variables;
+        const auto width = m.ppack_.width;
+        const cbprng::value_type mech_id = m.mechanism_id();
+        // generate random numbers on the device
+        generate_random_numbers(random_numbers_, width, value_width_padded_, num_rv, seed_,
+            mech_id, counter, prng_indices_[0], prng_indices_[1]);
+    }
+};
+
+} // namespace gpu
+} // namespace arb
diff --git a/arbor/backends/gpu/shared_state.cpp b/arbor/backends/gpu/shared_state.cpp
index a7d918ba..d6c66065 100644
--- a/arbor/backends/gpu/shared_state.cpp
+++ b/arbor/backends/gpu/shared_state.cpp
@@ -9,6 +9,7 @@
 #include "backends/gpu/gpu_store_types.hpp"
 #include "backends/gpu/shared_state.hpp"
 #include "backends/multi_event_stream_state.hpp"
+#include "backends/gpu/chunk_writer.hpp"
 #include "memory/copy.hpp"
 #include "memory/gpu_wrappers.hpp"
 #include "memory/wrappers.hpp"
@@ -188,7 +189,8 @@ shared_state::shared_state(
     const std::vector<arb_value_type>& temperature_K,
     const std::vector<arb_value_type>& diam,
     const std::vector<arb_index_type>& src_to_spike,
-    unsigned // alignment parameter ignored.
+    unsigned, // alignment parameter ignored.
+    arb_seed_type cbprng_seed_
     ):
     n_intdom(n_intdom),
     n_detector(n_detector),
@@ -207,44 +209,13 @@ shared_state::shared_state(
     diam_um(make_const_view(diam)),
     time_since_spike(n_cell*n_detector),
     src_to_spike(make_const_view(src_to_spike)),
+    cbprng_seed(cbprng_seed_),
     deliverable_events(n_intdom)
 {
     memory::fill(time_since_spike, -1.0);
     add_scalar(temperature_degC.size(), temperature_degC.data(), -273.15);
 }
 
-namespace {
-template <typename T>
-struct chunk_writer {
-    T* end; // device ptr
-    const std::size_t stride;
-
-    chunk_writer(T* data, std::size_t stride): end(data), stride(stride) {}
-
-    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
-    T* append(Seq&& seq) {
-        arb_assert(std::size(seq)==stride);
-        return append_freely(std::forward<Seq>(seq));
-    }
-
-    template <typename Seq, typename = std::enable_if_t<util::is_contiguous_v<Seq>>>
-    T* append_freely(Seq&& seq) {
-        std::size_t n = std::size(seq);
-        memory::copy(memory::host_view<T>(const_cast<T*>(std::data(seq)), n), memory::device_view<T>(end, n));
-        auto p = end;
-        end += n;
-        return p;
-    }
-
-    T* fill(T value) {
-        memory::fill(memory::device_view<T>(end, stride), value);
-        auto p = end;
-        end += stride;
-        return p;
-    }
-};
-}
-
 void shared_state::set_parameter(mechanism& m, const std::string& key, const std::vector<arb_value_type>& values) {
     if (values.size()!=m.ppack_.width) throw arbor_internal_error("mechanism parameter size mismatch");
     const auto& store = storage.at(m.mechanism_id());
@@ -263,6 +234,13 @@ void shared_state::set_parameter(mechanism& m, const std::string& key, const std
     memory::copy(memory::make_const_view(values), memory::device_view<arb_value_type>(data, m.ppack_.width));
 }
 
+void shared_state::update_prng_state(mechanism& m) {
+    if (!m.mech_.n_random_variables) return;
+    auto const mech_id = m.mechanism_id();
+    auto& store = storage[mech_id];
+    store.random_numbers_.update(m);
+}
+
 const arb_value_type* shared_state::mechanism_state_data(const mechanism& m, const std::string& key) {
     const auto& store = storage.at(m.mechanism_id());
 
@@ -336,10 +314,10 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
         // Allocate bulk storage
         std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*width_padded + m.mech_.n_globals;
         store.data_   = array(count, NAN);
-        chunk_writer writer(store.data_.data(), width);
+        chunk_writer writer(store.data_.data(), width_padded);
 
         // First sub-array of data_ is used for weight_
-        m.ppack_.weight = writer.append(pos_data.weight);
+        m.ppack_.weight = writer.append_with_padding(pos_data.weight, 0);
         // Set fields
         for (auto idx: make_span(m.mech_.n_parameters)) {
             store.parameters_[idx] = writer.fill(m.mech_.parameters[idx].default_value);
@@ -347,6 +325,7 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
         for (auto idx: make_span(m.mech_.n_state_vars)) {
             store.state_vars_[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
         }
+
         // Assign global scalar parameters. NB: Last chunk, since it breaks the width striding.
         for (auto idx: make_span(m.mech_.n_globals)) store.globals_[idx] = m.mech_.globals[idx].default_value;
         for (auto& [k, v]: overrides.globals) {
@@ -368,10 +347,10 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
         // Allocate bulk storage
         std::size_t count = mult_in_place + peer_indices + m.mech_.n_ions + 1;
         store.indices_ = iarray(count*width_padded);
-        chunk_writer writer(store.indices_.data(), width);
+        chunk_writer writer(store.indices_.data(), width_padded);
 
         // Setup node indices
-        m.ppack_.node_index = writer.append(pos_data.cv);
+        m.ppack_.node_index = writer.append_with_padding(pos_data.cv, 0);
         // Create ion indices
         for (auto idx: make_span(m.mech_.n_ions)) {
             auto  ion = m.mech_.ions[idx].name;
@@ -383,14 +362,14 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
             auto ni = memory::on_host(oion->node_index_);
             auto indices = util::index_into(pos_data.cv, ni);
             std::vector<arb_index_type> mech_ion_index(indices.begin(), indices.end());
-            store.ion_states_[idx].index = writer.append(mech_ion_index);
+            store.ion_states_[idx].index = writer.append_with_padding(mech_ion_index, 0);
         }
 
-        m.ppack_.multiplicity = mult_in_place? writer.append(pos_data.multiplicity): nullptr;
+        m.ppack_.multiplicity = mult_in_place? writer.append_with_padding(pos_data.multiplicity, 0): nullptr;
         // `peer_index` holds the peer CV of each CV in node_index.
         // Peer CVs are only filled for gap junction mechanisms. They are used
         // to index the voltage at the other side of a gap-junction connection.
-        m.ppack_.peer_index = peer_indices? writer.append(pos_data.peer_cv): nullptr;
+        m.ppack_.peer_index = peer_indices? writer.append_with_padding(pos_data.peer_cv, 0): nullptr;
     }
 
     // Shift data to GPU, set up pointers
@@ -402,6 +381,8 @@ void shared_state::instantiate(mechanism& m, unsigned id, const mechanism_overri
 
     store.ion_states_d_ = memory::on_gpu(store.ion_states_);
     m.ppack_.ion_states = store.ion_states_d_.data();
+
+    store.random_numbers_.instantiate(m, width_padded, pos_data, cbprng_seed);
 }
 
 void shared_state::integrate_voltage() {
diff --git a/arbor/backends/gpu/shared_state.hpp b/arbor/backends/gpu/shared_state.hpp
index 6252d034..c09cdd12 100644
--- a/arbor/backends/gpu/shared_state.hpp
+++ b/arbor/backends/gpu/shared_state.hpp
@@ -10,6 +10,7 @@
 
 #include "fvm_layout.hpp"
 
+#include "backends/gpu/rand.hpp"
 #include "backends/gpu/gpu_store_types.hpp"
 #include "backends/gpu/stimulus.hpp"
 #include "backends/gpu/diffusion_state.hpp"
@@ -123,6 +124,7 @@ struct ARB_ARBOR_API shared_state {
         memory::device_vector<arb_value_type*> parameters_d_;
         memory::device_vector<arb_value_type*> state_vars_d_;
         memory::device_vector<arb_ion_state>   ion_states_d_;
+        random_numbers random_numbers_;
     };
 
     using cable_solver = arb::gpu::matrix_state_fine<arb_value_type, arb_index_type>;
@@ -151,6 +153,8 @@ struct ARB_ARBOR_API shared_state {
     array time_since_spike;   // Stores time since last spike on any detector, organized by cell.
     iarray src_to_spike;      // Maps spike source index to spike index
 
+    arb_seed_type cbprng_seed; // random number generator seed
+
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     deliverable_event_stream deliverable_events;
@@ -168,7 +172,8 @@ struct ARB_ARBOR_API shared_state {
         const std::vector<arb_value_type>& temperature_K,
         const std::vector<arb_value_type>& diam,
         const std::vector<arb_index_type>& src_to_spike,
-        unsigned // align parameter ignored
+        unsigned, // align parameter ignored
+        arb_seed_type cbprng_seed_ = 0u
     );
 
     // Setup a mechanism and tie its backing store to this object
@@ -176,6 +181,8 @@ struct ARB_ARBOR_API shared_state {
 
     void set_parameter(mechanism&, const std::string&, const std::vector<arb_value_type>&);
 
+    void update_prng_state(mechanism&);
+
     // Note: returned pointer points to device memory.
     const arb_value_type* mechanism_state_data(const mechanism& m, const std::string& key);
 
diff --git a/arbor/backends/gpu/threshold_watcher.hpp b/arbor/backends/gpu/threshold_watcher.hpp
index 199565b2..35c65c25 100644
--- a/arbor/backends/gpu/threshold_watcher.hpp
+++ b/arbor/backends/gpu/threshold_watcher.hpp
@@ -110,7 +110,6 @@ public:
     /// crossed since current time t, and the last time the test was
     /// performed.
     void test(array* time_since_spike) {
-        arb_assert(values_);
 
         if (size()>0) {
             test_thresholds_impl(
diff --git a/arbor/backends/multicore/rand.cpp b/arbor/backends/multicore/rand.cpp
new file mode 100644
index 00000000..b69676c0
--- /dev/null
+++ b/arbor/backends/multicore/rand.cpp
@@ -0,0 +1,33 @@
+#include "backends/rand_impl.hpp"
+
+namespace arb {
+namespace multicore {
+
+void generate_random_numbers(
+    arb_value_type* dst,        // points to random number storage
+    std::size_t width,          // number of sites
+    std::size_t width_padded,   // padded number of sites
+    std::size_t num_rv,         // number of random variables
+    cbprng::value_type seed,    // simulation seed value
+    cbprng::value_type mech_id, // mechanism id
+    cbprng::value_type counter, // step counter
+    arb_size_type const * gid,  // global cell ids (size = width)
+    arb_size_type const * idx   // per-cell location index (size = width)
+    ) {
+    for (std::size_t n=0; n<num_rv; ++n) {
+        for (std::size_t i=0; i<width; ++i) {
+            const auto r = cbprng::generator{}({seed, mech_id, n, counter},
+                    {gid[i], idx[i], 0xdeadf00dull, 0xdeadbeefull});
+            const auto [a0, a1] = r123::boxmuller(r[0], r[1]);
+            const auto [a2, a3] = r123::boxmuller(r[2], r[3]);
+            dst[i + width_padded*(0 + cbprng::cache_size()*n)] = a0;
+            dst[i + width_padded*(1 + cbprng::cache_size()*n)] = a1;
+            dst[i + width_padded*(2 + cbprng::cache_size()*n)] = a2;
+            dst[i + width_padded*(3 + cbprng::cache_size()*n)] = a3;
+        }
+    }
+}
+
+} // namespace multicore
+} // namespace arb
+
diff --git a/arbor/backends/multicore/shared_state.cpp b/arbor/backends/multicore/shared_state.cpp
index 4ebab08c..e7c2e8f8 100644
--- a/arbor/backends/multicore/shared_state.cpp
+++ b/arbor/backends/multicore/shared_state.cpp
@@ -16,6 +16,7 @@
 #include <arbor/simd/simd.hpp>
 
 #include "backends/event.hpp"
+#include "backends/rand_impl.hpp"
 #include "io/sepval.hpp"
 #include "util/index_into.hpp"
 #include "util/padded_alloc.hpp"
@@ -203,7 +204,8 @@ shared_state::shared_state(
     const std::vector<arb_value_type>& temperature_K,
     const std::vector<arb_value_type>& diam,
     const std::vector<arb_index_type>& src_to_spike,
-    unsigned align
+    unsigned align,
+    arb_seed_type cbprng_seed_
 ):
     alignment(min_alignment(align)),
     alloc(alignment),
@@ -224,6 +226,7 @@ shared_state::shared_state(
     diam_um(diam.begin(), diam.end(), pad(alignment)),
     time_since_spike(n_cell*n_detector, pad(alignment)),
     src_to_spike(src_to_spike.begin(), src_to_spike.end(), pad(alignment)),
+    cbprng_seed(cbprng_seed_),
     deliverable_events(n_intdom)
 {
     // For indices in the padded tail of cv_to_intdom, set index to last valid intdom index.
@@ -464,6 +467,31 @@ const arb_value_type* shared_state::mechanism_state_data(const mechanism& m, con
     return nullptr;
 }
 
+void shared_state::update_prng_state(mechanism& m) {
+    if (!m.mech_.n_random_variables) return;
+    const auto mech_id = m.mechanism_id();
+    auto& store = storage[mech_id];
+    const auto counter = store.random_number_update_counter_++;
+    const auto cache_idx = cbprng::cache_index(counter);
+
+    m.ppack_.random_numbers = store.random_numbers_[cache_idx].data();
+
+    if (cache_idx == 0) {
+        // Generate random numbers every cbprng::cache_size() iterations:
+        // For each random variable we will generate cbprng::cache_size() values per site
+        // and there are width sites.
+        // The RNG will be seeded by a global seed, the mechanism id, the variable index, the
+        // current site's global cell, the site index within its cell and a counter representing
+        // time.
+        const auto num_rv = store.random_numbers_[cache_idx].size();
+        const auto width_padded = store.value_width_padded;
+        const auto width = m.ppack_.width;
+        arb_value_type* dst = store.random_numbers_[0][0];
+        generate_random_numbers(dst, width, width_padded, num_rv, cbprng_seed, mech_id, counter,
+            store.gid_.data(), store.idx_.data());
+    }
+}
+
 // The derived class (typically generated code from modcc) holds pointers that need
 // to be set to point inside the shared state, or into the allocated parameter/variable
 // data block.
@@ -512,6 +540,10 @@ void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_o
     if (storage.find(id) != storage.end()) throw arb::arbor_internal_error("Duplicate mech id in shared state");
     auto& store = storage[id];
 
+    // store indices for random number generation
+    store.gid_ = pos_data.gid;
+    store.idx_ = pos_data.idx;
+
     // Allocate view pointers (except globals!)
     store.state_vars_.resize(m.mech_.n_state_vars); m.ppack_.state_vars = store.state_vars_.data();
     store.parameters_.resize(m.mech_.n_parameters); m.ppack_.parameters = store.parameters_.data();
@@ -537,9 +569,17 @@ void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_o
 
     // Initialize state and parameter vectors with default values.
     {
+        // Allocate view pointers for random nubers
+        std::size_t num_random_numbers_per_cv = m.mech_.n_random_variables;
+        std::size_t random_number_storage = num_random_numbers_per_cv*cbprng::cache_size();
+        for (auto& v : store.random_numbers_) v.resize(num_random_numbers_per_cv);
+        m.ppack_.random_numbers = store.random_numbers_[0].data();
+
         // Allocate bulk storage
         std::size_t value_width_padded = extend_width<arb_value_type>(m, pos_data.cv.size());
-        std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1)*value_width_padded + m.mech_.n_globals;
+        store.value_width_padded = value_width_padded;
+        std::size_t count = (m.mech_.n_state_vars + m.mech_.n_parameters + 1 +
+            random_number_storage)*value_width_padded + m.mech_.n_globals;
         store.data_ = array(count, NAN, pad);
         chunk_writer writer(store.data_.data(), value_width_padded);
 
@@ -552,6 +592,10 @@ void shared_state::instantiate(arb::mechanism& m, unsigned id, const mechanism_o
         for (auto idx: make_span(m.mech_.n_state_vars)) {
             m.ppack_.state_vars[idx] = writer.fill(m.mech_.state_vars[idx].default_value);
         }
+        // Set random numbers
+        for (auto idx_v: make_span(num_random_numbers_per_cv))
+            for (auto idx_c: make_span(cbprng::cache_size()))
+                store.random_numbers_[idx_c][idx_v] = writer.fill(0);
 
         // Assign global scalar parameters
         m.ppack_.globals = writer.end;
diff --git a/arbor/backends/multicore/shared_state.hpp b/arbor/backends/multicore/shared_state.hpp
index e23c088d..7d306ce7 100644
--- a/arbor/backends/multicore/shared_state.hpp
+++ b/arbor/backends/multicore/shared_state.hpp
@@ -14,6 +14,7 @@
 #include <arbor/simd/simd.hpp>
 
 #include "backends/event.hpp"
+#include "backends/rand_fwd.hpp"
 #include "util/padded_alloc.hpp"
 #include "util/rangeutil.hpp"
 
@@ -122,11 +123,17 @@ struct ARB_ARBOR_API shared_state {
     struct mech_storage {
         array data_;
         iarray indices_;
+        std::size_t value_width_padded;
         constraint_partition constraints_;
         std::vector<arb_value_type>  globals_;
         std::vector<arb_value_type*> parameters_;
         std::vector<arb_value_type*> state_vars_;
         std::vector<arb_ion_state>   ion_states_;
+
+        std::array<std::vector<arb_value_type*>, cbprng::cache_size()> random_numbers_;
+        std::vector<arb_size_type> gid_;
+        std::vector<arb_size_type> idx_;
+        cbprng::counter_type random_number_update_counter_ = 0u;
     };
 
     cable_solver solver;
@@ -155,6 +162,8 @@ struct ARB_ARBOR_API shared_state {
     array time_since_spike;   // Stores time since last spike on any detector, organized by cell.
     iarray src_to_spike;      // Maps spike source index to spike index
 
+    arb_seed_type cbprng_seed; // random number generator seed
+
     istim_state stim_data;
     std::unordered_map<std::string, ion_state> ion_data;
     deliverable_event_stream deliverable_events;
@@ -172,13 +181,16 @@ struct ARB_ARBOR_API shared_state {
         const std::vector<arb_value_type>& temperature_K,
         const std::vector<arb_value_type>& diam,
         const std::vector<arb_index_type>& src_to_spike,
-        unsigned align
+        unsigned align,
+        arb_seed_type cbprng_seed_ = 0u
     );
 
     void instantiate(mechanism&, unsigned, const mechanism_overrides&, const mechanism_layout&);
 
     void set_parameter(mechanism&, const std::string&, const std::vector<arb_value_type>&);
 
+    void update_prng_state(mechanism&);
+
     const arb_value_type* mechanism_state_data(const mechanism&, const std::string&);
 
     void add_ion(
diff --git a/arbor/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp
index 755c1591..139f9281 100644
--- a/arbor/backends/multicore/threshold_watcher.hpp
+++ b/arbor/backends/multicore/threshold_watcher.hpp
@@ -67,7 +67,8 @@ public:
     /// Crossing events are recorded for each threshold that
     /// is crossed since the last call to test
     void test(array* time_since_spike) {
-        arb_assert(values_);
+        // either number of cvs is 0 or values_ is not null
+        arb_assert((n_cv_ == 0) || (bool)values_);
 
         // Reset all spike times to -1.0 indicating no spike has been recorded on the detector
         const arb_value_type* t_before = t_before_ptr_->data();
diff --git a/arbor/backends/rand_fwd.hpp b/arbor/backends/rand_fwd.hpp
new file mode 100644
index 00000000..ccd45732
--- /dev/null
+++ b/arbor/backends/rand_fwd.hpp
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <cstddef>
+
+#include <arbor/arb_types.hpp>
+
+namespace arb {
+
+namespace cbprng {
+
+using value_type = arb_seed_type;
+using counter_type = value_type;
+
+inline constexpr counter_type cache_size() { return 4; }
+inline constexpr counter_type cache_index(counter_type c) { return (3u & c); }
+
+} // namespace cbprng
+
+// multicore implementation forward declaration
+namespace multicore {
+void generate_random_numbers(arb_value_type* dst, std::size_t width, std::size_t width_padded,
+    std::size_t num_rv, cbprng::value_type seed, cbprng::value_type mech_id,
+    cbprng::value_type counter, arb_size_type const * gid, arb_size_type const * idx);
+} // namespace multicore
+
+// gpu implementation forward declaration
+namespace gpu {
+void generate_random_numbers(arb_value_type* dst, std::size_t width, std::size_t width_padded,
+    std::size_t num_rv, cbprng::value_type seed, cbprng::value_type mech_id,
+    cbprng::value_type counter, arb_size_type const * gid, arb_size_type const * idx);
+} // namespace gpu
+
+} // namespace arb
+
diff --git a/arbor/backends/rand_impl.hpp b/arbor/backends/rand_impl.hpp
new file mode 100644
index 00000000..ef937615
--- /dev/null
+++ b/arbor/backends/rand_impl.hpp
@@ -0,0 +1,35 @@
+#pragma once
+
+// This file is intended to be included in the source files directly in order
+// to be compiled by either the host or the device compiler 
+
+#include <type_traits>
+
+#include <Random123/boxmuller.hpp>
+#include <Random123/threefry.h>
+
+#include "backends/rand_fwd.hpp"
+
+namespace arb {
+namespace cbprng {
+
+// checks that chosen random number generator has the correct layout in terms
+// of number of counter and key fields
+struct traits_ {
+    using generator_type = r123::Threefry4x64_R<12>;
+    using counter_type = typename generator_type::ctr_type;
+    using key_type = counter_type;
+    using array_type = counter_type;
+
+    static_assert(counter_type::static_size == cache_size());
+    static_assert(std::is_same<typename counter_type::value_type, value_type>::value);
+    static_assert(std::is_same<typename generator_type::key_type, key_type>::value);
+};
+
+// export the main types for counter based random number generation
+using generator  = traits_::generator_type;
+using array_type = traits_::array_type;
+
+} // namespace cbprng
+} // namespace arb
+
diff --git a/arbor/cell_group_factory.cpp b/arbor/cell_group_factory.cpp
index 69af0ffd..e7fb7006 100644
--- a/arbor/cell_group_factory.cpp
+++ b/arbor/cell_group_factory.cpp
@@ -20,14 +20,14 @@ cell_group_ptr make_cell_group(Args&&... args) {
 }
 
 ARB_ARBOR_API cell_group_factory cell_kind_implementation(
-        cell_kind ck, backend_kind bk, const execution_context& ctx)
+        cell_kind ck, backend_kind bk, const execution_context& ctx, arb_seed_type seed)
 {
     using gid_vector = std::vector<cell_gid_type>;
 
     switch (ck) {
     case cell_kind::cable:
-        return [bk, ctx](const gid_vector& gids, const recipe& rec, cell_label_range& cg_sources, cell_label_range& cg_targets) {
-            return make_cell_group<mc_cell_group>(gids, rec, cg_sources, cg_targets, make_fvm_lowered_cell(bk, ctx));
+        return [bk, ctx, seed](const gid_vector& gids, const recipe& rec, cell_label_range& cg_sources, cell_label_range& cg_targets) {
+            return make_cell_group<mc_cell_group>(gids, rec, cg_sources, cg_targets, make_fvm_lowered_cell(bk, ctx, seed));
         };
 
     case cell_kind::spike_source:
diff --git a/arbor/cell_group_factory.hpp b/arbor/cell_group_factory.hpp
index b8a321c4..3f784b3f 100644
--- a/arbor/cell_group_factory.hpp
+++ b/arbor/cell_group_factory.hpp
@@ -22,7 +22,7 @@ using cell_group_factory = std::function<
         cell_group_ptr(const std::vector<cell_gid_type>&, const recipe&, cell_label_range& cg_sources, cell_label_range& cg_targets)>;
 
 ARB_ARBOR_API cell_group_factory cell_kind_implementation(
-        cell_kind, backend_kind, const execution_context&);
+        cell_kind, backend_kind, const execution_context&, std::uint64_t = 0);
 
 inline bool cell_kind_supported(
         cell_kind c, backend_kind b, const execution_context& ctx)
diff --git a/arbor/fvm_layout.cpp b/arbor/fvm_layout.cpp
index b68bf7e5..27d59688 100644
--- a/arbor/fvm_layout.cpp
+++ b/arbor/fvm_layout.cpp
@@ -1108,7 +1108,7 @@ fvm_mechanism_data fvm_build_mechanism_data(
             return a.target_index<b.target_index;
         });
 
-        bool coalesce = info.linear && gprop.coalesce_synapses;
+        bool coalesce = !info.random_variables.size() && info.linear && gprop.coalesce_synapses;
 
         fvm_mechanism_config config;
         config.kind = arb_mechanism_kind_point;
diff --git a/arbor/fvm_lowered_cell.hpp b/arbor/fvm_lowered_cell.hpp
index 9dba2ade..a276ebbf 100644
--- a/arbor/fvm_lowered_cell.hpp
+++ b/arbor/fvm_lowered_cell.hpp
@@ -236,6 +236,7 @@ struct fvm_lowered_cell {
 
 using fvm_lowered_cell_ptr = std::unique_ptr<fvm_lowered_cell>;
 
-ARB_ARBOR_API fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx);
+ARB_ARBOR_API fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx,
+        std::uint64_t seed = 0);
 
 } // namespace arb
diff --git a/arbor/fvm_lowered_cell_impl.cpp b/arbor/fvm_lowered_cell_impl.cpp
index 9e521a9e..2a0d15d3 100644
--- a/arbor/fvm_lowered_cell_impl.cpp
+++ b/arbor/fvm_lowered_cell_impl.cpp
@@ -13,13 +13,14 @@
 
 namespace arb {
 
-fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx) {
+fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx,
+                                           arb_seed_type seed) {
     switch (p) {
     case backend_kind::multicore:
-        return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<multicore::backend>(ctx));
+        return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<multicore::backend>(ctx, seed));
     case backend_kind::gpu:
 #ifdef ARB_GPU_ENABLED
-        return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<gpu::backend>(ctx));
+        return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<gpu::backend>(ctx, seed));
 #endif
         ; // fall through
     default:
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index a83e3432..09902f5c 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -46,7 +46,11 @@ public:
     using index_type = arb_index_type;
     using size_type = arb_size_type;
 
-    fvm_lowered_cell_impl(execution_context ctx): context_(ctx), threshold_watcher_(ctx) {};
+    fvm_lowered_cell_impl(execution_context ctx, arb_seed_type seed = 0):
+        context_(ctx),
+        threshold_watcher_(ctx),
+        seed_{seed}
+    {};
 
     void reset() override;
 
@@ -98,6 +102,9 @@ private:
     // Optional non-physical voltage check threshold
     std::optional<double> check_voltage_mV_;
 
+    // random number generator seed value
+    arb_seed_type seed_;
+
     // Flag indicating that at least one of the mechanisms implements the post_events procedure
     bool post_events_ = false;
 
@@ -216,7 +223,6 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
     // TODO: Consider devolving more of this to back-end routines (e.g.
     // per-compartment dt probably not a win on GPU), possibly rumbling
     // complete fvm state into shared state object.
-
     while (remaining_steps) {
         // Update any required reversal potentials based on ionic concs.
 
@@ -283,6 +289,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         // Integrate mechanism state.
 
         for (auto& m: mechanisms_) {
+            state_->update_prng_state(*m);
             m->update_state();
         }
 
@@ -485,6 +492,11 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
         cv_to_cell = D.geometry.cv_to_cell;
     }
 
+    // map control volume ids to global cell ids
+    std::vector<arb_index_type> cv_to_gid(D.geometry.cv_to_cell.size());
+    std::transform(D.geometry.cv_to_cell.begin(), D.geometry.cv_to_cell.end(), cv_to_gid.begin(),
+        [&gids](auto i){return gids[i]; });
+
     // Create shared cell state.
     // Shared state vectors should accommodate each mechanism's data alignment requests.
 
@@ -495,7 +507,7 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
     state_ = std::make_unique<shared_state>(
                 nintdom, ncell, max_detector, cv_to_intdom, std::move(cv_to_cell),
                 D.init_membrane_potential, D.temperature_K, D.diam_um, std::move(src_to_spike),
-                data_alignment? data_alignment: 1u);
+                data_alignment? data_alignment: 1u, seed_);
 
     state_->solver =
         {D.geometry.cv_parent, D.geometry.cell_cv_divs, D.cv_capacitance, D.face_conductance, D.cv_area, fvm_info.cell_to_intdom};
@@ -550,14 +562,20 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
         // contribution in the CV, and F is the scaling factor required
         // to convert from the mechanism current contribution units to A/m².
 
+        arb_size_type idx_offset = 0;
         switch (config.kind) {
         case arb_mechanism_kind_point:
             // Point mechanism contributions are in [nA]; CV area A in [µm^2].
             // F = 1/A * [nA/µm²] / [A/m²] = 1000/A.
 
+            layout.gid.resize(config.cv.size());
+            layout.idx.resize(layout.gid.size());
             for (auto i: count_along(config.cv)) {
                 auto cv = layout.cv[i];
                 layout.weight[i] = 1000/D.cv_area[cv];
+                layout.gid[i] = cv_to_gid[cv];
+                if (i>0 && (layout.gid[i-1] != layout.gid[i])) idx_offset = i;
+                layout.idx[i] = i - idx_offset;
 
                 if (config.target.empty()) continue;
 
@@ -584,8 +602,13 @@ fvm_initialization_data fvm_lowered_cell_impl<Backend>::initialize(
         case arb_mechanism_kind_density:
             // Current density contributions from mechanism are already in [A/m²].
 
+            layout.gid.resize(layout.cv.size());
+            layout.idx.resize(layout.gid.size());
             for (auto i: count_along(layout.cv)) {
                 layout.weight[i] = config.norm_area[i];
+                layout.gid[i] = cv_to_gid[i];
+                if (i>0 && (layout.gid[i-1] != layout.gid[i])) idx_offset = i;
+                layout.idx[i] = i - idx_offset;
             }
             break;
         case arb_mechanism_kind_reversal_potential:
diff --git a/arbor/include/arbor/arb_types.inc b/arbor/include/arbor/arb_types.inc
index ac60304d..dcb5166b 100644
--- a/arbor/include/arbor/arb_types.inc
+++ b/arbor/include/arbor/arb_types.inc
@@ -2,3 +2,4 @@ typedef double   arb_value_type;
 typedef float    arb_weight_type;
 typedef int      arb_index_type;
 typedef uint32_t arb_size_type;
+typedef uint64_t arb_seed_type;
diff --git a/arbor/include/arbor/mechanism.hpp b/arbor/include/arbor/mechanism.hpp
index 5ab873af..25af14bd 100644
--- a/arbor/include/arbor/mechanism.hpp
+++ b/arbor/include/arbor/mechanism.hpp
@@ -101,6 +101,9 @@ struct mechanism_layout {
     // Number of logical point processes at in-instance index;
     // if empty, point processes are not coalesced and all multipliers are 1.
     std::vector<arb_index_type> multiplicity;
+
+    std::vector<arb_size_type> gid;
+    std::vector<arb_size_type> idx;
 };
 
 struct mechanism_overrides {
diff --git a/arbor/include/arbor/mechanism_abi.h b/arbor/include/arbor/mechanism_abi.h
index ce32ce3b..a5050ed8 100644
--- a/arbor/include/arbor/mechanism_abi.h
+++ b/arbor/include/arbor/mechanism_abi.h
@@ -22,7 +22,7 @@ extern "C" {
 
 // Version
 #define ARB_MECH_ABI_VERSION_MAJOR 0
-#define ARB_MECH_ABI_VERSION_MINOR 2
+#define ARB_MECH_ABI_VERSION_MINOR 3
 #define ARB_MECH_ABI_VERSION_PATCH 1
 #define ARB_MECH_ABI_VERSION ((ARB_MECH_ABI_VERSION_MAJOR * 10000L * 10000L) + (ARB_MECH_ABI_VERSION_MAJOR * 10000L) + ARB_MECH_ABI_VERSION_PATCH)
 
@@ -94,7 +94,7 @@ typedef struct arb_constraint_partition {
 
 // Parameter Pack
 typedef struct arb_mechanism_ppack {
-    arb_size_type  width;                        // Number of CVs.
+    arb_size_type   width;                       // Number of CVs.
     arb_index_type  n_detectors;                 // Number of spike detectors.
     arb_index_type* vec_ci;
     arb_index_type* vec_di;
@@ -118,6 +118,8 @@ typedef struct arb_mechanism_ppack {
     arb_value_type** state_vars;                    // Array of integrable state.      (Array)
     arb_value_type*  globals;                       // Array of global constant state. (Scalar)
     arb_ion_state*   ion_states;                    // Array of views into shared state.
+
+    arb_value_type const * const * random_numbers;  // Array of random numbers
 } arb_mechanism_ppack;
 
 
@@ -203,6 +205,11 @@ typedef struct arb_ion_info {
     int  expected_valence;
 } arb_ion_info;
 
+typedef struct arb_random_variable_info {
+    const char* name;
+    arb_size_type index;
+} arb_random_variable_info;
+
 // Backend independent data
 typedef struct arb_mechanism_type {
     // Metadata
@@ -221,6 +228,8 @@ typedef struct arb_mechanism_type {
     arb_size_type             n_parameters;
     arb_ion_info*             ions;             // Ion properties
     arb_size_type             n_ions;
+    arb_random_variable_info* random_variables; // Random variable properties
+    arb_size_type             n_random_variables;
 } arb_mechanism_type;
 
 // Bundle a type and its interfaces
diff --git a/arbor/include/arbor/mechinfo.hpp b/arbor/include/arbor/mechinfo.hpp
index 859337bd..6d717a65 100644
--- a/arbor/include/arbor/mechinfo.hpp
+++ b/arbor/include/arbor/mechinfo.hpp
@@ -48,6 +48,10 @@ struct ion_dependency {
     int expected_ion_charge = 0;
 };
 
+struct random_variable {
+    arb_index_type index = 0;
+};
+
 // A hash of the mechanism dynamics description is used to ensure that offline-compiled
 // mechanism implementations are correctly associated with their corresponding generated
 // mechanism information.
@@ -78,6 +82,9 @@ struct ARB_ARBOR_API mechanism_info {
     // Ion dependencies.
     std::unordered_map<std::string, ion_dependency> ions;
 
+    // Random variables
+    std::unordered_map<std::string, arb_size_type> random_variables;
+
     mechanism_fingerprint fingerprint;
 
     bool linear = false;
diff --git a/arbor/include/arbor/simulation.hpp b/arbor/include/arbor/simulation.hpp
index a96e0cd7..4acdadaf 100644
--- a/arbor/include/arbor/simulation.hpp
+++ b/arbor/include/arbor/simulation.hpp
@@ -7,6 +7,7 @@
 #include <functional>
 
 #include <arbor/export.hpp>
+#include <arbor/arb_types.hpp>
 #include <arbor/common_types.hpp>
 #include <arbor/context.hpp>
 #include <arbor/domain_decomposition.hpp>
@@ -25,14 +26,24 @@ using epoch_function = std::function<void(double time, double tfinal)>;
 // simulation_state comprises private implementation for simulation class.
 class simulation_state;
 
+class simulation_builder;
+
 class ARB_ARBOR_API simulation {
 public:
-
-    simulation(const recipe& rec, context ctx, const domain_decomposition& decomp);
+    simulation(const recipe& rec, context ctx, const domain_decomposition& decomp,
+               arb_seed_type seed = 0);
 
     simulation(const recipe& rec,
-               context ctx=make_context(),
-               std::function<domain_decomposition(const recipe&, context)> balancer=[](auto& r, auto c) { return partition_load_balance(r, c); }): simulation(rec, ctx, balancer(rec, ctx)) {}
+               context ctx = make_context(),
+               std::function<domain_decomposition(const recipe&, context)> balancer = 
+                   [](auto& r, auto c) { return partition_load_balance(r, c); },
+               arb_seed_type seed = 0):
+        simulation(rec, ctx, balancer(rec, ctx)) {}
+
+    simulation(simulation const&) = delete;
+    simulation(simulation&&);
+
+    static simulation_builder create(recipe const &);
 
     void update(const connectivity& rec);
 
@@ -82,6 +93,69 @@ private:
     std::unique_ptr<simulation_state> impl_;
 };
 
+// Builder pattern for simulation class to help with construction.
+// Simulation constructor arguments can be added through setter functions in any order or left out
+// entirely, in which case a sane default is chosen. The build member function instantiates the
+// simulation with the current arguments and returns it.
+class ARB_ARBOR_API simulation_builder {
+public:
+
+    simulation_builder(recipe const& rec) noexcept : rec_{rec} {}
+
+    simulation_builder(simulation_builder&&) = default;
+    simulation_builder(simulation_builder const&) = default;
+
+    simulation_builder& set_context(context ctx) noexcept {
+        ctx_ = ctx;
+        return *this;
+    }
+
+    simulation_builder& set_decomposition(domain_decomposition decomp) noexcept {
+        balancer_ = [decomp = std::move(decomp)](const recipe&, context) {return decomp; };
+        return *this;
+    }
+
+    simulation_builder& set_balancer(
+        std::function<domain_decomposition(const recipe&, context)> balancer) noexcept {
+        balancer_ = std::move(balancer);
+        return *this;
+    }
+
+    simulation_builder& set_seed(arb_seed_type seed) noexcept {
+        seed_ = seed;
+        return *this;
+    }
+
+    operator simulation() const { return build(); }
+
+    std::unique_ptr<simulation> make_unique() const {
+        return std::make_unique<simulation>(build());
+    }
+
+private:
+    simulation build() const {
+        return ctx_ ?
+            build(ctx_):
+            build(make_context());
+    }
+
+    simulation build(context ctx) const {
+        return balancer_ ?
+            build(ctx, balancer_(rec_, ctx)):
+            build(ctx, partition_load_balance(rec_, ctx));
+    }
+
+    simulation build(context ctx, domain_decomposition const& decomp) const {
+        return simulation(rec_, ctx, decomp, seed_);
+    }
+
+private:
+    const recipe& rec_;
+    context ctx_;
+    std::function<domain_decomposition(const recipe&, context)> balancer_;
+    arb_seed_type seed_ = 0u;
+};
+
 // An epoch callback function that prints out a text progress bar.
 ARB_ARBOR_API epoch_function epoch_progress_bar();
 
diff --git a/arbor/include/git-source-id b/arbor/include/git-source-id
index f2cc49cb..783d1041 100755
--- a/arbor/include/git-source-id
+++ b/arbor/include/git-source-id
@@ -71,6 +71,8 @@ __end__
 if [ -n "$version_dev" ]; then echo "#define ARB_VERSION_DEV \"${version_dev}\""; fi
 
 for feature in "$@"; do
+    echo "#ifndef ARB_${feature}_ENABLED"
     echo "#define ARB_${feature}_ENABLED"
+    echo "#endif"
 done
 
diff --git a/arbor/mechinfo.cpp b/arbor/mechinfo.cpp
index abb54ba7..1d5a1520 100644
--- a/arbor/mechinfo.cpp
+++ b/arbor/mechinfo.cpp
@@ -31,6 +31,10 @@ mechanism_info::mechanism_info(const arb_mechanism_type& m) {
         v.verify_valence,
         v.expected_valence };
     }
+    for (auto idx: util::make_span(m.n_random_variables)) {
+        const auto& rv = m.random_variables[idx];
+        random_variables[rv.name] = rv.index;
+    }
 }
 
 }
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index a822ae61..55350d68 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -90,7 +90,7 @@ ARB_ARBOR_API void merge_cell_events(
 
 class simulation_state {
 public:
-    simulation_state(const recipe& rec, const domain_decomposition& decomp, context ctx);
+    simulation_state(const recipe& rec, const domain_decomposition& decomp, context ctx, arb_seed_type seed);
 
     void update(const connectivity& rec);
 
@@ -189,7 +189,8 @@ private:
 simulation_state::simulation_state(
         const recipe& rec,
         const domain_decomposition& decomp,
-        context ctx
+        context ctx,
+        arb_seed_type seed
     ):
     ctx_{ctx},
     ddc_{decomp},
@@ -205,7 +206,7 @@ simulation_state::simulation_state(
         [&](cell_group_ptr& group, int i) {
           const auto& group_info = decomp.group(i);
           cell_label_range sources, targets;
-          auto factory = cell_kind_implementation(group_info.kind, group_info.backend, *ctx_);
+          auto factory = cell_kind_implementation(group_info.kind, group_info.backend, *ctx_, seed);
           group = factory(group_info.gids, rec, sources, targets);
 
           cg_sources[i] = cell_labels_and_gids(std::move(sources), group_info.gids);
@@ -515,12 +516,15 @@ void simulation_state::inject_events(const cse_vector& events) {
 
 // Simulation class implementations forward to implementation class.
 
+simulation_builder simulation::create(recipe const & rec) { return {rec}; };
+
 simulation::simulation(
     const recipe& rec,
     context ctx,
-    const domain_decomposition& decomp)
+    const domain_decomposition& decomp,
+    arb_seed_type seed)
 {
-    impl_.reset(new simulation_state(rec, decomp, ctx));
+    impl_.reset(new simulation_state(rec, decomp, ctx, seed));
 }
 
 void simulation::reset() {
@@ -581,6 +585,8 @@ void simulation::inject_events(const cse_vector& events) {
     impl_->inject_events(events);
 }
 
+simulation::simulation(simulation&&) = default;
+
 simulation::~simulation() = default;
 
 ARB_ARBOR_API epoch_function epoch_progress_bar() {
diff --git a/arbor/util/pimpl.hpp b/arbor/util/pimpl.hpp
new file mode 100644
index 00000000..740ce7d4
--- /dev/null
+++ b/arbor/util/pimpl.hpp
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <memory>
+
+namespace arb {
+namespace util {
+
+// A simple wrapper for the pimpl idom inspired by Herb Sutter's GotW #101
+template<typename T>
+class pimpl
+{
+  private:
+    std::unique_ptr<T> m;
+
+  public:
+    ~pimpl();
+    
+    pimpl() noexcept;
+
+    template<typename... Args>
+    pimpl(Args&&... args);
+
+    pimpl(pimpl const&) = delete;
+    pimpl(pimpl&&) noexcept;
+
+    pimpl& operator=(pimpl const&) = delete;
+    pimpl& operator=(pimpl&&) noexcept;
+
+    T*       operator->() noexcept;
+    const T* operator->() const noexcept;
+    T&       operator*() noexcept;
+    const T& operator*() const noexcept;
+};
+
+
+template<typename T, typename... Args>
+pimpl<T> make_pimpl(Args&&... args);
+
+} // namespace util
+} // namespace arb
diff --git a/arbor/util/pimpl_src.hpp b/arbor/util/pimpl_src.hpp
new file mode 100644
index 00000000..98fd0de4
--- /dev/null
+++ b/arbor/util/pimpl_src.hpp
@@ -0,0 +1,58 @@
+#pragma once
+
+// Contains pimpl wrapper's implementation which delegates to the actual implementation
+// Include this file in the implementation's source file
+
+#include <utility>
+
+#include <util/pimpl.hpp>
+
+namespace arb {
+namespace util {
+
+template<typename T>
+pimpl<T>::~pimpl() = default;
+
+template<typename T>
+pimpl<T>::pimpl() noexcept = default;
+
+template<typename T>
+template<typename... Args>
+pimpl<T>::pimpl(Args&&... args)
+: m{new T{std::forward<Args>(args)...}} {}
+
+template<typename T>
+pimpl<T>::pimpl(pimpl&&) noexcept = default;
+
+template<typename T>
+pimpl<T>& pimpl<T>::operator=(pimpl&&) noexcept = default;
+
+template<typename T>
+T* pimpl<T>::operator->() noexcept { return m.get(); }
+
+template<typename T>
+const T* pimpl<T>::operator->() const noexcept { return m.get(); }
+
+template<typename T>
+T& pimpl<T>::operator*() noexcept { return *m.get(); }
+
+template<typename T>
+const T& pimpl<T>::operator*() const noexcept { return *m.get(); }
+
+template<typename T, typename... Args>
+pimpl<T> make_pimpl(Args&&... args) {
+    return pimpl<T>{std::forward<Args>(args)...};
+}
+
+} // namespace util
+} // namespace arb
+
+// In order to avoid linker errors for the constructors and destructor, the pimpl template needs to
+// be instantiated in the source file. This macro helps with this boilerplate code. Note, that it
+// needs to be placed in the default namespace.
+#define ARB_INSTANTIATE_PIMPL(T) \
+namespace arb {                  \
+namespace util {                 \
+template struct pimpl<T>;        \
+}                                \
+}
diff --git a/arbor/util/unwind.hpp b/arbor/util/unwind.hpp
index 6459eec2..9ce2aae4 100644
--- a/arbor/util/unwind.hpp
+++ b/arbor/util/unwind.hpp
@@ -5,6 +5,8 @@
 #include <string>
 #include <vector>
 
+#include <arbor/export.hpp>
+
 namespace arb {
 namespace util {
 
@@ -17,7 +19,7 @@ struct source_location {
 
 /// Builds a stack trace when constructed.
 /// NOTE: if WITH_BACKTRACE is not defined, the methods are empty
-class backtrace {
+class ARB_ARBOR_API backtrace {
 public:
     /// the default constructor will build and store the strack trace.
     backtrace();
diff --git a/doc/concepts/interconnectivity.rst b/doc/concepts/interconnectivity.rst
index a88e44eb..68cfa7d7 100644
--- a/doc/concepts/interconnectivity.rst
+++ b/doc/concepts/interconnectivity.rst
@@ -49,7 +49,7 @@ The ``update_connections`` method accepts either a full ``recipe`` (but will
 Currently ``connectivity`` is only available in C++; Python users have to pass a
 full recipe.
 
-.. warn::
+.. warning::
 
    The semantics of connection updates are subtle and might produce surprising
    results if handled carelessly. In particular, spikes in-flight over a
diff --git a/doc/concepts/mechanisms.rst b/doc/concepts/mechanisms.rst
index 3146af8d..d3125971 100644
--- a/doc/concepts/mechanisms.rst
+++ b/doc/concepts/mechanisms.rst
@@ -263,6 +263,60 @@ In NMODL, junction mechanisms are identified using the ``JUNCTION_PROCESS`` keyw
     ``JUNCTION_PROCESS`` is an Arbor-specific extension to NMODL. The NMODL description of gap-junction
     mechanisms in arbor is not identical to NEURON's though it is similar.
 
+.. _mechanisms-sde:
+
+Stochastic Processes
+''''''''''''''''''''
+
+Arbor offers support for stochastic processes at the level of
+:ref:`point mechanisms <mechanisms-point>` and :ref:`density mechanisms <mechanisms-density>`.
+These processes can be modelled as systems of stochastic differential equations (SDEs). In general,
+such equations have the differential form:
+
+.. math::
+
+    d\textbf{X}(t) = \textbf{f}(t, \textbf{X}(t)) dt + \sum_{i=0}^{M-1} \textbf{l}_i(t,\textbf{X}(t)) d B_i(t),
+
+where :math:`\textbf{X}` is the vector of state variables, while the vector valued function
+:math:`\textbf{f}` represents the deterministic differential. The *M* functions :math:`\textbf{l}_i`
+are each associated with the Brownian Motion :math:`B_i` (Wiener process). The Brownian motions are
+assumed to be standard: 
+
+.. math::
+
+    \begin{align*}
+    B_i(0) &= 0 \\
+    E[B_i(t)] &= 0 \\
+    E[B_i^2(t)] &= t
+    \end{align*}
+
+The above differential form is an informal way of expressing the corresponding integral equation,
+
+.. math::
+
+    \textbf{X}(t+s) = \textbf{X}(t) + \int_t^{t+s} \textbf{f}(\tau, \textbf{X}(\tau)) d\tau + \sum_{i=0}^{M-1} \int_t^{t+s} \textbf{l}_i(\tau,\textbf{X}(\tau)) d B_i(\tau).
+
+
+By defining a random process called **stationary white noise** as the formal derivative
+:math:`W_i(t) = \dfrac{d B_i(t)}{dt}`, we can write the system of equations using a shorthand
+notation as
+
+.. math::
+
+    \textbf{X}^\prime(t) = \textbf{f}(t, \textbf{X}(t)) + \sum_{i=0}^{M-1} \textbf{l}_i(t,\textbf{X}(t)) W_i(t)
+
+Since we used standard Brownian Motions above, the withe noises :math:`W_i(t)` are Gaussian for all
+*t* with :math:`\mu=0`, :math:`\sigma^2=1`.
+
+In Arbor, the white noises :math:`W_i` are assumed to be independent of each other. Furthermore,
+each connection end point (point mechanism) or control volume (density mechanism) are assumed to
+generate independent noise, as well. The system of stochastic equations is interpreted in the `Itô
+sense <https://en.wikipedia.org/wiki/It%C3%B4_calculus>`_ and numerically solved using the
+Euler-Maruyama method.
+For specifics about the notation to define stochastic processes, please
+consult the :ref:`Arbor-specific NMODL extension <format-sde>`.
+
+
 API
 ---
 
diff --git a/doc/concepts/simulation.rst b/doc/concepts/simulation.rst
index c0ab208e..88d11f4f 100644
--- a/doc/concepts/simulation.rst
+++ b/doc/concepts/simulation.rst
@@ -24,10 +24,11 @@ Configuring data extraction
 ---------------------------
 
 Two kinds of data extraction can be set up:
-1. certain state variables can be :ref:`sampled <probesample>` by attaching a
-   sampler to a probe.
-2. spikes can be recorded by either a callback (C++) or a preset recording model
-   (Python), see the API docs linked below.
+
+# certain state variables can be :ref:`sampled <probesample>` by attaching a
+  sampler to a probe.
+# spikes can be recorded by either a callback (C++) or a preset recording model
+  (Python), see the API docs linked below.
 
 Simulation execution and interaction
 ------------------------------------
diff --git a/doc/cpp/simulation.rst b/doc/cpp/simulation.rst
index 9332e200..ba4fe3ea 100644
--- a/doc/cpp/simulation.rst
+++ b/doc/cpp/simulation.rst
@@ -38,6 +38,19 @@ then build the simulation.
         // Instantiate the simulation.
         arb::simulation sim(recipe, decomp, context);
 
+All the simulation's constructor arguments are optional, except the recipe, and assume
+default values if not specified. In order to simplify construction of a simulation, the helper class
+:cpp:class:`arb::simulation_builder` can be used to better control constrution arguments:
+
+.. container:: example-code
+
+    .. code-block:: cpp
+
+        arb::simulation sim =                // implicit conversion to simulation
+            arb::simulation::create(recipe)  // the recipe is always required
+                .add_context(context)        // optionally add a context
+                .add_decompostion(decomp)    // optionally add a decompostion
+                .add_seed(42);               // optionally add a seed value
 
 Class documentation
 -------------------
@@ -57,6 +70,7 @@ Class documentation
             *   an :cpp:class:`arb::domain_decomposition` that describes how the
                 cells in the model are assigned to hardware resources;
             *   an :cpp:class:`arb::context` which is used to execute the simulation.
+            *   a :cpp:class:`uint64_t` in order to seed the pseudo pandom number generator (optional)
         * **Experimental inputs** that can change between model runs, such
           as external spike trains.
 
@@ -77,7 +91,14 @@ Class documentation
 
     **Constructor:**
 
-    .. cpp:function:: simulation(const recipe& rec, const domain_decomposition& decomp, const context& ctx)
+    .. cpp:function:: simulation(const recipe& rec, const domain_decomposition& decomp, const context& ctx, std::uint64_t seed)
+
+    **Static member functions:**
+
+    .. cpp:function:: simulation_builder create(const recipe& rec)
+
+        Returns a builder object to which the constructor arguments can be passed selectively (see
+        also example above).
 
     **Experimental inputs:**
 
diff --git a/doc/dev/index.rst b/doc/dev/index.rst
index 9de6651c..17d4db34 100644
--- a/doc/dev/index.rst
+++ b/doc/dev/index.rst
@@ -19,6 +19,7 @@ Here we document internal components of Arbor. These pages can be useful if you'
    communication
    debug
    matrix_solver
+   sde
    simd_api
    shared_state
    export
diff --git a/doc/dev/sde.rst b/doc/dev/sde.rst
new file mode 100644
index 00000000..d92cc5dd
--- /dev/null
+++ b/doc/dev/sde.rst
@@ -0,0 +1,132 @@
+.. _sde:
+
+Stochastic Differential Equations
+=================================
+
+We want to solve a system of SDEs,
+
+.. math::
+
+    \textbf{X}^\prime(t) = \textbf{f}(t, \textbf{X}(t)) + \sum_{i=0}^{M-1} \textbf{l}_i(t,\textbf{X}(t)) W_i(t),
+
+which describes the dynamics of an arbitrary mechanism's state :math:`\textbf{X}`, see also
+:ref:`this short SDE overview <mechanisms-sde>` and the corresponding :ref:`Arbor-specific NMODL
+extension <format-sde>`.
+
+Analytical Solutions
+--------------------
+
+For systems of linear SDEs with 
+
+.. math::
+
+    \begin{align}
+    \textbf{f}\left(t, \textbf{X}(t)\right) &= \textbf{A}(t) \textbf{X}(t) + \textbf{a}(t) \\
+    \textbf{l}_i\left(t, \textbf{X}(t)\right) &= \textbf{B}_i(t) \textbf{X}(t) + \textbf{b}_i(t)
+    \end{align}
+
+ 
+there exist ordinary differential equations for moments of the process :math:`\textbf{X}`.  The
+expectation :math:`\textbf{m}(t) = E\left[\textbf{X}(t)\right]` and the second moment matrix
+:math:`\textbf{P}(t) = E\left[\textbf{X}(t) \textbf{X}^T(t)\right]` can be computed with
+
+.. math::
+
+    \begin{align}
+    \textbf{m}^\prime(t) = &\textbf{A}(t) \textbf{m}(t) + \textbf{a}(t)\\
+    \textbf{m}(0) = &\textbf{x}_0 \\
+    \textbf{P}^\prime(t) = &
+          \textbf{A}(t)\textbf{P}(t)   + \textbf{P}(t)\textbf{A}^T(t)
+        + \textbf{a}(t)\textbf{m}^T(t) + \textbf{m}(t)\textbf{a}^T(t) \\
+       &+ \sum_{i=0}^{M-1} \left[
+          \textbf{B}_i(t)\textbf{P}(t)\textbf{B}^T_i(t)
+        + \textbf{B}_i(t)\textbf{m}(t)\textbf{b}^T_i(t)
+        + \textbf{b}_i(t)\textbf{m}^T(t)\textbf{B}^T_i(t)
+        + \textbf{b}_i(t)\textbf{b}^T(t) \right] \\
+    \textbf{P}(0) = &\textbf{x}_0 \textbf{x}^T_0 
+    \end{align}
+
+Thus, we could in principle use our existing solvers for the above moment equations. Note, that the
+equations for the second order moment are not linear, in general. Once the moments are computed, we
+could then sample from the resulting mean and covariance matrix, using the fact that the solution is
+normally distributed, with :math:`\textbf{X}(t) \sim N\left(\textbf{m}(t), \textbf{P}(t) -
+\textbf{m}(t)\textbf{m}^T(t)\right)`. This approach has a few drawbacks, however:
+
+* tracking of the moment equations increases the size of the system
+* there exist no general solutions for non-linear systems of SDEs
+
+Therefore, we choose to solve the SDEs numerically. While there exist a number of different methods
+for scalar SDEs, not all of them can be easily extended to systems of SDEs. One of the simplest
+approaches, the Euler-Maruyama method, offers a few advantages:
+
+* works for (non-linear) systems of SDEs
+* has simple implementation
+* exhibits good performance
+
+On the other hand, this solver only guarantees first order accuracy, which usually requires the time
+steps to be small.
+
+
+Euler-Maruyama Solver
+---------------------
+
+The Euler-Maruyama method is a first order stochastic Runge-Kutta method, where the forward Euler
+method is its deterministic counterpart. The method can be derived by approximating the solution of
+the SDE with an It\^o-Taylor series and truncating at first order.  Higher-order stochastic
+Runge-Kutta methods are not as easy to use as their deterministic counterparts, especially for the
+vector case (system of SDEs).
+            
+**Algorithm:** For each time step :math:`t_k = k ~\Delta t`
+
+* draw random variables :math:`\Delta \textbf{W}  \sim N(\textbf{0}, \textbf{Q}\Delta t)`
+* compute :math:`\hat{\textbf{X}}(t_{k+1}) = \hat{\textbf{X}}(t_k) + f(t_k, \hat{\textbf{X}}(t_k)) \Delta t + \sum_{i=0}^{M-1} \textbf{l}_i(t_k,\hat{\textbf{X}}(t_k)) \Delta W_{i}`
+
+where :math:`\textbf{Q}` is the correlation matrix of the white noises :math:`W_i`.
+
+
+Stochastic Mechanisms
+---------------------
+
+This Euler-Maruyama solver is implemented in modcc and requires a normally distributed noise source.
+Since we assume that this noise is uncorrelated, we need independent random variables for every
+location where a mechanism is placed at or painted on. We can achieve this by carefully seeding the
+random number generator with a unique fingerprint of each such location, consisting of
+
+* global seed value (per simulation)
+* global cell id
+* per-cell connection end point index (point mech.) or per-cell control volume index (density mech)
+* mechanism id
+* per-mechanism variable identifier
+* per-mechanism simulation time (in the form of an integral counter)
+
+By using these seed value, we can guarantee that the same random numbers are generated regardless of
+concurrency, domain decomposition, and computational backend. Note, that because of our assumption
+of independence, the correlation between the white noises is zero, :math:`\textbf{Q} = \textbf{1}`.
+
+Random Number Generation
+------------------------
+
+Counter based random number generators (CBPRNGs) as implemented in Random123 offer a simple solution
+for both CPU and GPU kernels to generate independent random numbers based on the above seed values.
+We use the *Threefry-4x64-12* algorithm because
+
+* it offers 8 64-bit fields for placing the seed values
+* it is reasonably fast on both CPU and GPU
+
+Due to the structure of the *Threefry* and other CBPRNGs, we get multiple indpendent uniformly
+distributed values per invocation. In particular, *Threefry-4x64-12* returns 4 such values. Throwing
+away 3 out of the 4 values would result in a quite significant performance penalty, so we use all 4
+values, instead. This is achieved by circling through a cache of 4 values per location, and updating
+the cache every 4th time step.
+
+Normal Distribution
+-------------------
+
+The generated random numbers :math:`Z_i` must then be transformed into standard normally distributed
+values :math:`X_i`.  There exist a number of different algorithms, however, we use the Box-Muller
+transform because it requires exactly 2 independent uniformly distributed values to generate 2
+independent normally distributed values. Other methods, such as the Ziggurat algorithm, use
+rejection sampling which may unevenly exhaust our cache and make parallelization more difficult.
+
+For the Euler-Maruyama solver we need normal random numbers with variance :math:`\sigma^2 = \Delta t`.
+Thus, we scale the generated random number accordingly, :math:`\Delta W_{i} = \sqrt{\Delta t} X_i`.
diff --git a/doc/fileformat/nmodl.rst b/doc/fileformat/nmodl.rst
index 262b1011..3ebf3d3d 100644
--- a/doc/fileformat/nmodl.rst
+++ b/doc/fileformat/nmodl.rst
@@ -158,6 +158,45 @@ Arbor-specific features
   made available through the ``v_peer`` variable while the local membrane potential
   is available through ``v``, as usual.
 
+
+.. _format-sde:
+
+Stochastic Processes
+--------------------
+
+Arbor supports :ref:`stochastic processes <mechanisms-sde>` in the form of stochastic differential
+equations. The *white noise* sources can be defined in the model files using a ``WHITE_NOISE`` block:
+
+.. code:: none
+
+   WHITE_NOISE {
+       a b 
+       c
+   }
+
+Arbitrary white noise variables can be declared (``a, b, c`` in the example above). The
+noise will be appropriately scaled with the numerical time step and can be considered unitless. In
+order to influence the white noise generation, a seed value can be set at the level of the
+simulation through the optional constructor argument ``seed``
+(see :ref:`here <pysimulation>` or :ref:`here <cppsimulation>`).
+
+If the state is updated by involving at least one of the declared white noise variables
+the system is considered to be stochastic:
+
+.. code:: none
+
+   DERIVATIVE state {
+       s' = f + g*a
+   }
+
+The solver method must then accordingly set to ``stochastic``:
+
+.. code:: none
+
+   BREAKPOINT {
+       SOLVE state METHOD stochastic
+   }
+
 Nernst
 ------
 Many mechanisms make use of the reversal potential of an ion (``eX`` for ion ``X``).
diff --git a/doc/python/morphology.rst b/doc/python/morphology.rst
index c6b78f64..f72fb781 100644
--- a/doc/python/morphology.rst
+++ b/doc/python/morphology.rst
@@ -256,7 +256,7 @@ Cable cell morphology
 
     .. method:: apply_isometry(iso)
 
-        Apply an :type:`isometry` to the segment tree, returns the transformed tree as a copy.
+        Apply an :py:class:`isometry` to the segment tree, returns the transformed tree as a copy.
         Isometries are rotations around an arbritary axis and/or translations; they can
         be instantiated using ``translate`` and ``rotate`` and combined
         using the ``*`` operator.
@@ -267,6 +267,7 @@ Cable cell morphology
     .. method:: equivalent(other)
 
         Two trees are equivalent if
+
         1. the root segments' ``prox`` and ``dist`` points and their ``tags``
            are identical.
         2. recursively: all sub-trees starting at the current segment are
diff --git a/doc/python/simulation.rst b/doc/python/simulation.rst
index 76531eff..b84b11c5 100644
--- a/doc/python/simulation.rst
+++ b/doc/python/simulation.rst
@@ -53,6 +53,7 @@ over the local and distributed hardware resources (see :ref:`pydomdec`). Then, t
     * an :py:class:`arbor.recipe` that describes the model;
     * an :py:class:`arbor.domain_decomposition` that describes how the cells in the model are assigned to hardware resources;
     * an :py:class:`arbor.context` which is used to execute the simulation.
+    * a non-negative :py:class:`int` in order to seed the pseudo pandom number generator (optional)
 
     Simulations provide an interface for executing and interacting with the model:
 
@@ -63,12 +64,12 @@ over the local and distributed hardware resources (see :ref:`pydomdec`). Then, t
 
     **Constructor:**
 
-    .. function:: simulation(recipe, context, domain_decomposition)
+    .. function:: simulation(recipe, domain_decomposition, context, seed)
 
-        Initialize the model described by an :py:class:`arbor.recipe`, with
-        cells and network distributed according to
-        :py:class:`arbor.domain_decomposition`, and computational resources
-        described by :py:class:`arbor.context`.
+        Initialize the model described by an :py:class:`arbor.recipe`, with cells and network
+        distributed according to :py:class:`arbor.domain_decomposition`, computational resources
+        described by :py:class:`arbor.context` and with a seed value for generating reproducible
+        random numbers (optional, default value: `0`).
 
     **Updating Model State:**
 
diff --git a/modcc/blocks.cpp b/modcc/blocks.cpp
index 8e979ca5..2bc80734 100644
--- a/modcc/blocks.cpp
+++ b/modcc/blocks.cpp
@@ -77,3 +77,10 @@ ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, AssignedBlock const&
 
     return os;
 }
+
+ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, WhiteNoiseBlock const& W) {
+    os << blue("WhiteNoiseBlock")   << std::endl;
+    os << "  parameters : "  << W.parameters << std::endl;
+
+    return os;
+}
diff --git a/modcc/blocks.hpp b/modcc/blocks.hpp
index ff0f0b13..1e12b542 100644
--- a/modcc/blocks.hpp
+++ b/modcc/blocks.hpp
@@ -4,6 +4,7 @@
 #include <iosfwd>
 #include <string>
 #include <vector>
+#include <set>
 
 #include "identifier.hpp"
 #include "location.hpp"
@@ -149,7 +150,7 @@ struct ParameterBlock {
     }
 };
 
-// information stored in a NEURON {} block in mod file
+// information stored in a ASSIGNED {} block in mod file
 struct AssignedBlock {
     std::vector<Id> parameters;
 
@@ -161,6 +162,19 @@ struct AssignedBlock {
     }
 };
 
+// information stored in a WHITE_NOISE {} block in mod file
+struct WhiteNoiseBlock {
+    std::vector<Id> parameters;
+    std::map<std::string, unsigned int> used;
+
+    auto begin() -> decltype(parameters.begin()) {
+        return parameters.begin();
+    }
+    auto end() -> decltype(parameters.end()) {
+        return parameters.end();
+    }
+};
+
 ////////////////////////////////////////////////
 // helpers for pretty printing block information
 ////////////////////////////////////////////////
@@ -182,3 +196,5 @@ ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, UnitsBlock const& U)
 ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, ParameterBlock const& P);
 
 ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, AssignedBlock const& A);
+
+ARB_LIBMODCC_API std::ostream& operator<<(std::ostream& os, WhiteNoiseBlock const& W);
diff --git a/modcc/expression.cpp b/modcc/expression.cpp
index 8eb9fab2..f0917410 100644
--- a/modcc/expression.cpp
+++ b/modcc/expression.cpp
@@ -11,6 +11,8 @@ inline std::string to_string(symbolKind k) {
             return std::string("indexed variable");
         case symbolKind::local_variable:
             return std::string("local");
+        case symbolKind::white_noise:
+            return std::string("white noise");
         case symbolKind::procedure:
             return std::string("procedure");
         case symbolKind::function:
@@ -76,6 +78,15 @@ std::string LocalVariable::to_string() const {
     return s;
 }
 
+/*******************************************************************************
+  WhiteNoise
+*******************************************************************************/
+
+std::string WhiteNoise::to_string() const {
+    std::string s = blue("White Noise") + " " + yellow(name());
+    return s;
+}
+
 /*******************************************************************************
   IdentifierExpression
 *******************************************************************************/
@@ -1008,6 +1019,9 @@ void Symbol::accept(Visitor *v) {
 void LocalVariable::accept(Visitor *v) {
     v->visit(this);
 }
+void WhiteNoise::accept(Visitor *v) {
+    v->visit(this);
+}
 void IdentifierExpression::accept(Visitor *v) {
     v->visit(this);
 }
diff --git a/modcc/expression.hpp b/modcc/expression.hpp
index d369e7a9..f9ed282c 100644
--- a/modcc/expression.hpp
+++ b/modcc/expression.hpp
@@ -52,6 +52,7 @@ class ARB_LIBMODCC_API PostEventExpression;
 class ARB_LIBMODCC_API APIMethod;
 class ARB_LIBMODCC_API IndexedVariable;
 class ARB_LIBMODCC_API LocalVariable;
+class ARB_LIBMODCC_API WhiteNoise;
 
 using expression_ptr = std::unique_ptr<Expression>;
 using symbol_ptr = std::unique_ptr<Symbol>;
@@ -95,6 +96,7 @@ enum class symbolKind {
     variable,         ///< variable at module scope
     indexed_variable, ///< a variable that is indexed
     local_variable,   ///< variable at local scope
+    white_noise,      ///< white noise variable
 };
 std::string to_string(symbolKind k);
 
@@ -102,6 +104,7 @@ std::string to_string(symbolKind k);
 enum class solverMethod {
     cnexp, // for diagonal linear ODE systems.
     sparse, // for non-diagonal linear ODE systems.
+    stochastic, // for systems of SDEs
     none
 };
 
@@ -111,10 +114,11 @@ enum class solverVariant {
 };
 
 static std::string to_string(solverMethod m) {
-    switch(m) {
-        case solverMethod::cnexp:  return std::string("cnexp");
-        case solverMethod::sparse: return std::string("sparse");
-        case solverMethod::none:   return std::string("none");
+    switch (m) {
+        case solverMethod::cnexp:      return std::string("cnexp");
+        case solverMethod::sparse:     return std::string("sparse");
+        case solverMethod::stochastic: return std::string("stochastic");
+        case solverMethod::none:       return std::string("none");
     }
     return std::string("<error : undefined solverMethod>");
 }
@@ -254,6 +258,7 @@ public :
     virtual APIMethod*            is_api_method()        {return nullptr;}
     virtual IndexedVariable*      is_indexed_variable()  {return nullptr;}
     virtual LocalVariable*        is_local_variable()    {return nullptr;}
+    virtual WhiteNoise*           is_white_noise()       {return nullptr;}
 
 private :
     std::string name_;
@@ -643,6 +648,26 @@ private :
     localVariableKind kind_;
 };
 
+class ARB_LIBMODCC_API WhiteNoise : public Symbol {
+public:
+    WhiteNoise(Location loc, std::string name)
+    :   Symbol(std::move(loc), std::move(name), symbolKind::white_noise)
+    {}
+    
+    ~WhiteNoise() {}
+
+    WhiteNoise* is_white_noise() override {return this; }
+
+    std::string to_string() const override;
+    void accept(Visitor *v) override;
+
+    unsigned int index() const noexcept { return index_; }
+
+    void set_index(unsigned int i) noexcept { index_ = i; }
+
+private:
+    unsigned int index_ = 0;
+};
 
 // a SOLVE statement
 class ARB_LIBMODCC_API SolveExpression : public Expression {
diff --git a/modcc/module.cpp b/modcc/module.cpp
index 7dafa6ed..45fc00d5 100644
--- a/modcc/module.cpp
+++ b/modcc/module.cpp
@@ -152,9 +152,9 @@ bool Module::semantic() {
     // that all symbols are correctly used
     ////////////////////////////////////////////////////////////////////////////
 
-    // first add variables defined in the NEURON, ASSIGNED and PARAMETER
+    // first add variables defined in the NEURON, ASSIGNED, WHITE_NOISE and PARAMETER
     // blocks these symbols have "global" scope, i.e. they are visible to all
-    // functions and procedurs in the mechanism
+    // functions and procedures in the mechanism
     add_variables_to_symbols();
 
     // Helper which iterates over a vector of Symbols, moving them into the
@@ -163,6 +163,7 @@ bool Module::semantic() {
     // is already in the symbol table.
     bool linear_homogeneous = true;
     std::vector<std::string> state_vars;
+    std::vector<std::string> white_noise_vars;
     auto move_symbols = [this] (std::vector<symbol_ptr>& symbol_list) {
         for(auto& symbol: symbol_list) {
             bool is_found = (symbols_.find(symbol->name()) != symbols_.end());
@@ -267,6 +268,13 @@ bool Module::semantic() {
     for (auto& sym: symbols_) {
         Location loc;
 
+        // get white noise variable names
+        auto wn = sym.second->is_white_noise();
+        if (wn) {
+            white_noise_vars.push_back(wn->name());
+            continue;
+        }
+
         auto state = sym.second->is_variable();
         if (!state || !state->is_state()) continue;
         state_vars.push_back(state->name());
@@ -310,6 +318,11 @@ bool Module::semantic() {
 
     for(auto& e : *proc_init->body()) {
         auto solve_expression = e->is_solve_statement();
+        if (!white_noise_vars.empty() && involves_identifier(e, white_noise_vars)) {
+            error("An error occured while compiling the INITIAL block. "
+                  "White noise is not allowed.", e->location());
+            return false;
+        }
         if (solve_expression) {
             // Grab SOLVE statements, put them in `body` after translation.
             std::set<std::string> solved_ids;
@@ -409,22 +422,57 @@ bool Module::semantic() {
             solve_body = linear_rewrite(deriv->body(), state_vars);
         }
 
-        // Calculate linearity and homogeneity of the statements in the derivative block.
+        // Calculate linearity, homogeneity and stochasticity of the statements in the derivative block.
         bool linear = true;
         bool homogeneous = true;
+        bool needs_substitution = false;
         for (auto& s: solve_body->is_block()->statements()) {
+            // loop over declared white noise variables
+            if (!white_noise_vars.empty()) {
+                for (auto const & w : white_noise_vars) {
+                    // check whether a statement contains an expression involving white noise
+                    if (involves_identifier(s, w)) {
+                        // mark the white noise variable as used, and set its index if we see this
+                        // variable for the first time
+                        auto it = white_noise_block_.used.insert(std::make_pair(w,0u));
+                        if (it.second) {
+                            // set white noise lookup index
+                            const unsigned int idx = white_noise_block_.used.size()-1;
+                            symbols_.find(w)->second->is_white_noise()->set_index(idx);
+                            it.first->second = idx;
+                        }
+                    }
+                }
+            }
             if(s->is_assignment() && !state_vars.empty()) {
                 linear_test_result r = linear_test(s->is_assignment()->rhs(), state_vars);
+                if (!s->is_assignment()->lhs()->is_derivative() && !r.is_constant)
+                    needs_substitution = true;
                 linear &= r.is_linear;
                 homogeneous &= r.is_homogeneous;
             }
         }
         linear_homogeneous &= (linear & homogeneous);
 
+        // filter out unused white noise variables from the white noise vector
+        white_noise_vars.clear();
+        for (auto const & w : white_noise_block_.used) {
+            white_noise_vars.push_back(w.first);
+        }
+        bool stochastic = (white_noise_vars.size() > 0u);
+
+        if (stochastic && (solve_expression->method() != solverMethod::stochastic)) {
+            error("SOLVE expression '" + solve_expression->name() + "' involves white noise and can "
+                  "only be solved using the stochastic method", solve_expression->location());
+            return false;
+        }
+
         // Construct solver based on system kind, linearity and solver method.
         std::unique_ptr<SolverVisitorBase> solver;
         switch(solve_expression->method()) {
         case solverMethod::cnexp:
+            if (needs_substitution)
+                warning("Assignments to local variables containing state variables will not be integrated in time");
             solver = std::make_unique<CnexpSolverVisitor>();
             break;
         case solverMethod::sparse: {
@@ -436,6 +484,9 @@ bool Module::semantic() {
             }
             break;
         }
+        case solverMethod::stochastic:
+                solver = std::make_unique<EulerMaruyamaSolverVisitor>(white_noise_vars);
+            break;
         case solverMethod::none:
             if (deriv->kind()==procedureKind::linear) {
                 solver = std::make_unique<LinearSolverVisitor>(state_vars);
@@ -466,7 +517,6 @@ bool Module::semantic() {
             // Copy body into advance_state.
             for (auto& stmt: solve_block->is_block()->statements()) {
                 api_state->body()->statements().push_back(std::move(stmt));
-
             }
         }
         else {
@@ -570,8 +620,7 @@ bool Module::semantic() {
 void Module::add_variables_to_symbols() {
     auto create_variable =
         [this](const Token& token, accessKind a, visibilityKind v, linkageKind l,
-               rangeKind r, bool is_state = false) -> symbol_ptr&
-        {
+               rangeKind r, bool is_state = false) -> symbol_ptr& {
             auto var = new VariableExpression(token.location, token.spelling);
             var->access(a);
             var->visibility(v);
@@ -582,11 +631,10 @@ void Module::add_variables_to_symbols() {
         };
 
     // add indexed variables to the table
-    auto create_indexed_variable = [this]
-        (std::string const& name, sourceKind data_source,
-         accessKind acc, std::string ch, Location loc) -> symbol_ptr&
-    {
-        if(symbols_.count(name)) {
+    auto create_indexed_variable =
+        [this](std::string const& name, sourceKind data_source,
+               accessKind acc, std::string ch, Location loc) -> symbol_ptr& {
+        if (symbols_.count(name)) {
             throw compiler_exception(
                 pprintf("the symbol % already exists", yellow(name)), loc);
         }
@@ -594,6 +642,14 @@ void Module::add_variables_to_symbols() {
             make_symbol<IndexedVariable>(loc, name, data_source, acc, ch);
     };
 
+    auto create_white_noise = [this](Token const & token) -> symbol_ptr& {
+        if (symbols_.count(token.spelling)) {
+            throw compiler_exception(
+                pprintf("the symbol % already exists", yellow(token.spelling)), token.location);
+        }
+        return symbols_[token.spelling] = make_symbol<WhiteNoise>(token.location, token.spelling);
+    };
+
     sourceKind current_kind = kind_==moduleKind::density? sourceKind::current_density: sourceKind::current;
     sourceKind conductance_kind = kind_==moduleKind::density? sourceKind::conductivity: sourceKind::conductance;
 
@@ -789,6 +845,10 @@ void Module::add_variables_to_symbols() {
                 var.location);
         }
     }
+
+    for (const Id& id: white_noise_block_) {
+        create_white_noise(id.token);
+    }
 }
 
 int Module::semantic_func_proc() {
diff --git a/modcc/module.hpp b/modcc/module.hpp
index 8411328f..b75897d5 100644
--- a/modcc/module.hpp
+++ b/modcc/module.hpp
@@ -58,6 +58,9 @@ public:
     // Retrieve list of assigned variable ids.
     AssignedBlock const& assigned_block() const {return assigned_block_;}
 
+    // Retrieve list of white noise ids.
+    WhiteNoiseBlock const& white_noise_block() const {return white_noise_block_;}
+
     // Retrieve list of parameter variable ids.
     ParameterBlock const&  parameter_block()  const {return parameter_block_;}
 
@@ -70,6 +73,7 @@ public:
     void units_block(const UnitsBlock& u) { units_block_ = u; }
     void parameter_block(const ParameterBlock& p) { parameter_block_ = p; }
     void assigned_block(const AssignedBlock& a) { assigned_block_ = a; }
+    void white_noise_block(const WhiteNoiseBlock& w) { white_noise_block_ = w; }
 
     // Add global procedure or function, before semantic pass (called from Parser).
     void add_callable(symbol_ptr callable);
@@ -122,6 +126,7 @@ private:
     UnitsBlock units_block_;
     ParameterBlock parameter_block_;
     AssignedBlock assigned_block_;
+    WhiteNoiseBlock white_noise_block_;
     bool linear_;
     bool post_events_;
 
diff --git a/modcc/parser.cpp b/modcc/parser.cpp
index 965e4946..e7279a92 100644
--- a/modcc/parser.cpp
+++ b/modcc/parser.cpp
@@ -103,6 +103,9 @@ bool Parser::parse() {
         case tok::assigned:
             parse_assigned_block();
             break;
+        case tok::white_noise:
+            parse_white_noise_block();
+            break;
         // INITIAL, KINETIC, DERIVATIVE, PROCEDURE, NET_RECEIVE and BREAKPOINT blocks
         // are all lowered to ProcedureExpression
         case tok::net_receive:
@@ -665,6 +668,66 @@ ass_exit:
     return;
 }
 
+void Parser::parse_white_noise_block() {
+    WhiteNoiseBlock block;
+
+    get_token();
+
+    // assert that the block starts with a curly brace
+    if (token_.type != tok::lbrace) {
+        error(pprintf("WHITE_NOISE block must start with a curly brace {, found '%'", token_.spelling));
+        return;
+    }
+
+    int success = 1;
+
+    // there are no use cases for curly brace in an WHITE_NOISE block, so we don't have to count them
+    get_token();
+    while (token_.type != tok::rbrace && token_.type != tok::eof) {
+        int line = location_.line;
+        std::vector<Token> variables; // we can have more than one variable on a line
+
+        // the first token must be ...
+        if (token_.type != tok::identifier) {
+            success = 0;
+            goto wn_exit;
+        }
+        // read all of the identifiers until we run out of identifiers or reach a new line
+        while (token_.type == tok::identifier && line == location_.line) {
+            variables.push_back(token_);
+            get_token();
+        }
+
+        // there must be no paramters at the end of the line
+        if (line == location_.line && token_.type == tok::lparen) {
+            success = 0;
+            goto wn_exit;
+        }
+        else {
+            for (auto const& t: variables) {
+                block.parameters.push_back(Id(t, "", {}));
+            }
+        }
+    }
+
+    // error if EOF before closing curly brace
+    if (token_.type == tok::eof) {
+        error("WHITE_NOISE block must have closing '}'");
+        goto wn_exit;
+    }
+
+    get_token(); // consume closing brace
+
+    module_->white_noise_block(block);
+
+wn_exit:
+    // only write error message if one hasn't already been logged by the lexer
+    if (!success && status_ == lexerStatus::happy) {
+        error(pprintf("WHITE_NOISE block unexpected symbol '%'", token_.spelling));
+    }
+    return;
+}
+
 // Parse a value (integral or real) with possible preceding unary minus,
 // and return as a string.
 std::string Parser::value_literal() {
@@ -1591,6 +1654,9 @@ expression_ptr Parser::parse_solve() {
         case tok::sparse:
             method = solverMethod::sparse;
             break;
+        case tok::stochastic:
+            method = solverMethod::stochastic;
+            break;
         default:
             goto solve_statement_error;
         }
diff --git a/modcc/parser.hpp b/modcc/parser.hpp
index bf82af8d..89a893c1 100644
--- a/modcc/parser.hpp
+++ b/modcc/parser.hpp
@@ -58,6 +58,7 @@ public:
     void parse_parameter_block();
     void parse_constant_block();
     void parse_assigned_block();
+    void parse_white_noise_block();
     void parse_title();
 
     std::unordered_map<std::string, std::string> constants_map_;
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index fd1ecc15..d86df4fa 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -243,7 +243,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
         }
     };
 
-    const auto& [state_ids, global_ids, param_ids] = public_variable_ids(module_);
+    const auto& [state_ids, global_ids, param_ids, white_noise_ids] = public_variable_ids(module_);
     const auto& assigned_ids = module_.assigned_block().parameters;
     out << fmt::format(FMT_COMPILE("#define PPACK_IFACE_BLOCK \\\n"
                                    "[[maybe_unused]] auto  {0}width             = pp->width;\\\n"
@@ -270,6 +270,7 @@ ARB_LIBMODCC_API std::string emit_cpp_source(const Module& module_, const printe
         out << fmt::format("[[maybe_unused]] auto {}{} = pp->globals[{}];\\\n", pp_var_pfx, scalar.name(), global);
         global++;
     }
+    out << fmt::format("[[maybe_unused]] auto const * const * {}random_numbers = pp->random_numbers;\\\n", pp_var_pfx);
     auto param = 0, state = 0;
     for (const auto& array: state_ids) {
         out << fmt::format("[[maybe_unused]] auto* {}{} = pp->state_vars[{}];\\\n", pp_var_pfx, array.name(), state);
@@ -448,6 +449,10 @@ void CPrinter::visit(LocalVariable* sym) {
     out_ << sym->name();
 }
 
+void CPrinter::visit(WhiteNoise* sym) {
+    out_ << fmt::format("{}random_numbers[{}][i_]", pp_var_pfx, sym->index());
+}
+
 void CPrinter::visit(VariableExpression *sym) {
     out_ << fmt::format("{}{}{}", pp_var_pfx, sym->name(), sym->is_range() ? "[i_]": "");
 }
@@ -652,6 +657,12 @@ void SimdPrinter::visit(VariableExpression *sym) {
     EXITM(out_, "variable");
 }
 
+void SimdPrinter::visit(WhiteNoise* sym) {
+    auto index = is_indirect_? "index_": "i_";
+    out_ << fmt::format("simd_cast<simd_value>(indirect({}random_numbers[{}]+{}, simd_width_))",
+        pp_var_pfx, sym->index(), index);
+}
+
 void SimdPrinter::visit(AssignmentExpression* e) {
     ENTERM(out_, "assign");
     if (!e->lhs() || !e->lhs()->is_identifier() || !e->lhs()->is_identifier()->symbol()) {
diff --git a/modcc/printer/cprinter.hpp b/modcc/printer/cprinter.hpp
index 872de3ec..b53c318b 100644
--- a/modcc/printer/cprinter.hpp
+++ b/modcc/printer/cprinter.hpp
@@ -27,6 +27,7 @@ public:
     void visit(IdentifierExpression*) override;
     void visit(VariableExpression*) override;
     void visit(LocalVariable*) override;
+    void visit(WhiteNoise*) override;
 
     // Delegate low-level emits to cexpr_emit:
     void visit(NumberExpression* e) override { cexpr_emit(e, out_, this); }
@@ -83,6 +84,7 @@ public:
     void visit(VariableExpression*) override;
     void visit(LocalVariable*) override;
     void visit(AssignmentExpression*) override;
+    void visit(WhiteNoise*) override;
 
     void visit(NumberExpression* e) override { simd_expr_emit(e, out_, is_indirect_, input_mask_, scalars_, this); } 
     void visit(UnaryExpression* e)  override { simd_expr_emit(e, out_, is_indirect_, input_mask_, scalars_, this); }
diff --git a/modcc/printer/gpuprinter.cpp b/modcc/printer/gpuprinter.cpp
index ac3fe69c..8285683d 100644
--- a/modcc/printer/gpuprinter.cpp
+++ b/modcc/printer/gpuprinter.cpp
@@ -153,7 +153,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
                                    "auto& {0}index_constraints __attribute__((unused)) = params_.index_constraints;\\\n"),
                        pp_var_pfx);
 
-    const auto& [state_ids, global_ids, param_ids] = public_variable_ids(module_);
+    const auto& [state_ids, global_ids, param_ids, white_noise_ids] = public_variable_ids(module_);
     const auto& assigned_ids = module_.assigned_block().parameters;
 
     auto global = 0;
@@ -161,6 +161,7 @@ ARB_LIBMODCC_API std::string emit_gpu_cu_source(const Module& module_, const pri
         out << fmt::format("auto {}{} __attribute__((unused)) = params_.globals[{}];\\\n", pp_var_pfx, scalar.name(), global);
         global++;
     }
+    out << fmt::format("auto const * const * {}random_numbers  __attribute__((unused)) = params_.random_numbers;\\\n", pp_var_pfx);
     auto param = 0, state = 0;
     for (const auto& array: state_ids) {
         out << fmt::format("auto* {}{} __attribute__((unused)) = params_.state_vars[{}];\\\n", pp_var_pfx, array.name(), state);
@@ -535,3 +536,7 @@ void GpuPrinter::visit(CallExpression* e) {
     }
     out_ << ")";
 }
+
+void GpuPrinter::visit(WhiteNoise* sym) {
+    out_ << fmt::format("{}random_numbers[{}][tid_]", pp_var_pfx, sym->index());
+}
diff --git a/modcc/printer/gpuprinter.hpp b/modcc/printer/gpuprinter.hpp
index 58f4ecda..b413adb2 100644
--- a/modcc/printer/gpuprinter.hpp
+++ b/modcc/printer/gpuprinter.hpp
@@ -16,5 +16,6 @@ public:
 
     void visit(CallExpression*) override;
     void visit(VariableExpression*) override;
+    void visit(WhiteNoise*) override;
 };
 
diff --git a/modcc/printer/infoprinter.cpp b/modcc/printer/infoprinter.cpp
index c4211001..596b7b80 100644
--- a/modcc/printer/infoprinter.cpp
+++ b/modcc/printer/infoprinter.cpp
@@ -42,7 +42,7 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
                        prefix,
                        name);
 
-    const auto& [state_ids, global_ids, param_ids] = public_variable_ids(m);
+    const auto& [state_ids, global_ids, param_ids, white_noise_ids] = public_variable_ids(m);
     const auto& assigned_ids = m.assigned_block().parameters;
     auto fmt_id = [&](const auto& id) {
         auto lo  = id.has_range() ? id.range.first  : lowest;
@@ -59,6 +59,9 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
            ion.writes_rev_potential(), ion.uses_rev_potential(),
            ion.uses_valence(), ion.verifies_valence(), ion.expected_valence);
     };
+    auto fmt_random_variable = [&](const auto& rv_id) {
+        return fmt::format(FMT_COMPILE("{{ \"{}\", {} }}"), rv_id.first.name(), rv_id.second);
+    };
     auto print_array_head = [&](char const * type, char const * name, auto size) {
         out << "    static " << type;
         if (size) out << " " << name << "[] = {";
@@ -86,6 +89,7 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
     print_arrays("arb_field_info", "state_vars", fmt_id, state_ids, assigned_ids);
     print_array("arb_field_info", "parameters", fmt_id, param_ids);
     print_array("arb_ion_info", "ions", fmt_ion, m.ion_deps());
+    print_array("arb_random_variable_info", "random_variables", fmt_random_variable, white_noise_ids);
 
     out << fmt::format(FMT_COMPILE("\n"
                                    "    arb_mechanism_type result;\n"
@@ -103,6 +107,8 @@ ARB_LIBMODCC_API std::string build_info_header(const Module& m, const printer_op
                                    "    result.n_state_vars=n_state_vars;\n"
                                    "    result.parameters=parameters;\n"
                                    "    result.n_parameters=n_parameters;\n"
+                                   "    result.random_variables=random_variables;\n"
+                                   "    result.n_random_variables=n_random_variables;\n"
                                    "    return result;\n"
                                    "  }}\n"
                                    "\n"),
diff --git a/modcc/printer/printerutil.cpp b/modcc/printer/printerutil.cpp
index 86a983a7..d54c28b7 100644
--- a/modcc/printer/printerutil.cpp
+++ b/modcc/printer/printerutil.cpp
@@ -76,6 +76,13 @@ ARB_LIBMODCC_API public_variable_ids_t public_variable_ids(const Module& m) {
         }
     }
 
+    for (auto const & id : m.white_noise_block().parameters) {
+        auto it = m.white_noise_block().used.find(id.name());
+        if (it != m.white_noise_block().used.end()) {
+            ids.white_noise_ids.push_back(std::make_pair(id, it->second));
+        }
+    }
+
     return ids;
 }
 
diff --git a/modcc/printer/printerutil.hpp b/modcc/printer/printerutil.hpp
index 5addde91..f0a7ff8e 100644
--- a/modcc/printer/printerutil.hpp
+++ b/modcc/printer/printerutil.hpp
@@ -96,6 +96,7 @@ struct public_variable_ids_t {
     std::vector<Id> state_ids;
     std::vector<Id> global_parameter_ids;
     std::vector<Id> range_parameter_ids;
+    std::vector<std::pair<Id,unsigned int>> white_noise_ids;
 };
 
 // Public module variables by role.
diff --git a/modcc/solvers.cpp b/modcc/solvers.cpp
index 23d89a41..2ddc9fbf 100644
--- a/modcc/solvers.cpp
+++ b/modcc/solvers.cpp
@@ -502,6 +502,169 @@ void SparseSolverVisitor::finalize() {
     BlockRewriterBase::finalize();
 }
 
+// EulerMaruyama solver visitor implementation
+
+void EulerMaruyamaSolverVisitor::visit(AssignmentExpression *e) {
+    auto lhs = e->lhs();
+    auto rhs = e->rhs();
+    auto deriv = lhs->is_derivative();
+
+    if (!deriv) {
+        // add to substitute map if regular assignment involves white noise
+        auto orig = e->clone();
+        auto id = lhs->is_identifier();
+        if (id) {
+            auto expand = substitute(rhs, local_expr_);
+            if (involves_identifier(expand, wvars_))
+                local_expr_[id->spelling()] = std::move(expand);
+            else
+                statements_.push_back(std::move(orig));
+        }
+        else {
+            statements_.push_back(std::move(orig));
+        }
+    }
+    else {
+        // expand the rhs of the derivative with substitute map
+        auto expanded_rhs = substitute(rhs, local_expr_);
+
+        // get the determinsitc part: f
+        Location loc = e->location();
+        auto rhs_deterministic = expanded_rhs->clone();
+        for (auto w : wvars_) {
+            auto zero_expr = make_expression<NumberExpression>(loc, 0.0);
+            rhs_deterministic = substitute(rhs_deterministic, w,  zero_expr);
+        }
+        rhs_deterministic = constant_simplify(rhs_deterministic);
+        f_.push_back(std::move(rhs_deterministic));
+
+        // get the white noise coefficients
+        auto r = linear_test(expanded_rhs, wvars_);
+        if (!r.is_linear) {
+            error({"System is not a valid SDE", e->location()});
+            return;
+        }
+        for (unsigned k=0; k<wvars_.size(); ++k) {
+            auto const & w = wvars_[k];
+            if (r.coef.count(w))
+                L_[k].push_back(std::move(r.coef[w]));
+            else
+                L_[k].push_back(make_expression<IntegerExpression>(loc, 0));
+        }
+        // push back placeholder
+        statements_.push_back(make_expression<IdentifierExpression>(loc, deriv->name()));
+    }
+}
+
+void EulerMaruyamaSolverVisitor::visit(BlockExpression* e) {
+
+    // scaling factor for white noise: s = sqrt(dt)
+    auto dt_expr = make_expression<IdentifierExpression>(Location{}, "dt");
+    auto half_expr = make_expression<NumberExpression>(Location{}, 0.5);
+    auto wscale_expr = binary_expression(Location{}, tok::pow, std::move(dt_expr), std::move(half_expr));
+    auto wscale = make_unique_local_assign(e->scope(), wscale_expr, "s_");
+    wscale_ = wscale.id->is_identifier()->spelling();
+    statements_.push_back(std::move(wscale.local_decl));
+    statements_.push_back(std::move(wscale.assignment));
+
+    // white noise w
+    for (auto const& w : wvars_) {
+        // check if symbol is present
+        auto w_symbol = e->scope()->find(w);
+        if (!w_symbol) {
+            error({"couldn't find symbol", e->location()});
+            return;
+        }
+
+        // scaled white noise w_i = s * w
+        auto wscale = make_expression<IdentifierExpression>(Location{}, wscale_);
+        auto ww = make_expression<IdentifierExpression>(Location{}, w);
+        ww->is_identifier()->symbol(w_symbol);
+        auto w_ = binary_expression(Location{}, tok::times, std::move(wscale), std::move(ww));
+        auto temp_wvar_term = make_unique_local_assign(e->scope(), w_, "w_");
+        auto temp_wvar = temp_wvar_term.id->is_identifier()->spelling();
+        wvars_id_.push_back(temp_wvar);
+        statements_.push_back(std::move(temp_wvar_term.local_decl));
+        statements_.push_back(std::move(temp_wvar_term.assignment));
+    }
+
+    // Do a first pass to extract variables comprising ODE system
+    // lhs; can't really trust 'STATE' block.
+    for (auto& stmt: e->statements()) {
+        if (stmt && stmt->is_assignment() && stmt->is_assignment()->lhs()->is_derivative()) {
+            auto id = stmt->is_assignment()->lhs()->is_derivative();
+            dvars_.push_back(id->name());
+        }
+    }
+
+    BlockRewriterBase::visit(e);
+}
+
+void EulerMaruyamaSolverVisitor::finalize() {
+    if (has_error()) return;
+
+    // filter out unused white noise
+    std::vector<std::string> wvars_new;
+    std::vector<std::string> wvars_id_new;
+    std::vector<std::vector<expression_ptr>> L_new;
+    for (unsigned k=0; k<wvars_.size(); ++k) {
+        bool used = false;
+        for (auto& coef : L_[k]){
+            if (!is_zero(coef)) {
+                used = true;
+                break;
+            }
+        }
+        if (used) {
+            wvars_new.push_back(std::move(wvars_[k]));
+            wvars_id_new.push_back(std::move(wvars_id_[k]));
+            L_new.push_back( std::move(L_[k]) );
+        }
+    }
+    wvars_ = std::move(wvars_new);
+    wvars_id_ = std::move(wvars_id_new);
+    L_ = std::move(L_new);
+
+    // update the state variables
+    for (unsigned i = 0; i < dvars_.size(); ++i) {
+        // find placeholder expression
+        auto placeholder_expr_iter = std::find_if(statements_.begin(), statements_.end(),
+            [name=dvars_[i]](expression_ptr const & e) {
+                return e->is_identifier() && (e->is_identifier()->spelling() == name); });
+        if (placeholder_expr_iter == statements_.end()) {
+            error({"inconsistent ordering of derivative assignments"});
+            return;
+        }
+        Location loc = (*placeholder_expr_iter)->location();
+
+        // deterministic part: rhs = x + f dt
+        auto rhs = binary_expression(loc, tok::plus, 
+            make_expression<IdentifierExpression>(loc, dvars_[i]),
+                binary_expression(loc, tok::times,
+                    f_[i]->clone(),
+                    make_expression<IdentifierExpression>(loc, "dt")));
+
+        // stochastic part: rhs = rhs + Σ_k l_k dW_k
+        for (unsigned k=0; k<wvars_.size(); ++k) {
+            rhs = binary_expression(loc, tok::plus, rhs->clone(),
+                constant_simplify(
+                    binary_expression(loc, tok::times, L_[k][i]->clone(),
+                        make_expression<IdentifierExpression>(loc, wvars_id_[k]))));
+            rhs = constant_simplify(rhs);
+        }
+
+        // update state: x = rhs
+        auto expr = make_expression<AssignmentExpression>(loc,
+            make_expression<IdentifierExpression>(loc, dvars_[i]), std::move(rhs));
+
+        // replace placeholder expression
+        *placeholder_expr_iter = std::move(expr);
+    }
+}
+
+
+// Linear solver vistior implementation
+
 void LinearSolverVisitor::visit(BlockExpression* e) {
     BlockRewriterBase::visit(e);
 }
diff --git a/modcc/solvers.hpp b/modcc/solvers.hpp
index 719ad0c0..15fd498b 100644
--- a/modcc/solvers.hpp
+++ b/modcc/solvers.hpp
@@ -268,3 +268,43 @@ public:
         SolverVisitorBase::reset();
     }
 };
+
+class ARB_LIBMODCC_API EulerMaruyamaSolverVisitor : public SolverVisitorBase {
+protected:
+    // list of white noise names appearing in derivatives on lhs
+    std::vector<std::string> wvars_;
+    std::vector<std::string> wvars_id_;
+
+    // dx = f(x,t) dt + Σ_k l_k dW_k
+    std::vector<expression_ptr> f_;
+    std::vector<std::vector<expression_ptr>> L_;
+    
+    std::string wscale_;
+
+    // Expanded local assignments that need to be substituted in for derivative
+    // calculations.
+    substitute_map local_expr_;
+public:
+    using SolverVisitorBase::visit;
+
+    EulerMaruyamaSolverVisitor() {}
+    EulerMaruyamaSolverVisitor(scope_ptr enclosing): SolverVisitorBase(enclosing) {}
+    EulerMaruyamaSolverVisitor(std::vector<std::string> white_noise_vars)
+    :   wvars_{white_noise_vars},
+        L_{wvars_.size()}
+        //wvar_temp_{wvars_.size()}
+    {}
+
+    virtual void visit(BlockExpression* e) override;
+    virtual void visit(AssignmentExpression *e) override;
+    virtual void reset() override {
+        dvars_.clear();
+        f_.clear();
+        L_.clear();
+        local_expr_.clear();
+        BlockRewriterBase::reset();
+    }
+protected:
+    // Finalise statements list at end of top block visit.
+    virtual void finalize() override;
+};
diff --git a/modcc/symdiff.cpp b/modcc/symdiff.cpp
index 783a3ad0..35fdfc8d 100644
--- a/modcc/symdiff.cpp
+++ b/modcc/symdiff.cpp
@@ -682,7 +682,10 @@ ARB_LIBMODCC_API linear_test_result linear_test(Expression* e, const std::vector
             return res;
         }
         if (!coef) return linear_test_result{};
-        if (!is_zero(coef)) result.coef[id] = std::move(coef);
+        if (!is_zero(coef)){
+            result.coef[id] = std::move(coef);
+            result.is_constant = false;
+        }
         result.constant = substitute(result.constant, id, zero());
     }
 
@@ -714,4 +717,3 @@ done:
 
     return result;
 }
-
diff --git a/modcc/symdiff.hpp b/modcc/symdiff.hpp
index b4387489..08ba3e88 100644
--- a/modcc/symdiff.hpp
+++ b/modcc/symdiff.hpp
@@ -84,6 +84,7 @@ inline expression_ptr substitute(const expression_ptr& e, const substitute_map&
 struct linear_test_result: public error_stack {
     bool is_linear = false;
     bool is_homogeneous = false;
+    bool is_constant = true;
     expression_ptr constant;
     std::map<std::string, expression_ptr> coef;
 
diff --git a/modcc/token.cpp b/modcc/token.cpp
index db338d16..1c94385c 100644
--- a/modcc/token.cpp
+++ b/modcc/token.cpp
@@ -27,6 +27,7 @@ static Keyword keywords[] = {
     {"PARAMETER",   tok::parameter},
     {"CONSTANT",    tok::constant},
     {"ASSIGNED",    tok::assigned},
+    {"WHITE_NOISE", tok::white_noise},
     {"STATE",       tok::state},
     {"BREAKPOINT",  tok::breakpoint},
     {"DERIVATIVE",  tok::derivative},
@@ -64,6 +65,7 @@ static Keyword keywords[] = {
     {"ELSE",        tok::else_stmt},
     {"cnexp",       tok::cnexp},
     {"sparse",      tok::sparse},
+    {"stochastic",  tok::stochastic},
     {"min",         tok::min},
     {"max",         tok::max},
     {"exp",         tok::exp},
@@ -111,6 +113,7 @@ static TokenString token_strings[] = {
     {"PARAMETER",   tok::parameter},
     {"CONSTANT",    tok::constant},
     {"ASSIGNED",    tok::assigned},
+    {"WHITE_NOISE", tok::white_noise},
     {"STATE",       tok::state},
     {"BREAKPOINT",  tok::breakpoint},
     {"DERIVATIVE",  tok::derivative},
diff --git a/modcc/token.hpp b/modcc/token.hpp
index a953ae2e..1e4fa762 100644
--- a/modcc/token.hpp
+++ b/modcc/token.hpp
@@ -53,7 +53,7 @@ enum class tok {
     // block keywoards
     title,
     neuron, units, parameter,
-    constant, assigned, state, breakpoint,
+    constant, assigned, white_noise, state, breakpoint,
     derivative, kinetic, procedure, initial, function, linear,
     net_receive, post_event,
 
@@ -80,6 +80,7 @@ enum class tok {
     // solver methods
     cnexp,
     sparse,
+    stochastic,
 
     conductance,
 
diff --git a/modcc/visitor.hpp b/modcc/visitor.hpp
index e41123ad..72d8eb3d 100644
--- a/modcc/visitor.hpp
+++ b/modcc/visitor.hpp
@@ -17,6 +17,7 @@ public:
     virtual void visit(Expression *e) = 0;
     virtual void visit(Symbol *e)               { visit((Expression*) e); }
     virtual void visit(LocalVariable *e)        { visit((Expression*) e); }
+    virtual void visit(WhiteNoise *e)           { visit((Expression*) e); }
     virtual void visit(IdentifierExpression *e) { visit((Expression*) e); }
     virtual void visit(NumberExpression *e)     { visit((Expression*) e); }
     virtual void visit(IntegerExpression *e)    { visit((NumberExpression*) e); }
@@ -27,7 +28,7 @@ public:
     virtual void visit(ReactionExpression *e)   { visit((Expression*) e); }
     virtual void visit(StoichTermExpression *e) { visit((Expression*) e); }
     virtual void visit(StoichExpression *e)     { visit((Expression*) e); }
-    virtual void visit(CompartmentExpression *e) { visit((Expression*) e); }
+    virtual void visit(CompartmentExpression *e){ visit((Expression*) e); }
     virtual void visit(VariableExpression *e)   { visit((Expression*) e); }
     virtual void visit(IndexedVariable *e)      { visit((Expression*) e); }
     virtual void visit(FunctionExpression *e)   { visit((Expression*) e); }
@@ -38,7 +39,7 @@ public:
     virtual void visit(ProcedureExpression *e)  { visit((Expression*) e); }
     virtual void visit(NetReceiveExpression *e) { visit((ProcedureExpression*) e); }
     virtual void visit(APIMethod *e)            { visit((Expression*) e); }
-    virtual void visit(ConductanceExpression *e) { visit((Expression*) e); }
+    virtual void visit(ConductanceExpression *e){ visit((Expression*) e); }
     virtual void visit(BlockExpression *e)      { visit((Expression*) e); }
     virtual void visit(InitialBlock *e)         { visit((BlockExpression*) e); }
 
@@ -50,7 +51,7 @@ public:
     virtual void visit(SinUnaryExpression *e)   { visit((UnaryExpression*) e); }
 
     virtual void visit(BinaryExpression *e) = 0;
-    virtual void visit(ConditionalExpression *e) {visit((BinaryExpression*) e); }
+    virtual void visit(ConditionalExpression *e){ visit((BinaryExpression*) e); }
     virtual void visit(AssignmentExpression *e) { visit((BinaryExpression*) e); }
     virtual void visit(ConserveExpression *e)   { visit((BinaryExpression*) e); }
     virtual void visit(LinearExpression *e)     { visit((BinaryExpression*) e); }
diff --git a/python/cells.cpp b/python/cells.cpp
index 6baa9498..d79059cf 100644
--- a/python/cells.cpp
+++ b/python/cells.cpp
@@ -630,6 +630,8 @@ void register_cells(pybind11::module& m) {
         .def("check", [](const arb::cable_cell_global_properties& props) {
                 arb::check_global_properties(props);},
                 "Test whether all default parameters and ion species properties have been set.")
+        .def_readwrite("coalesce_synapses",  &arb::cable_cell_global_properties::coalesce_synapses,
+                "Flag for enabling/disabling linear syanpse coalescing.")
         // set cable properties
         .def_property("membrane_potential",
                       [](const arb::cable_cell_global_properties& props) { return props.default_parameters.init_membrane_potential; },
diff --git a/python/simulation.cpp b/python/simulation.cpp
index 03b80bd0..ef3e1868 100644
--- a/python/simulation.cpp
+++ b/python/simulation.cpp
@@ -55,11 +55,11 @@ class simulation_shim {
     std::unordered_map<arb::sampler_association_handle, sampler_callback> sampler_map_;
 
 public:
-    simulation_shim(std::shared_ptr<py_recipe>& rec, const context_shim& ctx, const arb::domain_decomposition& decomp, pyarb_global_ptr global_ptr):
+    simulation_shim(std::shared_ptr<py_recipe>& rec, const context_shim& ctx, const arb::domain_decomposition& decomp, std::uint64_t seed, pyarb_global_ptr global_ptr):
         global_ptr_(global_ptr)
     {
         try {
-            sim_.reset(new arb::simulation(py_recipe_shim(rec), ctx.context, decomp));
+            sim_.reset(new arb::simulation(py_recipe_shim(rec), ctx.context, decomp, seed));
         }
         catch (...) {
             py_reset_and_throw();
@@ -201,11 +201,12 @@ void register_simulation(pybind11::module& m, pyarb_global_ptr global_ptr) {
         .def(pybind11::init(
                  [global_ptr](std::shared_ptr<py_recipe>& rec,
                               const std::shared_ptr<context_shim>& ctx_,
-                              const std::optional<arb::domain_decomposition>& decomp) {
+                              const std::optional<arb::domain_decomposition>& decomp,
+                              std::uint64_t seed) {
                 try {
                     auto ctx = ctx_ ? ctx_ : std::make_shared<context_shim>(arb::make_context());
                     auto dec = decomp.value_or(arb::partition_load_balance(py_recipe_shim(rec), ctx->context));
-                    return new simulation_shim(rec, *ctx, dec, global_ptr);
+                    return new simulation_shim(rec, *ctx, dec, seed, global_ptr);
                 }
                 catch (...) {
                     py_reset_and_throw();
@@ -218,7 +219,8 @@ void register_simulation(pybind11::module& m, pyarb_global_ptr global_ptr) {
             "according to the domain decomposition and computational resources described by a context.",
              "recipe"_a,
              pybind11::arg_v("context", pybind11::none(), "Execution context"),
-             pybind11::arg_v("domains", pybind11::none(), "Domain decomposition"))
+             pybind11::arg_v("domains", pybind11::none(), "Domain decomposition"),
+             pybind11::arg_v("seed", 0u, "Random number generator seed"))
         .def("update", &simulation_shim::update,
              "Rebuild the connection table from recipe::connections_on and the event"
              "generators based on recipe::event_generators.",
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 5b398e61..a3282d0f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,6 +1,9 @@
 find_package(Threads REQUIRED)
 
-add_library(gtest EXCLUDE_FROM_ALL gtest-all.cpp)
+# Using shared gtest library seems to cause problems at test teardown (double free) on certain
+# systems, potentially related to this issue https://github.com/google/googletest/issues/930. Always
+# building a static library works around this problem.
+add_library(gtest STATIC EXCLUDE_FROM_ALL gtest-all.cpp)
 set_target_properties(gtest PROPERTIES CXX_VISIBILITY_PRESET hidden)
 target_include_directories(gtest PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
 target_link_libraries(gtest PUBLIC Threads::Threads)
diff --git a/test/unit-modcc/mod_files/test10.mod b/test/unit-modcc/mod_files/test10.mod
new file mode 100644
index 00000000..8be2e9f6
--- /dev/null
+++ b/test/unit-modcc/mod_files/test10.mod
@@ -0,0 +1,35 @@
+NEURON {
+    SUFFIX SABR
+	GLOBAL alpha, beta
+}
+
+PARAMETER {
+    alpha = 1
+	beta = 0.1
+}
+
+ASSIGNED {}
+
+STATE {
+    F
+    sigma
+}
+
+INITIAL {
+    F=1
+    sigma=0.2
+}
+
+BREAKPOINT {
+    SOLVE state METHOD stochastic
+}
+
+WHITE_NOISE {
+    W Z
+}
+
+DERIVATIVE state {
+    F' = sigma*F^beta*W
+    sigma' = alpha*sigma*Z
+}
+
diff --git a/test/unit-modcc/mod_files/test9.mod b/test/unit-modcc/mod_files/test9.mod
new file mode 100644
index 00000000..af05e09e
--- /dev/null
+++ b/test/unit-modcc/mod_files/test9.mod
@@ -0,0 +1,31 @@
+NEURON {
+    POINT_PROCESS OrnsteinUhlenbeck
+	GLOBAL tau, sigma
+}
+
+PARAMETER {
+    tau = 1000
+	sigma = 0.1
+}
+
+ASSIGNED {}
+
+STATE {
+    y
+}
+
+INITIAL {
+    y=1
+}
+
+BREAKPOINT {
+    SOLVE state METHOD stochastic
+}
+
+WHITE_NOISE {
+    zeta zeta2
+}
+
+DERIVATIVE state {
+    y' = -y/tau + (2*sigma*sigma/tau)^0.5*zeta
+}
diff --git a/test/unit-modcc/test_module.cpp b/test/unit-modcc/test_module.cpp
index 9e9a1db0..67e4b32f 100644
--- a/test/unit-modcc/test_module.cpp
+++ b/test/unit-modcc/test_module.cpp
@@ -19,7 +19,7 @@ TEST(Module, open) {
 
 TEST(Module, ion_deps) {
     Module m(io::read_all(DATADIR "/mod_files/test0.mod"), "test0.mod");
-    EXPECT_NE(m.buffer().size(), 0);
+    EXPECT_NE(m.buffer().size(), 0u);
 
     Parser p(m, false);
     EXPECT_TRUE(p.parse());
@@ -55,7 +55,7 @@ TEST(Module, ion_deps) {
 
 TEST(Module, identifiers) {
     Module m(io::read_all(DATADIR "/mod_files/test0.mod"), "test0.mod");
-    EXPECT_NE(m.buffer().size(), 0);
+    EXPECT_NE(m.buffer().size(), 0u);
 
     Parser p(m, false);
     EXPECT_TRUE(p.parse());
@@ -106,7 +106,7 @@ TEST(Module, linear_mechanisms) {
 TEST(Module, breakpoint) {
     // Test function call in BREAKPOINT block
     Module m(io::read_all(DATADIR "/mod_files/test8.mod"), "test8.mod");
-    EXPECT_NE(m.buffer().size(), 0);
+    EXPECT_NE(m.buffer().size(), 0u);
 
     Parser p(m, false);
     EXPECT_TRUE(p.parse());
@@ -116,7 +116,7 @@ TEST(Module, breakpoint) {
 
 TEST(Module, read_write_ion) {
     Module m(io::read_all(DATADIR "/mod_files/test-rw-ion.mod"), "test-rw-ion.mod");
-    EXPECT_NE(m.buffer().size(), 0);
+    EXPECT_NE(m.buffer().size(), 0u);
 
     Parser p(m, false);
     EXPECT_TRUE(p.parse());
@@ -128,7 +128,7 @@ TEST(Module, read_write_ion) {
 TEST(Module, solver_bug_1893) {
     {
         Module m(io::read_all(DATADIR "/mod_files/bug-1893.mod"), "bug-1893.mod");
-        EXPECT_NE(m.buffer().size(), 0);
+        EXPECT_NE(m.buffer().size(), 0u);
 
         Parser p(m, false);
         EXPECT_TRUE(p.parse());
@@ -136,10 +136,46 @@ TEST(Module, solver_bug_1893) {
     }
     {
         Module m(io::read_all(DATADIR "/mod_files/bug-1893-bad.mod"), "bug-1893.mod");
-        EXPECT_NE(m.buffer().size(), 0);
+        EXPECT_NE(m.buffer().size(), 0u);
 
         Parser p(m, false);
         EXPECT_TRUE(p.parse());
         EXPECT_FALSE(m.semantic());
     }
 }
+
+TEST(Module, stochastic_point) {
+    Module m(io::read_all(DATADIR "/mod_files/test9.mod"), "test9.mod");
+    EXPECT_NE(m.buffer().size(), 0u);
+
+    Parser p(m, false);
+    EXPECT_TRUE(p.parse());
+
+    auto const & wnb = m.white_noise_block();
+
+    EXPECT_EQ(wnb.parameters.size(), 2u);
+    EXPECT_EQ(wnb.used.size(), 0u);
+
+    EXPECT_TRUE(m.semantic());
+
+    EXPECT_EQ(wnb.parameters.size(), 2u);
+    EXPECT_EQ(wnb.used.size(), 1u);
+}
+
+TEST(Module, stochastic_density) {
+    Module m(io::read_all(DATADIR "/mod_files/test10.mod"), "test10.mod");
+    EXPECT_NE(m.buffer().size(), 0u);
+
+    Parser p(m, false);
+    EXPECT_TRUE(p.parse());
+
+    auto const & wnb = m.white_noise_block();
+
+    EXPECT_EQ(wnb.parameters.size(), 2u);
+    EXPECT_EQ(wnb.used.size(), 0u);
+
+    EXPECT_TRUE(m.semantic());
+
+    EXPECT_EQ(wnb.parameters.size(), 2u);
+    EXPECT_EQ(wnb.used.size(), 2u);
+}
diff --git a/test/unit-modcc/test_parser.cpp b/test/unit-modcc/test_parser.cpp
index 18d3e19c..c666a3b4 100644
--- a/test/unit-modcc/test_parser.cpp
+++ b/test/unit-modcc/test_parser.cpp
@@ -731,6 +731,32 @@ TEST(Parser, parse_state_block) {
     }
 }
 
+TEST(Parser, parse_white_noise_block) {
+    const char* white_noise_blocks[] = {
+        "WHITE_NOISE {\n"
+        "    a b c\n"
+        "}",
+        "WHITE_NOISE {\n"
+        "    a\n"
+        "    b c\n"
+        "}",
+        "WHITE_NOISE {\n"
+        "    a b\n"
+        "    c\n"
+        "}"};
+
+    expression_ptr null;
+    for (const auto& text: white_noise_blocks) {
+        Module m(text, text + std::strlen(text), "");
+        Parser p(m, false);
+        p.parse_white_noise_block();
+        EXPECT_EQ(lexerStatus::happy, p.status());
+        verbose_print(null, p, text);
+        EXPECT_EQ(m.white_noise_block().parameters.size(), 3u);
+        EXPECT_EQ(m.white_noise_block().used.size(), 0u);
+    }
+}
+
 static std::vector<IonDep> extract_useion(const std::string& neuron_block) {
     Module m(neuron_block, "dummy");
     Parser p(m, false);
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index d89a72c2..9270ae44 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -39,6 +39,11 @@ set(test_mechanisms
     write_eX
     write_multiple_eX
     write_Xi_Xo
+    mean_reverting_stochastic_process
+    mean_reverting_stochastic_process2
+    mean_reverting_stochastic_density_process
+    mean_reverting_stochastic_density_process2
+    stochastic_volatility
 )
 
 include(${PROJECT_SOURCE_DIR}/mechanisms/BuildModules.cmake)
@@ -138,6 +143,7 @@ set(unit_sources
     test_unique_any.cpp
     test_vector.cpp
     test_version.cpp
+    test_sde.cpp
 
     # unit test driver
     test.cpp
diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp
index ed172ba0..0547b4c1 100644
--- a/test/unit/test_matrix_cpuvsgpu.cpp
+++ b/test/unit/test_matrix_cpuvsgpu.cpp
@@ -123,10 +123,9 @@ TEST(matrix, assemble)
     std::generate(dt.begin(), dt.end(), [&](){return dt_dist(gen);});
 
     // Voltage, current, and conductance values
-    auto result_h = host_array(group_size);
+    auto result_h = host_array(group_size, -64);
     auto x_d = gpu_array(group_size);
-    m_mc.assemble(host_array(dt.begin(), dt.end()), host_array(group_size, -64), host_array(group_size, 10), host_array(group_size, 3));
-    m_mc.solve(result_h);
+    m_mc.solve(result_h, host_array(dt.begin(), dt.end()), host_array(group_size, 10), host_array(group_size, 3));
     m_gpu.assemble(on_gpu(dt), gpu_array(group_size, -64), gpu_array(group_size, 10), gpu_array(group_size, 3));
     m_gpu.solve(x_d);
     auto result_g = on_host(x_d);
diff --git a/test/unit/test_sde.cpp b/test/unit/test_sde.cpp
new file mode 100644
index 00000000..48dd369c
--- /dev/null
+++ b/test/unit/test_sde.cpp
@@ -0,0 +1,973 @@
+
+#include "../gtest.h"
+
+#include <atomic>
+#include <algorithm>
+#include <cmath>
+
+#include <arborio/label_parse.hpp>
+
+#include <arbor/cable_cell.hpp>
+#include <arbor/recipe.hpp>
+#include <arbor/sampling.hpp>
+#include <arbor/simulation.hpp>
+#include <arbor/schedule.hpp>
+#include <arbor/mechanism.hpp>
+#include <arbor/util/any_ptr.hpp>
+#ifdef ARB_GPU_ENABLED
+#include "memory/gpu_wrappers.hpp"
+#endif
+
+#include <arborenv/default_env.hpp>
+
+#include "unit_test_catalogue.hpp"
+#include "../simple_recipes.hpp"
+
+// ============================
+// helper classes and functions
+// ============================
+
+// data storage, can be filled concurrently
+template<typename T>
+struct archive {
+    std::vector<T> data_;
+    std::atomic<std::size_t> index_ = 0u;
+
+    archive(std::size_t n) : data_(n) {}
+
+    archive(const archive& other)
+    : data_{other.data_}
+    , index_{other.index_.load()}
+    {}
+
+    T* claim(std::size_t n) {
+        std::size_t expected, desired;
+        do {
+            expected = index_.load();
+            desired = expected + n;
+        }
+        while (!index_.compare_exchange_weak(expected, desired));
+        return &(data_[expected]);
+    }
+
+    // not thread safe
+    void reset() {
+        std::fill(data_.begin(), data_.end(), T{0});
+        index_.store(0u);
+    }
+
+    std::size_t size() const noexcept { return data_.size(); }
+};
+
+// compute mean and variance online
+// uses Welford-Knuth algorithm for the variance
+struct accumulator {
+    std::size_t n_ = 0;
+    double mean_ = 0;
+    double var_ = 0;
+
+    accumulator& operator()(double sample) {
+        double const delta = sample - mean_;
+        mean_ += delta / (++n_);
+        var_ += delta * (sample - mean_);
+        return *this;
+    }
+
+    std::size_t n() const noexcept { return n_; }
+    double mean() const noexcept { return mean_; }
+    double variance() const noexcept { return n_ > 1 ? var_/(n_-1) : 0; }
+};
+
+// Cumulative distribtion function of normal distribution
+double cdf_0(double x, double mu, double sigma) {
+    return 0.5*(1 + std::erf((x-mu)/(sigma*std::sqrt(2))));
+}
+
+// Supremum of distances of cumulative sample distribution function with respect to
+// normal cumulative distribution function
+template<typename T>
+double cdf_distance(const std::vector<T>& ordered_samples, double mu, double sigma) {
+    const std::size_t n = ordered_samples.size();
+    const double n_inv = 1.0/n;
+    double D_sup = 0;
+    for (std::size_t i=0; i<n; ++i) {
+        const double x = ordered_samples[i];
+        const double F_0 = cdf_0(x, mu, sigma);
+        const double d_u = std::abs((i+1)*n_inv - F_0);
+        const double d_l = std::abs(i*n_inv - F_0);
+        D_sup = std::max(D_sup, d_u);
+        D_sup = std::max(D_sup, d_l);
+    }
+    return D_sup;
+}
+
+// Kolmogorov-Smirnov test
+// Returns true if null hypothesis (samples are normally distributed) can not be rejected at
+// significance level alpha = 5%
+template<typename T>
+bool ks(const std::vector<T>& ordered_samples, double mu, double sigma) {
+    const std::size_t n = ordered_samples.size();
+    const double D_sup = cdf_distance(ordered_samples, mu, sigma);
+    // Kolmogorov statistic K
+    double K = std::sqrt(n)*D_sup;
+    // Critical value for significance level alpha (approximation for n > 35)
+    const double alpha = 0.05;
+    const double K_c = std::sqrt(-0.5*std::log(0.5*alpha));
+    const bool ret = (K < K_c);
+    if (!ret) {
+        std::cout << "ks test failed: "
+            << K << " is not smaller than critical value " << K_c << "\n";
+    }
+    return ret;
+}
+
+// Anderson-Darling test
+// Returns true if null hypothesis (samples are normally distributed) can not be rejected at
+// significance level alpha = 5%
+template<typename T>
+bool ad(const std::vector<T>& ordered_samples, double mu, double sigma) {
+    const std::size_t n = ordered_samples.size();
+    double a_mean = 0;
+    for (std::size_t i=0; i<n; ++i) {
+        const double x = ordered_samples[i];
+        const double a =
+            (2*(i+1)-1)*std::log(cdf_0(x, mu, sigma)) +
+            (2*(n-(i+1))+1)*std::log(1-cdf_0(x, mu, sigma));
+        double const delta = a - a_mean;
+        a_mean += delta / (i+1);
+    }
+    // Anderson-Darling distance
+    const double A2 = -a_mean - n;
+    // Critical value for significance level alpha = 5% (n > 5)
+    const double A2_c = 2.492;
+    const bool ret = (A2 < A2_c);
+    if (!ret) {
+        std::cout << "ad test failed: "
+            << A2 << " is not smaller than critical value " << A2_c << "\n";
+    }
+    return ret;
+}
+
+// Student's t-test
+// Returns true if null hypothesis (sample mean is equal to mu) can not be rejected at
+// significance level alpha = 5%
+bool t_test_mean(double mu, double sample_mean, double sample_variance, std::size_t n) {
+    // t statistic
+    const double t = std::sqrt(n/sample_variance)*(sample_mean - mu);
+    // Critical value for significance level alpha = 5% (n = ∞)
+    const double t_c = 1.960;
+    const bool ret = (t < t_c);
+    if (!ret) {
+        std::cout << "t test failed: "
+            << t << " is not smaller than critical value " << t_c
+            << ", sample_mean = " << sample_mean << ", expected mean " << mu << "\n";
+    }
+    return ret;
+}
+
+// Chi^2 test
+// Returns true if null hypothesis (sample variance is equal to sigma_squared) can not be rejected
+// at significance level alpha = 5%
+bool chi_2_test_variance(double sigma_squared, double sample_mean, double sample_variance, std::size_t n) {
+    // compute statistic following chi squared distribution
+    const double c = (n-1)*sample_variance/sigma_squared;
+    // we assume many samples, so chi squared distribution becomes normal distribution
+    // compute standard normally distributed variable
+    const double c_n = (c-n)/std::sqrt(2*n);
+    // critical value at 5%
+    const double c_n_c = 1.959963984540;
+    const bool ret = ((c_n < c_n_c) && (c_n > -c_n_c));
+    if (!ret) {
+        std::cout << "chi^2 test failed: "
+            << c_n << " is not between critical values (" << -c_n_c << ", " << c_n_c << ")"
+            << ", sample_variance = " << sample_variance
+            << ", expected variance = " << sigma_squared 
+            << ", number of samples = " << n << "\n";
+    }
+    return ret;
+}
+
+// combined statistical tests (assumes data is sorted)
+template<typename T>
+void test_statistics(const std::vector<T>& ordered_samples, double mu, double sigma) {
+    // uniqueness test
+    auto it = std::adjacent_find(ordered_samples.begin(), ordered_samples.end());
+    EXPECT_EQ(it, ordered_samples.end());
+
+    // goodness-of-fit tests (checks whether normally distributed)
+    EXPECT_TRUE(ks(ordered_samples, mu, sigma));
+    EXPECT_TRUE(ad(ordered_samples, mu, sigma));
+
+    // mean and variance tests (assumes normal distribution)
+    accumulator acc;
+    for (auto x : ordered_samples) acc(x);
+    EXPECT_TRUE(t_test_mean(mu, acc.mean(), acc.variance(), acc.n()));
+    EXPECT_TRUE(chi_2_test_variance(sigma*sigma, acc.mean(), acc.variance(), acc.n()));
+}
+
+// ===========================
+// Recipe and global variables
+// ===========================
+
+using namespace arb;
+using namespace arborio::literals;
+
+// forward declaration
+class sde_recipe;
+
+// global variables used in overriden advance methods
+sde_recipe* rec_ptr = nullptr;
+archive<arb_value_type>* archive_ptr = nullptr;
+
+// declaration of overriding advance methods
+void advance_mean_reverting_stochastic_density_process(arb_mechanism_ppack* pp);
+void advance_mean_reverting_stochastic_density_process2(arb_mechanism_ppack* pp);
+void advance_mean_reverting_stochastic_process(arb_mechanism_ppack* pp);
+void advance_mean_reverting_stochastic_process2(arb_mechanism_ppack* pp);
+
+// helper macro for replacing a mechanism's advance method
+#define REPLACE_IMPLEMENTATION(MECH)                                                               \
+{                                                                                                  \
+    auto inst = cell_gprop_.catalogue.instance(arb_backend_kind_cpu, #MECH);                       \
+    advance_ ## MECH = inst.mech->iface_.advance_state;                                            \
+    inst.mech->iface_.advance_state = &::advance_ ## MECH;                                         \
+    cell_gprop_.catalogue.register_implementation(#MECH, std::move(inst.mech));                    \
+}
+
+// This recipe generates simple 1D cells (consisting of 2 segments) and optionally replaces the
+// mechanism's implementation by dispatching to a user defined advance function.
+class sde_recipe: public simple_recipe_base {
+public:
+    sde_recipe(unsigned ncell, unsigned ncvs, label_dict labels, decor dec,
+        bool replace_implementation = true)
+    : simple_recipe_base()
+    , ncell_(ncell) {
+        // add unit test catalogue
+        cell_gprop_.catalogue.import(make_unit_test_catalogue(), "");
+
+        // replace mechanisms' advance methods
+        if (replace_implementation) {
+            rec_ptr = this;
+            REPLACE_IMPLEMENTATION(mean_reverting_stochastic_density_process)
+            REPLACE_IMPLEMENTATION(mean_reverting_stochastic_density_process2)
+            REPLACE_IMPLEMENTATION(mean_reverting_stochastic_process)
+            REPLACE_IMPLEMENTATION(mean_reverting_stochastic_process2)
+        }
+
+        // set cvs explicitly
+        double const cv_size = 1.0;
+        dec.set_default(cv_policy_max_extent(cv_size));
+
+        // generate cells
+        unsigned const n1 = ncvs/2;
+        for (unsigned int i=0; i<ncell_; ++i) {
+            segment_tree tree;
+            tree.append(mnpos, {i*20., 0, 0.0, 4.0}, {i*20., 0, n1*cv_size, 4.0}, 1);
+            tree.append(0, {i*20., 0, ncvs*cv_size, 4.0}, 2);
+            cells_.push_back(cable_cell(morphology(tree), labels, dec));
+        }
+    }
+
+    cell_size_type num_cells() const override { return ncell_; }
+
+    util::unique_any get_cell_description(cell_gid_type gid) const override { return cells_[gid]; }
+
+    cell_kind get_cell_kind(cell_gid_type gid) const override { return cell_kind::cable; }
+
+    std::any get_global_properties(cell_kind) const override { return cell_gprop_; }
+    
+    sde_recipe& add_probe(probe_tag tag, std::any address) {
+        for (unsigned i=0; i<cells_.size(); ++i) {
+            simple_recipe_base::add_probe(i, tag, address);
+        }
+        return *this;
+    }
+
+private:
+    unsigned ncell_;
+    std::vector<cable_cell> cells_;
+
+public:
+    // pointers to original advance methods
+    arb_mechanism_method advance_mean_reverting_stochastic_density_process;
+    arb_mechanism_method advance_mean_reverting_stochastic_density_process2;
+    arb_mechanism_method advance_mean_reverting_stochastic_process;
+    arb_mechanism_method advance_mean_reverting_stochastic_process2;
+};
+
+// generic advance method used for all pertinent mechanisms
+// first argument indicates the number of random variables
+void advance_common(unsigned int n_rv, arb_mechanism_ppack* pp) {
+    const auto width = pp->width;
+    arb_value_type* ptr = archive_ptr->claim(width * n_rv);
+    for (arb_size_type j=0; j<n_rv; ++j) {
+        for (arb_size_type i=0; i<width; ++i) {
+            ptr[j*width+i] = pp->random_numbers[j][i];
+        }
+    }
+}
+
+// overriden advance methods dispatch to common implementation and then to original method
+void advance_mean_reverting_stochastic_density_process(arb_mechanism_ppack* pp) {
+    advance_common(1, pp);
+    rec_ptr->advance_mean_reverting_stochastic_density_process(pp);
+}
+void advance_mean_reverting_stochastic_density_process2(arb_mechanism_ppack* pp) {
+    advance_common(2, pp);
+    rec_ptr->advance_mean_reverting_stochastic_density_process2(pp);
+}
+void advance_mean_reverting_stochastic_process(arb_mechanism_ppack* pp) {
+    advance_common(1, pp);
+    rec_ptr->advance_mean_reverting_stochastic_process(pp);
+}
+void advance_mean_reverting_stochastic_process2(arb_mechanism_ppack* pp) {
+    advance_common(2, pp);
+    rec_ptr->advance_mean_reverting_stochastic_process2(pp);
+}
+
+// =====
+// Tests
+// =====
+
+// generate a label dictionary with locations for synapses
+label_dict make_label_dict(unsigned nsynapse) {
+    label_dict labels;
+    auto t_1 = "(tag 1)"_reg;
+    labels.set("left", t_1);
+    auto t_2 = "(tag 2)"_reg;
+    labels.set("right", t_2);
+    auto locs = *arborio::parse_locset_expression("(uniform (all) 0 " + std::to_string(nsynapse-1) + " 0)");
+    labels.set("locs", locs);
+    return labels;
+}
+
+// Test quality of generated random numbers.
+// In order to cover all situations, we have multiple cells running on multiple threads, using
+// different load balancing strategies. Furthermore, multiple (and duplicated) stochastic point
+// mechanisms and multiple (and duplicated) stochastic density mechanisms are added for each cell.
+// The simulations are run for several time steps, and we sample the relevant random values in each
+// time step.
+// The quality of the random numbers is assesed by checking
+// - consistency
+//     The same simulations with varying number of threads and different partitioning must give the
+//     same random values
+// - uniqueness
+//     The random values must be unique and cannot repeat within a simulation and with respect to
+//     another simulation with a different seed
+// - goodness of fit
+//     We use Kolmogorov-Smirnoff and Anderson-Darling tests to check that we can not reject the
+//     null hypothesis that the values are (standard) normally distributed
+// - mean and variance
+//     Assuming a normal distribution, we check that the mean and the variance are close to the
+//     expected values using a student's t-test and a chi^2 test, respectively.
+TEST(sde, normality) {
+    // simulation parameters
+    unsigned ncells = 4;
+    unsigned nsynapses = 100;
+    unsigned ncvs = 100;
+    double const dt = 0.5;
+    unsigned nsteps = 50;
+
+    // make labels (and locations for synapses)
+    auto labels = make_label_dict(nsynapses);
+
+    // Decorations with a bunch of stochastic processes
+    // Duplicate mechanisms added on purpose in order test generation of unique random values
+    decor dec;
+    dec.paint("(all)"_reg , density("hh"));
+    dec.paint("(all)"_reg , density("mean_reverting_stochastic_density_process"));
+    dec.paint("(all)"_reg , density("mean_reverting_stochastic_density_process/sigma=0.2"));
+    dec.paint("(all)"_reg , density("mean_reverting_stochastic_density_process2"));
+    dec.paint("(all)"_reg , density("mean_reverting_stochastic_density_process2/sigma=0.2"));
+    dec.place(*labels.locset("locs"), synapse("mean_reverting_stochastic_process"), "synapses");
+    dec.place(*labels.locset("locs"), synapse("mean_reverting_stochastic_process"), "synapses");
+    dec.place(*labels.locset("locs"), synapse("mean_reverting_stochastic_process2"), "synapses");
+    dec.place(*labels.locset("locs"), synapse("mean_reverting_stochastic_process2"), "synapses");
+
+    // instantiate recipe
+    sde_recipe rec(ncells, ncvs, labels, dec, true);
+
+    // calculate storage needs
+    // - 2 point processes with 1 random variable each
+    // - 2 point processes with 2 random variables each
+    // - same for density processes
+    std::size_t n_rv_synapses = ncells*nsynapses*(1+1+2+2);
+    std::size_t n_rv_densities = ncells*ncvs*(1+1+2+2);
+    std::size_t n_rv_per_dt = n_rv_synapses + n_rv_densities;
+    std::size_t n_rv = n_rv_per_dt*(nsteps+1);
+
+    // setup storage
+    std::vector<arb_value_type> data;
+    archive<arb_value_type> arch(n_rv);
+    archive_ptr = &arch;
+
+    // Run a bunch of different simulations, with different concurrency and load balancing
+    // Check that generated random numbers are identical
+
+    // single-threaded, one cell per thread
+    {
+        auto context = make_context({1, -1});
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(42);
+        sim.run(nsteps*dt, dt);
+
+        // sort data and store for comparison
+        std::sort(arch.data_.begin(), arch.data_.end());
+        data = arch.data_;
+    }
+
+    // multi-threaded, one cell per thread
+    {
+        auto context = make_context({arbenv::default_concurrency(), -1});
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(42);
+        arch.reset();
+        sim.run(nsteps*dt, dt);
+
+        // sort data
+        std::sort(arch.data_.begin(), arch.data_.end());
+
+        // check for equality
+        EXPECT_TRUE(std::equal(data.begin(), data.end(), arch.data_.begin()));
+    }
+
+    // single-threaded, 2 cells per thread
+    {
+        auto context = make_context({1, -1});
+        partition_hint_map hint_map;
+        partition_hint h;
+        h.cpu_group_size = 2;
+        hint_map[cell_kind::cable] = h;
+        auto decomp = partition_load_balance(rec, context, hint_map);
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_decomposition(decomp)
+            .set_seed(42);
+        arch.reset();
+        sim.run(nsteps*dt, dt);
+
+        // sort data
+        std::sort(arch.data_.begin(), arch.data_.end());
+
+        // check for equality
+        EXPECT_TRUE(std::equal(data.begin(), data.end(), arch.data_.begin()));
+    }
+
+    // multi-threaded, 2 cells per thread
+    {
+        auto context = make_context({arbenv::default_concurrency(), -1});
+        partition_hint_map hint_map;
+        partition_hint h;
+        h.cpu_group_size = 2;
+        hint_map[cell_kind::cable] = h;
+        auto decomp = partition_load_balance(rec, context, hint_map);
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_decomposition(decomp)
+            .set_seed(42);
+        arch.reset();
+        sim.run(nsteps*dt, dt);
+
+        // sort data
+        std::sort(arch.data_.begin(), arch.data_.end());
+
+        // check for equality
+        EXPECT_TRUE(std::equal(data.begin(), data.end(), arch.data_.begin()));
+    }
+
+    // Run another simulation with different seed and check that values are different
+    {
+        auto context = make_context({arbenv::default_concurrency(), -1});
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(0);
+        arch.reset();
+        sim.run(nsteps*dt, dt);
+
+        // sort data
+        std::sort(arch.data_.begin(), arch.data_.end());
+
+        // merge sort into combined data
+        std::vector<arb_value_type> combined(data.size()*2);
+        std::merge(
+            arch.data_.begin(), arch.data_.end(),
+            data.begin(), data.end(),
+            combined.begin());
+
+        // uniqueness test
+        auto it = std::adjacent_find(combined.begin(), combined.end());
+        EXPECT_EQ(it, combined.end());
+    }
+
+    // test statistics
+    test_statistics(data, 0.0, 1.0);
+
+    // test statistics with different seed
+    test_statistics(arch.data_, 0.0, 1.0);
+}
+
+// Test the solver by running the mean reverting process many times.
+// Every synapse in every cell computes the independent evolution of 4 different stochastic
+// processes. The results are evaluated at every time step and accumulated. Moreover, several
+// simulations with different seed values are run which multiply the samples obtained.
+// Quality of results is checked by comparing to expected mean and expected standard deviation
+// (relative error less than 1%).  We could also test the statistics with t test/ chi^2 test for
+// mean and variance - this is possible since the solution of the SDE is normally distributed
+// (linear SDE). However, due to the slow convergence of the variance, the sample size needs to be
+// very large, and the test would take too long to complete.
+TEST(sde, solver) {
+    // simulation parameters
+    unsigned ncells = 4;
+    unsigned nsynapses = 2000;
+    unsigned ncvs = 1;
+    double const dt = 1.0/512; // need relatively small time steps due to low accuracy
+    unsigned nsteps = 100;
+    unsigned nsims = 4;
+
+    // make labels (and locations for synapses)
+    auto labels = make_label_dict(nsynapses);
+
+    // Decorations with a bunch of stochastic processes
+    std::string m1 = "mean_reverting_stochastic_process/kappa=0.1,sigma=0.1";
+    std::string m2 = "mean_reverting_stochastic_process/kappa=0.01,sigma=0.1";
+    std::string m3 = "mean_reverting_stochastic_process/kappa=0.1,sigma=0.05";
+    std::string m4 = "mean_reverting_stochastic_process/kappa=0.01,sigma=0.05";
+    decor dec;
+    dec.place(*labels.locset("locs"), synapse(m1), "m1");
+    dec.place(*labels.locset("locs"), synapse(m2), "m2");
+    dec.place(*labels.locset("locs"), synapse(m3), "m3");
+    dec.place(*labels.locset("locs"), synapse(m4), "m4");
+
+    // a basic sampler: stores result in a vector
+    auto sampler_ = [ncells, nsteps] (std::vector<arb_value_type>& results, unsigned count,
+        probe_metadata pm, std::size_t n, sample_record const * samples) {
+
+        auto* point_info_ptr = arb::util::any_cast<const std::vector<arb::cable_probe_point_info>*>(pm.meta);
+        assert(point_info_ptr != nullptr);
+
+        unsigned n_entities = point_info_ptr->size();
+        assert(n_entities == count);
+
+        unsigned offset = pm.id.gid*(nsteps)*n_entities;
+        unsigned stride = n_entities;
+        assert(n == nsteps);
+        for (std::size_t i = 0; i<n; ++i) {
+            auto* value_range = arb::util::any_cast<const arb::cable_sample_range*>(samples[i].data);
+            assert(value_range);
+            const auto& [lo, hi] = *value_range;
+            assert(n_entities==hi-lo);
+            for (unsigned j = 0; j<n_entities; ++j) {
+                results[offset + stride*i + j] = lo[j];
+            }
+        }
+    };
+
+    // concrete sampler for process m1
+    std::vector<arb_value_type> results_m1(ncells*(nsteps)*(nsynapses));
+    auto sampler_m1 = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_m1, nsynapses, pm, n, samples);
+    };
+    // concrete sampler for process m2
+    std::vector<arb_value_type> results_m2(ncells*(nsteps)*(nsynapses));
+    auto sampler_m2 = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_m2, nsynapses, pm, n, samples);
+    };
+    // concrete sampler for process m3
+    std::vector<arb_value_type> results_m3(ncells*(nsteps)*(nsynapses));
+    auto sampler_m3 = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_m3, nsynapses, pm, n, samples);
+    };
+    // concrete sampler for process m4
+    std::vector<arb_value_type> results_m4(ncells*(nsteps)*(nsynapses));
+    auto sampler_m4 = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_m4, nsynapses, pm, n, samples);
+    };
+
+    // instantiate recipe
+    sde_recipe rec(ncells, ncvs, labels, dec, false);
+
+    // add probes
+    rec.add_probe(1, cable_probe_point_state_cell{m1, "S"});
+    rec.add_probe(2, cable_probe_point_state_cell{m2, "S"});
+    rec.add_probe(3, cable_probe_point_state_cell{m3, "S"});
+    rec.add_probe(4, cable_probe_point_state_cell{m4, "S"});
+
+    // results are accumulated for each time step
+    std::vector<accumulator> stats_m1(nsteps);
+    std::vector<accumulator> stats_m2(nsteps);
+    std::vector<accumulator> stats_m3(nsteps);
+    std::vector<accumulator> stats_m4(nsteps);
+
+    // context
+    auto context = make_context({arbenv::default_concurrency(), -1});
+
+    for (unsigned s=0; s<nsims; ++s)
+    {
+        // build a simulation object
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(s);
+
+        // add sampler
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==0); }, regular_schedule(dt),
+            sampler_m1, sampling_policy::exact);
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==1); }, regular_schedule(dt),
+            sampler_m2, sampling_policy::exact);
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==2); }, regular_schedule(dt),
+            sampler_m3, sampling_policy::exact);
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==3); }, regular_schedule(dt),
+            sampler_m4, sampling_policy::exact);
+        
+        // run the simulation
+        sim.run(nsteps*dt, dt);
+
+        // accumulate statistics for sampled data
+        for (unsigned int k=0; k<ncells; ++k){
+            for (unsigned int i=0; i<nsteps; ++i){
+                for (unsigned int j=0; j<(nsynapses); ++j){
+                    stats_m1[i](results_m1[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ]);
+                    stats_m2[i](results_m2[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ]);
+                    stats_m3[i](results_m3[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ]);
+                    stats_m4[i](results_m4[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ]);
+                }
+            }
+        }
+    }
+
+    // analytical solutions
+    auto expected = [](double kappa, double sigma, double t) -> std::pair<double,double> {
+        const double mu = 1.0;
+        const double S_0 = 2.0;
+        return {
+            mu - (mu-S_0)*std::exp(-kappa*t),
+            (sigma*sigma/(2*kappa))*(1.0 - std::exp(-2*kappa*t))
+        };
+    };
+    auto expected_m1 = [&](double t) { return expected(0.1, 0.1, t); };
+    auto expected_m2 = [&](double t) { return expected(0.01, 0.1, t); };
+    auto expected_m3 = [&](double t) { return expected(0.1, 0.05, t); };
+    auto expected_m4 = [&](double t) { return expected(0.01, 0.05, t); };
+
+    auto test = [&] (auto func, const auto& stats) {
+        for (unsigned int i=1; i<nsteps; ++i) {
+            auto [mu, sigma_squared] = func(i*dt);
+            double const mean = stats[i].mean();
+            double const var = stats[i].variance();
+
+            auto relative_error = [](double result, double expected) {
+                return std::abs(result-expected)/expected;
+            };
+
+            EXPECT_LT( relative_error(mean, mu)*100, 1.0 );
+            EXPECT_LT( relative_error(std::sqrt(var), std::sqrt(sigma_squared))*100, 2.0 );
+
+            // using statistcal tests:
+            //std::size_t const n = stats[i].n();
+            //EXPECT_TRUE(t_test_mean(mu, mean, var, n));
+            //EXPECT_TRUE(chi_2_test_variance(sigma_squared, mean, var, n));
+        }
+    };
+    test(expected_m1, stats_m1);
+    test(expected_m2, stats_m2);
+    test(expected_m3, stats_m3);
+    test(expected_m4, stats_m4);
+}
+
+// coupled linear SDE with 2 white noise sources
+TEST(sde, coupled) {
+    // simulation parameters
+    unsigned ncells = 4;
+    unsigned nsynapses = 2000;
+    unsigned ncvs = 1;
+    double const dt = 1.0/512; // need relatively small time steps due to low accuracy
+    unsigned nsteps = 100;
+    unsigned nsims = 4;
+
+    // make labels (and locations for synapses)
+    auto labels = make_label_dict(nsynapses);
+
+    // Decorations
+    std::string m1 = "stochastic_volatility";
+    decor dec;
+    dec.place(*labels.locset("locs"), synapse(m1), "m1");
+
+    // a basic sampler: stores result in a vector
+    auto sampler_ = [ncells, nsteps] (std::vector<arb_value_type>& results, unsigned count,
+        probe_metadata pm, std::size_t n, sample_record const * samples) {
+
+        auto* point_info_ptr = arb::util::any_cast<const std::vector<arb::cable_probe_point_info>*>(pm.meta);
+        assert(point_info_ptr != nullptr);
+
+        unsigned n_entities = point_info_ptr->size();
+        assert(n_entities == count);
+
+        unsigned offset = pm.id.gid*(nsteps)*n_entities;
+        unsigned stride = n_entities;
+        assert(n == nsteps);
+        for (std::size_t i = 0; i<n; ++i) {
+            auto* value_range = arb::util::any_cast<const arb::cable_sample_range*>(samples[i].data);
+            assert(value_range);
+            const auto& [lo, hi] = *value_range;
+            assert(n_entities==hi-lo);
+            for (unsigned j = 0; j<n_entities; ++j) {
+                results[offset + stride*i + j] = lo[j];
+            }
+        }
+    };
+
+    // concrete sampler for P
+    std::vector<arb_value_type> results_P(ncells*(nsteps)*(nsynapses));
+    auto sampler_P = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_P, nsynapses, pm, n, samples);
+    };
+    // concrete sampler for sigma
+    std::vector<arb_value_type> results_sigma(ncells*(nsteps)*(nsynapses));
+    auto sampler_sigma = [&] (probe_metadata pm, std::size_t n, sample_record const * samples) {
+        sampler_(results_sigma, nsynapses, pm, n, samples);
+    };
+
+    // instantiate recipe
+    sde_recipe rec(ncells, ncvs, labels, dec, false);
+
+    // add probes
+    rec.add_probe(1, cable_probe_point_state_cell{m1, "P"});
+    rec.add_probe(2, cable_probe_point_state_cell{m1, "sigma"});
+
+    // results are accumulated for each time step
+    std::vector<accumulator> stats_P(nsteps);
+    std::vector<accumulator> stats_sigma(nsteps);
+    std::vector<accumulator> stats_Psigma(nsteps);
+
+    // context
+    auto context = make_context({arbenv::default_concurrency(), -1});
+
+    for (unsigned s=0; s<nsims; ++s)
+    {
+        // build a simulation object
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(s);
+
+        // add sampler
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==0); }, regular_schedule(dt),
+            sampler_P, sampling_policy::exact);
+        sim.add_sampler([](cell_member_type pid) { return (pid.index==1); }, regular_schedule(dt),
+            sampler_sigma, sampling_policy::exact);
+
+        // run the simulation
+        sim.run(nsteps*dt, dt);
+
+        // accumulate statistics for sampled data
+        for (unsigned int k=0; k<ncells; ++k){
+            for (unsigned int i=0; i<nsteps; ++i){
+                for (unsigned int j=0; j<(nsynapses); ++j){
+                    const double P = results_P[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ];
+                    const double sigma = results_sigma[k*(nsteps)*(nsynapses) + i*(nsynapses) + j ];
+                    stats_P[i](P);
+                    stats_sigma[i](sigma);
+                    stats_Psigma[i](P*sigma);
+                }
+            }
+        }
+    }
+
+    // analytical solutions
+    auto expected = [](double t, double mu, double theta, double kappa, double sigma_1,
+        double P0, double sigma0) -> std::array<double,4> {
+        // E[P]                                        = mu*t + P0
+        // E[sigma]                                    = (simga0 - theta)*exp(-kappa*t) + theta
+        // Cov[P,P]         = E[P^2]-E[P]^2            = complicated
+        // Cov[P,sigma]     = E[P*sigma]-E[P]*E[sigma] = 0
+        // Cov[sigma,sigma] = E[sigma^2]-E[sigma]^2    = sigma_1^2*(1-exp(-2*kappa*t)/(2*kappa)
+        return {
+            mu*t + P0,
+            (sigma0 - theta)*std::exp(-kappa*t) + theta,
+            sigma_1*sigma_1*(1.0-std::exp(-2*kappa*t))/(2.0*kappa)
+        };
+    };
+
+    for (unsigned int i=1; i<nsteps; ++i) {
+        auto ex = expected(i*dt, 0.1, 0.1, 0.1, 0.1, 1, 0.2);
+
+        const double E_P = ex[0];
+        const double E_sigma = ex[1];
+        const double Var_sigma = ex[2];
+
+        const double stats_Cov_P_sigma =
+            stats_Psigma[i].mean() - stats_P[i].mean()*stats_sigma[i].mean();
+
+        auto relative_error = [](double result, double expected) {
+            return std::abs(result-expected)/expected;
+        };
+
+        EXPECT_LT( relative_error(stats_P[i].mean(), E_P)*100, 1.0 );
+        EXPECT_LT( relative_error(stats_sigma[i].mean(), E_sigma)*100,  1.0 );
+        EXPECT_LT( relative_error(std::sqrt(stats_sigma[i].variance()),
+            std::sqrt(Var_sigma))*100, 2.0 );
+        EXPECT_LT( stats_Cov_P_sigma, 1.0e-4);
+    }
+}
+
+
+#ifdef ARB_GPU_ENABLED
+
+// forward declaration
+class sde_recipe_gpu;
+
+// global variables used in overriden advance methods
+sde_recipe_gpu* rec_gpu_ptr = nullptr;
+
+// declaration of overriding advance methods
+void advance_mech_cpu(arb_mechanism_ppack* pp);
+void advance_mech_gpu(arb_mechanism_ppack* pp);
+
+class sde_recipe_gpu: public simple_recipe_base {
+public:
+    sde_recipe_gpu(unsigned ncell, unsigned ncvs, label_dict labels, decor dec)
+    : simple_recipe_base()
+    , ncell_(ncell) {
+        // add unit test catalogue
+        cell_gprop_.catalogue.import(make_unit_test_catalogue(), "");
+
+        rec_gpu_ptr = this;
+
+        // replace mechanisms' advance methods
+        auto inst_cpu = cell_gprop_.catalogue.instance(arb_backend_kind_cpu, "mean_reverting_stochastic_process");
+        advance_mech_cpu = inst_cpu.mech->iface_.advance_state;
+        inst_cpu.mech->iface_.advance_state = &::advance_mech_cpu;
+        cell_gprop_.catalogue.register_implementation("mean_reverting_stochastic_process", std::move(inst_cpu.mech));
+
+        auto inst_gpu = cell_gprop_.catalogue.instance(arb_backend_kind_gpu, "mean_reverting_stochastic_process");
+        advance_mech_gpu = inst_gpu.mech->iface_.advance_state;
+        inst_gpu.mech->iface_.advance_state = &::advance_mech_gpu;
+        cell_gprop_.catalogue.register_implementation("mean_reverting_stochastic_process", std::move(inst_gpu.mech));
+
+        // set cvs explicitly
+        double const cv_size = 1.0;
+        dec.set_default(cv_policy_max_extent(cv_size));
+
+        // generate cells
+        unsigned const n1 = ncvs/2;
+        for (unsigned int i=0; i<ncell_; ++i) {
+            segment_tree tree;
+            tree.append(mnpos, {i*20., 0, 0.0, 4.0}, {i*20., 0, n1*cv_size, 4.0}, 1);
+            tree.append(0, {i*20., 0, ncvs*cv_size, 4.0}, 2);
+            cells_.push_back(cable_cell(morphology(tree), labels, dec));
+        }
+    }
+
+    cell_size_type num_cells() const override { return ncell_; }
+
+    util::unique_any get_cell_description(cell_gid_type gid) const override { return cells_[gid]; }
+
+    cell_kind get_cell_kind(cell_gid_type gid) const override { return cell_kind::cable; }
+
+    std::any get_global_properties(cell_kind) const override { return cell_gprop_; }
+
+private:
+    unsigned ncell_;
+    std::vector<cable_cell> cells_;
+
+public:
+    // pointers to original advance methods
+    arb_mechanism_method advance_mech_cpu;
+    arb_mechanism_method advance_mech_gpu;
+};
+
+void advance_mech_cpu(arb_mechanism_ppack* pp) {
+    const auto width = pp->width;
+    arb_value_type* ptr = archive_ptr->claim(width);
+    for (arb_size_type i=0; i<width; ++i) {
+        ptr[i] = pp->random_numbers[0][i];
+    }
+    rec_gpu_ptr->advance_mech_cpu(pp);
+}
+
+void advance_mech_gpu(arb_mechanism_ppack* pp) {
+    const auto width = pp->width;
+    arb_value_type* ptr = archive_ptr->claim(width);
+    // copy gpu pointer
+    arb_value_type const * gpu_ptr;
+    memory::gpu_memcpy_d2h(&gpu_ptr, pp->random_numbers, sizeof(arb_value_type const*));
+    // copy data
+    memory::gpu_memcpy_d2h(ptr, gpu_ptr, width*sizeof(arb_value_type));
+    //tmp.resize(width);
+    rec_gpu_ptr->advance_mech_gpu(pp);
+}
+
+template<class T>
+bool almost_equal(T x, T y, unsigned ulp, T abs_tol = std::numeric_limits<T>::min()) {
+    if (x == y) return true;
+    const auto diff = std::abs(x-y);
+    const auto norm = std::min(std::abs(x) + std::abs(y), std::numeric_limits<T>::max());
+    return (diff < std::max(abs_tol, std::numeric_limits<T>::epsilon() * norm * ulp));
+}
+
+// This test checks that the GPU implementation returns the same random numbers as the CPU version
+TEST(sde, gpu) {
+    // simulation parameters
+    unsigned ncells = 4;
+    unsigned nsynapses = 100;
+    unsigned ncvs = 100;
+    double const dt = 0.5;
+    unsigned nsteps = 50;
+
+    // make labels (and locations for synapses)
+    auto labels = make_label_dict(nsynapses);
+
+    // instantiate recipe
+    decor dec;
+    dec.paint("(all)"_reg , density("hh"));
+    dec.place(*labels.locset("locs"), synapse("mean_reverting_stochastic_process"), "synapses");
+    sde_recipe_gpu rec(ncells, ncvs, labels, dec);
+
+    // calculate storage needs
+    std::size_t n_rv_synapses = ncells*nsynapses;
+    std::size_t n_rv_per_dt = n_rv_synapses;
+    std::size_t n_rv = n_rv_per_dt*(nsteps+1);
+
+    // setup storage
+    archive<arb_value_type> arch_cpu(n_rv);
+    archive<arb_value_type> arch_gpu(n_rv);
+
+    // on CPU
+    {
+        archive_ptr = &arch_cpu;
+        auto context = make_context({1, -1});
+        partition_hint_map hint_map;
+        partition_hint h;
+        h.cpu_group_size = ncells;
+        hint_map[cell_kind::cable] = h;
+        auto decomp = partition_load_balance(rec, context, hint_map);
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_decomposition(decomp)
+            .set_seed(42);
+        sim.run(nsteps*dt, dt);
+    }
+
+    // on GPU
+    {
+        archive_ptr = &arch_gpu;
+        auto context = make_context({1, arbenv::default_gpu()});
+        simulation sim = simulation::create(rec)
+            .set_context(context)
+            .set_seed(42);
+        sim.run(nsteps*dt, dt);
+    }
+
+    // compare values
+    for (std::size_t i=0; i<arch_cpu.size(); ++i) {
+        EXPECT_TRUE(
+            almost_equal(
+                arch_gpu.data_[i],
+                arch_cpu.data_[i],
+                128,
+                4*std::numeric_limits<arb_value_type>::epsilon()
+            )
+        );
+    }
+}
+#endif
diff --git a/test/unit/test_simulation.cpp b/test/unit/test_simulation.cpp
index 0e0caf2e..2a9b7804 100644
--- a/test/unit/test_simulation.cpp
+++ b/test/unit/test_simulation.cpp
@@ -55,6 +55,30 @@ TEST(simulation, null) {
     s.run(0.05, 0.01);
 }
 
+// Test with simulation builder
+TEST(simulation, null_builder) {
+    auto r = null_recipe{};
+    {
+        arb::simulation s = arb::simulation::create(r);
+        s.run(0.05, 0.01);
+    }
+    {
+        arb::simulation s = arb::simulation::create(r).set_seed(42);
+        s.run(0.05, 0.01);
+    }
+    {
+        auto c = arb::make_context();
+        arb::simulation s = arb::simulation::create(r).set_context(c);
+        s.run(0.05, 0.01);
+    }
+    {
+        auto c = arb::make_context();
+        auto d = arb::partition_load_balance(r, c);
+        arb::simulation s = arb::simulation::create(r).set_context(c).set_decomposition(d);
+        s.run(0.05, 0.01);
+    }
+}
+
 TEST(simulation, spike_global_callback) {
     constexpr unsigned n = 5;
     double t_max = 10.;
diff --git a/test/unit/testing/mean_reverting_stochastic_density_process.mod b/test/unit/testing/mean_reverting_stochastic_density_process.mod
new file mode 100644
index 00000000..9d7e86b6
--- /dev/null
+++ b/test/unit/testing/mean_reverting_stochastic_density_process.mod
@@ -0,0 +1,26 @@
+NEURON {
+    SUFFIX mean_reverting_stochastic_density_process
+    GLOBAL kappa, mu, sigma
+}
+
+PARAMETER {
+    kappa = 0.1
+    mu = 1
+    sigma = 0.1
+}
+
+STATE { s }
+
+WHITE_NOISE { w }
+
+BREAKPOINT {
+    SOLVE states METHOD stochastic
+}
+
+INITIAL {
+    s = 2
+}
+
+DERIVATIVE states {
+    s' = kappa*(mu - s) + sigma*w
+}
diff --git a/test/unit/testing/mean_reverting_stochastic_density_process2.mod b/test/unit/testing/mean_reverting_stochastic_density_process2.mod
new file mode 100644
index 00000000..509ef948
--- /dev/null
+++ b/test/unit/testing/mean_reverting_stochastic_density_process2.mod
@@ -0,0 +1,29 @@
+NEURON {
+    SUFFIX mean_reverting_stochastic_density_process2
+	GLOBAL kappa, mu, sigma
+}
+
+PARAMETER {
+    kappa = 0.1
+    mu = 1
+	sigma = 0.1
+}
+
+STATE { x y }
+
+WHITE_NOISE { q b }
+
+BREAKPOINT {
+    SOLVE states METHOD stochastic
+}
+
+INITIAL {
+    x = 2
+    y = 1
+}
+
+DERIVATIVE states {
+    x' = kappa*(mu - x) + sigma*q
+    y' = kappa*(mu - y) + sigma*b
+}
+
diff --git a/test/unit/testing/mean_reverting_stochastic_process.mod b/test/unit/testing/mean_reverting_stochastic_process.mod
new file mode 100644
index 00000000..bb153ef1
--- /dev/null
+++ b/test/unit/testing/mean_reverting_stochastic_process.mod
@@ -0,0 +1,42 @@
+: Mean reverting process (linear SDE)
+: ===================================
+: dS(t) = kappa [mu - S(t)] dt + sigma dW(t)
+: If mu==0 it is called Ornstein-Uhlenbeck process
+:     E[S(t)] = mu - (mu-S_0)*e^(-kappa t)
+:   Var[S(t)] = sigma^2/(2 kappa) [1 - e^(-2 kappa t)]
+: in the limit:
+:     lim t->∞   E[S(t)] = mu
+:     lim t->∞ Var[S(t)] = sigma^2/(2 kappa)
+
+NEURON {
+    POINT_PROCESS mean_reverting_stochastic_process
+    GLOBAL kappa, mu, sigma
+}
+
+PARAMETER {
+    kappa = 0.1
+    mu = 1
+    sigma = 0.1
+}
+
+ASSIGNED {}
+
+STATE {
+    S
+}
+
+INITIAL {
+    S=2
+}
+
+BREAKPOINT {
+    SOLVE state METHOD stochastic
+}
+
+WHITE_NOISE {
+    W
+}
+
+DERIVATIVE state {
+    S' = kappa*(mu - S) + sigma*W
+}
diff --git a/test/unit/testing/mean_reverting_stochastic_process2.mod b/test/unit/testing/mean_reverting_stochastic_process2.mod
new file mode 100644
index 00000000..055b6776
--- /dev/null
+++ b/test/unit/testing/mean_reverting_stochastic_process2.mod
@@ -0,0 +1,36 @@
+NEURON {
+    POINT_PROCESS mean_reverting_stochastic_process2
+    GLOBAL kappa, mu, sigma
+}
+
+PARAMETER {
+    kappa = 0.1
+    mu = 1
+    sigma = 0.1
+}
+
+ASSIGNED {}
+
+STATE {
+    X
+    Y
+}
+
+INITIAL {
+    X=2
+    Y=1
+}
+
+BREAKPOINT {
+    SOLVE state METHOD stochastic
+}
+
+WHITE_NOISE {
+    Q Z
+}
+
+DERIVATIVE state {
+    X' = kappa*(mu - X) + sigma*Q
+    Y' = kappa*(mu - Y) + sigma*Z
+}
+
diff --git a/test/unit/testing/stochastic_volatility.mod b/test/unit/testing/stochastic_volatility.mod
new file mode 100644
index 00000000..a3342e9c
--- /dev/null
+++ b/test/unit/testing/stochastic_volatility.mod
@@ -0,0 +1,42 @@
+: Stochastic Volatility (linear SDE)
+: ===================================
+: P:     log of the price (of a stock or bond)
+: sigma: volatility
+: The observed volatility of the price is not constant but itself a stochastic process.
+
+NEURON {
+    POINT_PROCESS stochastic_volatility
+    GLOBAL mu, theta, kappa, sigma_1
+}
+
+PARAMETER {
+    mu      = 0.1
+    theta   = 0.1
+    kappa   = 0.1
+    sigma_1 = 0.1
+}
+
+ASSIGNED {}
+
+STATE {
+    P sigma
+}
+
+INITIAL {
+    P=1
+    sigma=0.2
+}
+
+BREAKPOINT {
+    SOLVE state METHOD stochastic
+}
+
+WHITE_NOISE {
+    W1 W2
+}
+
+DERIVATIVE state {
+    P'     = mu + sigma*W1
+    sigma' = kappa*(theta-sigma) + sigma_1*W2
+}
+
-- 
GitLab