diff --git a/doc/sampling_api.rst b/doc/sampling_api.rst
index 79fa620dc27b7e428a48c64f0c1abeb874c3f5e4..71984fea560cfede4ac019fd1ca58cc238105368 100644
--- a/doc/sampling_api.rst
+++ b/doc/sampling_api.rst
@@ -273,3 +273,43 @@ accessors.
 probe ids and tuples (*probe handle*, *probe tag*), where the *probe handle*
 is a cell-group specific accessor that allows efficient polling.
 
+
+Batched sampling in ``mc_cell_group``
+-------------------------------------
+
+The ``fvm_multicell`` implementations for CPU and GPU simulation of multi-compartment
+cable neurons perform sampling in a batched manner: when integration is initialized,
+they take a sequence of ``sample_event`` objects, which they use to populate an
+implementation-specific ``multi_event_stream`` that describes, for each cell, what
+is to be sampled and when over the integration interval.
+
+When an integration step for a cell covers a sample event on that cell, the sample
+is satisfied with the value from the cell state at the beginning of the time step,
+after any postsynaptic spike events have been delivered.
+
+It is the responsibility of the ``mc_cell_group::advance()`` method to create the sample
+events from the entries of its ``sampler_association_map``, and to dispatch the
+sampled values to the sampler callbacks after the integration is complete.
+Given an association tuple (*schedule*, *sampler*, *probe set*) where the *schedule*
+has a non-zero number *n* of sample times in the current integration interval, ``mc_cell_group``
+will call the *sampler* callback once for each probe in the *probe set*, with *n* sample values.
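+
+As an illustration, a sampler callback compatible with this dispatch could look
+like the sketch below. The signature follows the ``sampler_function`` usage in
+``mc_cell_group``; the ``sample_record`` field names used here (``time``, ``data``)
+are assumptions made for this example only.
+
+.. code-block:: cpp
+
+    // Invoked once per probe in the probe set, with the n samples taken for
+    // that probe over the integration interval.
+    void my_sampler(cell_member_type probe_id, probe_tag tag,
+                    std::size_t n, const sample_record* records)
+    {
+        for (std::size_t i = 0; i<n; ++i) {
+            // Each record pairs a sample time with a pointer to the sampled value.
+            std::cout << "probe " << probe_id.gid << "." << probe_id.index
+                      << " t=" << records[i].time
+                      << " v=" << *records[i].data << "\n";
+        }
+    }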
diff --git a/modcc/cprinter.cpp b/modcc/cprinter.cpp
index f553a7a2cc6394159e651a834446e860ae2af68d..61d2caf5e342dd9a0ac05c82c4a0cadeefce284e 100644
--- a/modcc/cprinter.cpp
+++ b/modcc/cprinter.cpp
@@ -60,7 +60,7 @@ std::string CPrinter::emit_source() {
     text_.add_line("using const_view = typename base::const_view;");
     text_.add_line("using const_iview = typename base::const_iview;");
     text_.add_line("using ion_type = typename base::ion_type;");
-    text_.add_line("using multi_event_stream = typename base::multi_event_stream;");
+    text_.add_line("using deliverable_event_stream_state = typename base::deliverable_event_stream_state;");
     text_.add_line();
 
     //////////////////////////////////////////////
@@ -331,14 +331,17 @@ std::string CPrinter::emit_source() {
     }
 
     if(override_deliver_events) {
-        text_.add_line("void deliver_events(multi_event_stream& events) override {");
+        text_.add_line("void deliver_events(const deliverable_event_stream_state& events) override {");
         text_.increase_indentation();
         text_.add_line("auto ncell = events.n_streams();");
         text_.add_line("for (size_type c = 0; c<ncell; ++c) {");
         text_.increase_indentation();
-        text_.add_line("for (auto ev: events.marked_events(c)) {");
+
+        text_.add_line("auto begin = events.begin_marked(c);");
+        text_.add_line("auto end = events.end_marked(c);");
+        text_.add_line("for (auto p = begin; p<end; ++p) {");
         text_.increase_indentation();
-        text_.add_line("if (ev.handle.mech_id==mech_id_) net_receive(ev.handle.index, ev.weight);");
+        text_.add_line("if (p->mech_id==mech_id_) net_receive(p->mech_index, p->weight);");
         text_.decrease_indentation();
         text_.add_line("}");
         text_.decrease_indentation();
@@ -403,7 +406,8 @@ void CPrinter::emit_headers() {
     text_.add_line();
     text_.add_line("#include <mechanism.hpp>");
     text_.add_line("#include <algorithms.hpp>");
-    text_.add_line("#include <backends/multicore/multi_event_stream.hpp>");
+    text_.add_line("#include <backends/event.hpp>");
+    text_.add_line("#include <backends/multi_event_stream_state.hpp>");
     text_.add_line("#include <util/pprintf.hpp>");
     text_.add_line();
 }
diff --git a/modcc/cudaprinter.cpp b/modcc/cudaprinter.cpp
index 5f54ff0efeaae0f8e67af43bea854228a5daf24b..1a7cf47b12c3386f3d8730aec6292eab65d74ee5 100644
--- a/modcc/cudaprinter.cpp
+++ b/modcc/cudaprinter.cpp
@@ -41,8 +41,10 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
     text_.add_line();
     text_.add_line("#include <mechanism.hpp>");
     text_.add_line("#include <algorithms.hpp>");
+    text_.add_line("#include <backends/event.hpp>");
+    text_.add_line("#include <backends/multi_event_stream_state.hpp>");
+    text_.add_line("#include <backends/gpu/fvm.hpp>");
     text_.add_line("#include <backends/gpu/intrinsics.hpp>");
-    text_.add_line("#include <backends/gpu/multi_event_stream.hpp>");
     text_.add_line("#include <backends/gpu/kernels/reduce_by_key.hpp>");
     text_.add_line("#include <util/pprintf.hpp>");
     text_.add_line();
@@ -51,6 +53,10 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
     text_.add_line();
     increase_indentation();
 
+    text_.add_line("// same type as base::deliverable_event_stream_state in class definition");
+    text_.add_line("using deliverable_event_stream_state = multi_event_stream_state<deliverable_event_data>;");
+    text_.add_line();
+
     ////////////////////////////////////////////////////////////
     // generate the parameter pack
     ////////////////////////////////////////////////////////////
@@ -115,7 +121,6 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
     text_.add_line("namespace kernels {");
     {
         increase_indentation();
-        text_.add_line("using nest::mc::gpu::multi_event_stream;");
 
         // forward declarations of procedures
         for(auto const &var : m.symbols()) {
@@ -162,7 +167,7 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
     text_.add_line("using typename base::const_iview;");
     text_.add_line("using typename base::const_view;");
     text_.add_line("using typename base::ion_type;");
-    text_.add_line("using multi_event_stream = typename base::multi_event_stream;");
+    text_.add_line("using deliverable_event_stream_state = typename base::deliverable_event_stream_state;");
     text_.add_line("using param_pack_type = " + module_name + "_ParamPack<value_type, size_type>;");
 
     //////////////////////////////////////////////
@@ -442,13 +447,13 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o)
                  var.second->is_procedure()->kind()==procedureKind::net_receive)
         {
             // Override `deliver_events`.
-            text_.add_line("void deliver_events(multi_event_stream& events) override {");
+            text_.add_line("void deliver_events(const deliverable_event_stream_state& events) override {");
             text_.increase_indentation();
             text_.add_line("auto ncell = events.n_streams();");
             text_.add_line("constexpr int blockwidth = 128;");
             text_.add_line("int nblock = 1+(ncell-1)/blockwidth;");
             text_.add_line("kernels::deliver_events<value_type, size_type>"
-                           "<<<nblock, blockwidth>>>(param_pack_, mech_id_, events.delivery_data());");
+                           "<<<nblock, blockwidth>>>(param_pack_, mech_id_, events);");
             text_.decrease_indentation();
             text_.add_line("}");
             text_.add_line();
@@ -755,7 +760,7 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
         text_.add_line(       "__global__");
         text_.add_gutter() << "void deliver_events("
                            << module_->name() << "_ParamPack<T,I> params_, "
-                           << "I mech_id, multi_event_stream::span_state state) {";
+                           << "I mech_id, deliverable_event_stream_state state) {";
         text_.add_line();
         increase_indentation();
 
@@ -765,9 +770,11 @@ void CUDAPrinter::visit(ProcedureExpression *e) {
         text_.add_line("if(tid_<ncell_) {");
         increase_indentation();
 
-        text_.add_line("for (auto j = state.span_begin[tid_]; j<state.mark[tid_]; ++j) {");
+        text_.add_line("auto begin = state.ev_data+state.begin_offset[tid_];");
+        text_.add_line("auto end = state.ev_data+state.end_offset[tid_];");
+        text_.add_line("for (auto p = begin; p<end; ++p) {");
         increase_indentation();
-        text_.add_line("if (state.ev_mech_id[j]==mech_id) net_receive<T, I>(params_, state.ev_index[j], state.ev_weight[j]);");
+        text_.add_line("if (p->mech_id==mech_id) net_receive<T, I>(params_, p->mech_index, p->weight);");
         decrease_indentation();
         text_.add_line("}");
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 5bbabc608b4e20f952146c31c0a8697996cf3c0c..4a0c5ac1d3d19c577b7e1f6b9571234e7604a176 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ set(CUDA_SOURCES
     backends/gpu/fvm.cu
     backends/gpu/multi_event_stream.cu
     backends/gpu/fill.cu
+    backends/gpu/kernels/take_samples.cu
 )
 
 # The cell_group_factory acts like an interface between the
diff --git a/src/backends/event.hpp b/src/backends/event.hpp
index 0eae623fbd0b90abf493dfc44e6c53b3d2bc5047..efcb673dbb6c6723d1bf0cbc650f6e1d9cc65658 100644
--- a/src/backends/event.hpp
+++ b/src/backends/event.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <common_types.hpp>
+#include <backends/fvm_types.hpp>
 
 // Structures for the representation of event delivery targets and
 // staged events.
@@ -8,14 +9,16 @@
 namespace nest {
 namespace mc {
 
+// Post-synaptic spike events
+
 struct target_handle {
     cell_local_size_type mech_id;    // mechanism type identifier (per cell group).
-    cell_local_size_type index;      // instance of the mechanism
+    cell_local_size_type mech_index; // instance of the mechanism
     cell_size_type cell_index;       // which cell (acts as index into e.g. vec_t)
 
     target_handle() {}
-    target_handle(cell_local_size_type mech_id, cell_local_size_type index, cell_size_type cell_index):
-        mech_id(mech_id), index(index), cell_index(cell_index) {}
+    target_handle(cell_local_size_type mech_id, cell_local_size_type mech_index, cell_size_type cell_index):
+        mech_id(mech_id), mech_index(mech_index), cell_index(cell_index) {}
 };
 
 struct deliverable_event {
@@ -25,8 +28,51 @@ struct deliverable_event {
 
     deliverable_event() {}
     deliverable_event(time_type time, target_handle handle, float weight):
-        time(time), handle(handle), weight(weight) {}
+        time(time), handle(handle), weight(weight)
+    {}
+};
+
+// Stream index accessor function for multi_event_stream:
+inline cell_size_type event_index(const deliverable_event& ev) {
+    return ev.handle.cell_index;
+}
+
+// Subset of event information required for mechanism delivery.
+struct deliverable_event_data {
+    cell_local_size_type mech_id;    // same as target_handle::mech_id
+    cell_local_size_type mech_index; // same as target_handle::mech_index
+    float weight;
 };
 
+// Delivery data accessor function for multi_event_stream:
+inline deliverable_event_data event_data(const deliverable_event& ev) {
+    return {ev.handle.mech_id, ev.handle.mech_index, ev.weight};
+}
+
+
+// Sample events (scalar values)
+
+using probe_handle = const fvm_value_type*;
+
+struct raw_probe_info {
+    probe_handle handle;      // where the to-be-probed value sits
+    sample_size_type offset;  // offset into array to store raw probed value
+};
+
+struct sample_event {
+    time_type time;
+    cell_size_type cell_index;  // which cell the probe is on
+    raw_probe_info raw;         // event payload: what gets put where on sample
+};
+
+inline raw_probe_info event_data(const sample_event& ev) {
+    return ev.raw;
+}
+
+inline cell_size_type event_index(const sample_event& ev) {
+    return ev.cell_index;
+}
+
+
 } // namespace mc
 } // namespace nest
diff --git a/src/backends/gpu/fvm.hpp b/src/backends/gpu/fvm.hpp
index 28a48ec5c84e05b9d8fe913e2f43fde9045c7301..7002c7aefafb03370911f98e93920cb655c705c2 100644
--- a/src/backends/gpu/fvm.hpp
+++ b/src/backends/gpu/fvm.hpp
@@ -11,6 +11,7 @@
 #include <util/rangeutil.hpp>
 
 #include "kernels/time_ops.hpp"
+#include "kernels/take_samples.hpp"
 #include "matrix_state_interleaved.hpp"
 #include "matrix_state_flat.hpp"
 #include "multi_event_stream.hpp"
@@ -50,13 +51,18 @@ struct backend {
         return "gpu";
     }
 
+    // dereference a probe handle
+    static value_type dereference(probe_handle h) {
+        memory::const_device_reference<value_type> v(h); // h is a device-side pointer
+        return v;
+    }
+
     // matrix back end implementation
     using matrix_state = matrix_state_interleaved<value_type, size_type>;
-    using multi_event_stream = nest::mc::gpu::multi_event_stream;
 
-    // re-expose common backend event types
-    using deliverable_event = nest::mc::deliverable_event;
-    using target_handle = nest::mc::target_handle;
+    // backend-specific multi event streams.
+    using deliverable_event_stream = nest::mc::gpu::multi_event_stream<deliverable_event>;
+    using sample_event_stream = nest::mc::gpu::multi_event_stream<sample_event>;
 
     // mechanism infrastructure
     using ion = mechanisms::ion<backend>;
@@ -124,6 +130,16 @@ struct backend {
         nest::mc::gpu::set_dt<value_type, size_type>(ncell, ncomp, dt_cell.data(), dt_comp.data(), time_to.data(), time.data(), cv_to_cell.data());
     }
 
+    // perform sampling as described by marked events in a sample_event_stream
+    static void take_samples(
+        const sample_event_stream::state& s,
+        const_view time,
+        array& sample_time,
+        array& sample_value)
+    {
+        nest::mc::gpu::take_samples(s, time.data(), sample_time.data(), sample_value.data());
+    }
+
 private:
     using maker_type = mechanism (*)(size_type, const_iview, const_view, const_view, const_view, view, view, array&&, iarray&&);
     static std::map<std::string, maker_type> mech_map_;
diff --git a/src/backends/gpu/kernels/take_samples.cu b/src/backends/gpu/kernels/take_samples.cu
new file mode 100644
index 0000000000000000000000000000000000000000..2145400a058d6d8414064360346f5e64e27f8a7f
--- /dev/null
+++ b/src/backends/gpu/kernels/take_samples.cu
@@ -0,0 +1,50 @@
+#include <common_types.hpp>
+#include <backends/event.hpp>
+#include <backends/fvm_types.hpp>
+#include <backends/multi_event_stream_state.hpp>
+
+namespace nest {
+namespace mc {
+namespace gpu {
+
+namespace kernels {
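+    // One thread per sample event stream (one stream per cell): for each marked
+    // event, write the cell's current time and the probed value to the event's
+    // designated offset in the output arrays.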
+    __global__ void take_samples(
+        multi_event_stream_state<raw_probe_info> s,
+        const fvm_value_type* time,
+        fvm_value_type* sample_time,
+        fvm_value_type* sample_value)
+    {
+        int i = threadIdx.x+blockIdx.x*blockDim.x;
+
+        if (i<s.n) {
+            auto begin = s.ev_data+s.begin_offset[i];
+            auto end = s.ev_data+s.end_offset[i];
+            for (auto p = begin; p!=end; ++p) {
+                sample_time[p->offset] = time[i];
+                sample_value[p->offset] = *p->handle;
+            }
+        }
+    }
+}
+
+void take_samples(
+    const multi_event_stream_state<raw_probe_info>& s,
+    const fvm_value_type* time,
+    fvm_value_type* sample_time,
+    fvm_value_type* sample_value)
+{
+    if (!s.n_streams()) {
+        return;
+    }
+
+    constexpr int blockwidth = 128;
+    int nblock = 1+(s.n_streams()-1)/blockwidth;
+    kernels::take_samples<<<nblock, blockwidth>>>(s, time, sample_time, sample_value);
+}
+
+} // namespace gpu
+} // namespace mc
+} // namespace nest
+
diff --git a/src/backends/gpu/kernels/take_samples.hpp b/src/backends/gpu/kernels/take_samples.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..8fdd0de35a4ff25a167f257f108084ca0231a3de
--- /dev/null
+++ b/src/backends/gpu/kernels/take_samples.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <common_types.hpp>
+#include <backends/event.hpp>
+#include <backends/fvm_types.hpp>
+#include <backends/multi_event_stream_state.hpp>
+
+namespace nest {
+namespace mc {
+namespace gpu {
+
+void take_samples(
+    const multi_event_stream_state<raw_probe_info>& s,
+    const fvm_value_type* time,
+    fvm_value_type* sample_time,
+    fvm_value_type* sample_value);
+
+} // namespace gpu
+} // namespace mc
+} // namespace nest
+
diff --git a/src/backends/gpu/multi_event_stream.cu b/src/backends/gpu/multi_event_stream.cu
index cc19e215c14d191a0e0ac04b579dcad86933025e..f36863c7ac7734598c971d0af1936c7646725353 100644
--- a/src/backends/gpu/multi_event_stream.cu
+++ b/src/backends/gpu/multi_event_stream.cu
@@ -30,6 +30,26 @@ namespace kernels {
         }
     }
 
+    template <typename T, typename I>
+    __global__ void mark_until(
+        I n,
+        I* mark,
+        const I* span_end,
+        const T* ev_time,
+        const T* t_until)
+    {
+        I i = threadIdx.x+blockIdx.x*blockDim.x;
+        if (i<n) {
+            auto t = t_until[i];
+            auto end = span_end[i];
+            auto &m = mark[i];
+
+            while (m!=end && t>ev_time[m]) {
+                ++m;
+            }
+        }
+    }
+
     template <typename T, typename I>
     __global__ void drop_marked_events(
         I n,
@@ -68,74 +88,37 @@ namespace kernels {
     }
 } // namespace kernels
 
-void multi_event_stream::clear() {
+void multi_event_stream_base::clear() {
     memory::fill(span_begin_, 0u);
     memory::fill(span_end_, 0u);
     memory::fill(mark_, 0u);
     n_nonempty_stream_[0] = 0;
 }
 
-void multi_event_stream::init(std::vector<deliverable_event> staged) {
-    if (staged.size()>std::numeric_limits<size_type>::max()) {
-        throw std::range_error("too many events");
-    }
-
-    // Build vectors in host memory here and transfer to the device at end.
-    std::size_t n_ev = staged.size();
-
-    std::vector<size_type> divisions(n_stream_+1, 0);
-    std::vector<size_type> tmp_ev_indices(n_ev);
-    std::vector<value_type> tmp_ev_values(n_ev);
-
-    ev_time_ = array(n_ev);
-    ev_weight_ = array(n_ev);
-    ev_mech_id_ = iarray(n_ev);
-    ev_index_ = iarray(n_ev);
-
-    util::stable_sort_by(staged, [](const deliverable_event& e) { return e.handle.cell_index; });
-
-    // Split out event fields and copy to device.
-    util::assign_by(tmp_ev_values, staged, [](const deliverable_event& e) { return e.weight; });
-    memory::copy(tmp_ev_values, ev_weight_);
-
-    util::assign_by(tmp_ev_values, staged, [](const deliverable_event& e) { return e.time; });
-    memory::copy(tmp_ev_values, ev_time_);
-
-    util::assign_by(tmp_ev_indices, staged, [](const deliverable_event& e) { return e.handle.mech_id; });
-    memory::copy(tmp_ev_indices, ev_mech_id_);
-
-    util::assign_by(tmp_ev_indices, staged, [](const deliverable_event& e) { return e.handle.index; });
-    memory::copy(tmp_ev_indices, ev_index_);
-
-    // Determine divisions by `cell_index` in ev list and copy to device.
-    size_type ev_i = 0;
-    size_type n_nonempty = 0;
-    for (size_type s = 1; s<=n_stream_; ++s) {
-        while (ev_i<n_ev && staged[ev_i].handle.cell_index<s) ++ev_i;
-        divisions[s] = ev_i;
-        n_nonempty += (divisions[s]!=divisions[s-1]);
-    }
+// Designate for processing events `ev` at head of each event stream `i`
+// until `event_time(ev)` > `t_until[i]`.
+void multi_event_stream_base::mark_until_after(const_view t_until) {
+    EXPECTS(n_streams()==util::size(t_until));
 
-    memory::copy(memory::make_view(divisions)(0,n_stream_), span_begin_);
-    memory::copy(memory::make_view(divisions)(1,n_stream_+1), span_end_);
-    memory::copy(span_begin_, mark_);
-    n_nonempty_stream_[0] = n_nonempty;
+    constexpr int blockwidth = 128;
+    int nblock = 1+(n_stream_-1)/blockwidth;
+    kernels::mark_until_after<value_type, size_type><<<nblock, blockwidth>>>(
+        n_stream_, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data());
 }
 
-
 // Designate for processing events `ev` at head of each event stream `i`
-// until `event_time(ev)` > `t_until[i]`.
-void multi_event_stream::mark_until_after(const_view t_until) {
+// while `t_until[i]` > `event_time(ev)`.
+void multi_event_stream_base::mark_until(const_view t_until) {
     EXPECTS(n_streams()==util::size(t_until));
 
     constexpr int blockwidth = 128;
     int nblock = 1+(n_stream_-1)/blockwidth;
-    kernels::mark_until_after<value_type, size_type><<<nblock, blockwidth>>>(
+    kernels::mark_until<value_type, size_type><<<nblock, blockwidth>>>(
         n_stream_, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data());
 }
 
 // Remove marked events from front of each event stream.
-void multi_event_stream::drop_marked_events() {
+void multi_event_stream_base::drop_marked_events() {
     constexpr int blockwidth = 128;
     int nblock = 1+(n_stream_-1)/blockwidth;
     kernels::drop_marked_events<value_type, size_type><<<nblock, blockwidth>>>(
@@ -144,7 +127,7 @@ void multi_event_stream::drop_marked_events() {
 
 // If the head of `i`th event stream exists and has time less than `t_until[i]`, set
 // `t_until[i]` to the event time.
-void multi_event_stream::event_time_if_before(view t_until) {
+void multi_event_stream_base::event_time_if_before(view t_until) {
     constexpr int blockwidth = 128;
     int nblock = 1+(n_stream_-1)/blockwidth;
     kernels::event_time_if_before<value_type, size_type><<<nblock, blockwidth>>>(
diff --git a/src/backends/gpu/multi_event_stream.hpp b/src/backends/gpu/multi_event_stream.hpp
index 9fcce25f17d408da3bbd7a5e1c4e1ee4ac94a52c..6ebe84309e2a4db3cd2927b61adb49c14b1e57e6 100644
--- a/src/backends/gpu/multi_event_stream.hpp
+++ b/src/backends/gpu/multi_event_stream.hpp
@@ -4,17 +4,22 @@
 
 #include <common_types.hpp>
 #include <backends/event.hpp>
+#include <backends/fvm_types.hpp>
+#include <backends/multi_event_stream_state.hpp>
+#include <generic_event.hpp>
 #include <memory/array.hpp>
 #include <memory/copy.hpp>
+#include <util/rangeutil.hpp>
 
 namespace nest {
 namespace mc {
 namespace gpu {
 
-class multi_event_stream {
+// Base class provides common implementations across event types.
+class multi_event_stream_base {
 public:
     using size_type = cell_size_type;
-    using value_type = double;
+    using value_type = fvm_value_type;
 
     using array = memory::device_vector<value_type>;
     using iarray = memory::device_vector<size_type>;
@@ -22,29 +27,20 @@ public:
     using const_view = array::const_view_type;
     using view = array::view_type;
 
-    multi_event_stream() {}
-
-    explicit multi_event_stream(size_type n_stream):
-        n_stream_(n_stream),
-        span_begin_(n_stream),
-        span_end_(n_stream),
-        mark_(n_stream),
-        n_nonempty_stream_(1)
-    {}
-
     size_type n_streams() const { return n_stream_; }
 
     bool empty() const { return n_nonempty_stream_[0]==0; }
 
     void clear();
 
-    // Initialize event streams from a vector of `deliverable_event`.
-    void init(std::vector<deliverable_event> staged);
-
     // Designate for processing events `ev` at head of each event stream `i`
     // until `event_time(ev)` > `t_until[i]`.
     void mark_until_after(const_view t_until);
 
+    // Designate for processing events `ev` at head of each event stream `i`
+    // while `t_until[i]` > `event_time(ev)`.
+    void mark_until(const_view t_until);
+
     // Remove marked events from front of each event stream.
     void drop_marked_events();
 
@@ -52,30 +48,110 @@ public:
     // `t_until[i]` to the event time.
     void event_time_if_before(view t_until);
 
-    // Interface for access by mechanism kernels:
-    struct span_state {
-        size_type n;
-        const size_type* ev_mech_id;
-        const size_type* ev_index;
-        const value_type* ev_weight;
-        const size_type* span_begin;
-        const size_type* mark;
-    };
-
-    span_state delivery_data() const {
-        return {n_stream_, ev_mech_id_.data(), ev_index_.data(), ev_weight_.data(), span_begin_.data(), mark_.data()};
+protected:
+    multi_event_stream_base() {}
+
+    explicit multi_event_stream_base(size_type n_stream):
+        n_stream_(n_stream),
+        span_begin_(n_stream),
+        span_end_(n_stream),
+        mark_(n_stream),
+        n_nonempty_stream_(1)
+    {}
+
+    template <typename Event>
+    void init(std::vector<Event>& staged) {
+        using ::nest::mc::event_time;
+        using ::nest::mc::event_index;
+
+        if (staged.size()>std::numeric_limits<size_type>::max()) {
+            throw std::range_error("too many events");
+        }
+
+        // Sort by index (staged events should already be time-sorted).
+        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_time(ev); }));
+        util::stable_sort_by(staged, [](const Event& ev) { return event_index(ev); });
+
+        std::size_t n_ev = staged.size();
+        tmp_ev_time_.clear();
+        tmp_ev_time_.reserve(n_ev);
+
+        util::assign_by(tmp_ev_time_, staged, [](const Event& ev) { return event_time(ev); });
+        ev_time_ = array(memory::make_view(tmp_ev_time_));
+
+        // Determine divisions by `event_index` in ev list.
+        tmp_divs_.clear();
+        tmp_divs_.reserve(n_stream_+1);
+
+        size_type n_nonempty = 0;
+        size_type ev_begin_i = 0;
+        size_type ev_i = 0;
+        tmp_divs_.push_back(ev_i);
+        for (size_type s = 0; s<n_stream_; ++s) {
+            while (ev_i<n_ev && event_index(staged[ev_i])<s+1) ++ev_i;
+
+            // Within a subrange of events with the same index, events should
+            // be sorted by time.
+            EXPECTS(std::is_sorted(&tmp_ev_time_[ev_begin_i], &tmp_ev_time_[ev_i]));
+            n_nonempty += (tmp_divs_.back()!=ev_i);
+            tmp_divs_.push_back(ev_i);
+            ev_begin_i = ev_i;
+        }
+
+        EXPECTS(tmp_divs_.size()==n_stream_+1);
+        memory::copy(memory::make_view(tmp_divs_)(0,n_stream_), span_begin_);
+        memory::copy(memory::make_view(tmp_divs_)(1,n_stream_+1), span_end_);
+        memory::copy(span_begin_, mark_);
+        n_nonempty_stream_[0] = n_nonempty;
     }
 
-private:
     size_type n_stream_;
     array ev_time_;
-    array ev_weight_;
-    iarray ev_mech_id_;
-    iarray ev_index_;
     iarray span_begin_;
     iarray span_end_;
     iarray mark_;
     iarray n_nonempty_stream_;
+
+    // Host-side vectors for staging values in init():
+    std::vector<value_type> tmp_ev_time_;
+    std::vector<size_type> tmp_divs_;
+};
+
+template <typename Event>
+class multi_event_stream: public multi_event_stream_base {
+public:
+    using event_data_type = ::nest::mc::event_data_type<Event>;
+    using data_array = memory::device_vector<event_data_type>;
+
+    using state = multi_event_stream_state<event_data_type>;
+
+    multi_event_stream() {}
+
+    explicit multi_event_stream(size_type n_stream):
+        multi_event_stream_base(n_stream) {}
+
+    // Initialize event streams from a vector of events, sorted first by index
+    // and then by time.
+    void init(std::vector<Event> staged) {
+        multi_event_stream_base::init(staged); // reorders `staged` in place.
+
+        tmp_ev_data_.clear();
+        tmp_ev_data_.reserve(staged.size());
+
+        using ::nest::mc::event_data;
+        util::assign_by(tmp_ev_data_, staged, [](const Event& ev) { return event_data(ev); });
+        ev_data_ = data_array(memory::make_view(tmp_ev_data_));
+    }
+
+    state marked_events() const {
+        return {n_stream_, ev_data_.data(), span_begin_.data(), mark_.data()};
+    }
+
+private:
+    data_array ev_data_;
+
+    // Host-side vector for staging event data in init():
+    std::vector<event_data_type> tmp_ev_data_;
 };
 
 } // namespace gpu
diff --git a/src/backends/multi_event_stream_state.hpp b/src/backends/multi_event_stream_state.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..4797d691f6f7a59440e2e544eaae9d2d3a0ca8aa
--- /dev/null
+++ b/src/backends/multi_event_stream_state.hpp
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <common_types.hpp>
+
+// Pointer representation of multi-event stream marked event state,
+// common across CPU and GPU backends.
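+//
+// Marked events for stream `i` in a state object `s` can be visited with, e.g.:
+//
+//     for (auto p = s.begin_marked(i); p!=s.end_marked(i); ++p) { /* use *p */ }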
+
+namespace nest {
+namespace mc {
+
+template <typename EvData>
+struct multi_event_stream_state {
+    using value_type = EvData;
+
+    cell_size_type n;                     // number of streams
+    const value_type* ev_data;            // array of event data items
+    const cell_size_type* begin_offset;   // array of offsets to beginning of marked events
+    const cell_size_type* end_offset;     // array of offsets to end of marked events
+
+    cell_size_type n_streams() const {
+        return n;
+    }
+
+    const value_type* begin_marked(cell_size_type i) const {
+        return ev_data+begin_offset[i];
+    }
+
+    const value_type* end_marked(cell_size_type i) const {
+        return ev_data+end_offset[i];
+    }
+};
+
+} // namespace mc
+} // namespace nest
diff --git a/src/backends/multicore/fvm.hpp b/src/backends/multicore/fvm.hpp
index 4c9e761839f132ec58b9057c2d7be8d079d98d06..8e0364f6c5c862ab409e1e44b971ed11cb331edb 100644
--- a/src/backends/multicore/fvm.hpp
+++ b/src/backends/multicore/fvm.hpp
@@ -49,11 +49,10 @@ struct backend {
     using host_iview  = iview;
 
     using matrix_state = nest::mc::multicore::matrix_state<value_type, size_type>;
-    using multi_event_stream = nest::mc::multicore::multi_event_stream;
 
-    // re-expose common backend event types
-    using deliverable_event = nest::mc::deliverable_event;
-    using target_handle = nest::mc::target_handle;
+    // backend-specific multi event streams.
+    using deliverable_event_stream = nest::mc::multicore::multi_event_stream<deliverable_event>;
+    using sample_event_stream = nest::mc::multicore::multi_event_stream<sample_event>;
 
     //
     // mechanism infrastructure
@@ -88,6 +87,11 @@ struct backend {
         return "cpu";
     }
 
+    // dereference a probe handle
+    static value_type dereference(probe_handle h) {
+        return *h; // just a pointer!
+    }
+
     /// threshold crossing logic
     /// used as part of spike detection back end
     using threshold_watcher =
@@ -127,6 +131,24 @@ struct backend {
         }
     }
 
+    // perform sampling as described by marked events in a sample_event_stream
+    static void take_samples(
+        const sample_event_stream::state& s,
+        const_view time,
+        array& sample_time,
+        array& sample_value)
+    {
+        for (size_type i = 0; i<s.n_streams(); ++i) {
+            auto begin = s.begin_marked(i);
+            auto end = s.end_marked(i);
+
+            for (auto p = begin; p<end; ++p) {
+                sample_time[p->offset] = time[i];
+                sample_value[p->offset] = *p->handle;
+            }
+        }
+    }
+
 private:
     using maker_type = mechanism (*)(value_type, const_iview, const_view, const_view, const_view, view, view, array&&, iarray&&);
     static std::map<std::string, maker_type> mech_map_;
diff --git a/src/backends/multicore/multi_event_stream.hpp b/src/backends/multicore/multi_event_stream.hpp
index 06e334d40c1060b2750d6eae98cee6aac4d37906..18b12742052d46209e7ce2761111f27d53108efc 100644
--- a/src/backends/multicore/multi_event_stream.hpp
+++ b/src/backends/multicore/multi_event_stream.hpp
@@ -8,6 +8,9 @@
 
 #include <common_types.hpp>
 #include <backends/event.hpp>
+#include <backends/multi_event_stream_state.hpp>
+#include <generic_event.hpp>
+#include <algorithms.hpp>
 #include <util/debug.hpp>
 #include <util/range.hpp>
 #include <util/rangeutil.hpp>
@@ -17,83 +20,115 @@ namespace nest {
 namespace mc {
 namespace multicore {
 
+template <typename Event>
 class multi_event_stream {
 public:
     using size_type = cell_size_type;
-    using value_type = double;
+    using event_type = Event;
+
+    using event_time_type = ::nest::mc::event_time_type<Event>;
+    using event_data_type = ::nest::mc::event_data_type<Event>;
+    using event_index_type = ::nest::mc::event_index_type<Event>;
+
+    using state = multi_event_stream_state<event_data_type>;
 
     multi_event_stream() {}
 
     explicit multi_event_stream(size_type n_stream):
-       span_(n_stream), mark_(n_stream) {}
+       span_begin_(n_stream), span_end_(n_stream), mark_(n_stream) {}
 
-    size_type n_streams() const { return span_.size(); }
+    size_type n_streams() const { return span_begin_.size(); }
 
     bool empty() const { return remaining_==0; }
 
     void clear() {
-        ev_.clear();
+        ev_data_.clear();
         remaining_ = 0;
 
-        util::fill(span_, span_type(0u, 0u));
+        util::fill(span_begin_, 0u);
+        util::fill(span_end_, 0u);
         util::fill(mark_, 0u);
     }
 
-    // Initialize event streams from a vector of `deliverable_event`.
-    void init(std::vector<deliverable_event> staged) {
+    // Initialize event streams from a vector of events, sorted by time.
+    void init(std::vector<Event> staged) {
+        using ::nest::mc::event_time;
+        using ::nest::mc::event_index;
+        using ::nest::mc::event_data;
+
         if (staged.size()>std::numeric_limits<size_type>::max()) {
             throw std::range_error("too many events");
         }
 
-        ev_ = std::move(staged);
-        util::stable_sort_by(ev_, [](const deliverable_event& e) { return e.handle.cell_index; });
+        // Sort by index (staged events should already be time-sorted).
+        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_time(ev); }));
+        util::stable_sort_by(staged, [](const Event& ev) { return event_index(ev); });
+
+        std::size_t n_ev = staged.size();
+        util::assign_by(ev_data_, staged, [](const Event& ev) { return event_data(ev); });
+        util::assign_by(ev_time_, staged, [](const Event& ev) { return event_time(ev); });
+
+        // Determine divisions by `event_index` in ev list.
+        EXPECTS(n_streams() == span_begin_.size());
+        EXPECTS(n_streams() == span_end_.size());
+        EXPECTS(n_streams() == mark_.size());
+
+        size_type ev_begin_i = 0;
+        size_type ev_i = 0;
+        for (size_type s = 0; s<n_streams(); ++s) {
+            while (ev_i<n_ev && event_index(staged[ev_i])<s+1) ++ev_i;
+
+            // Within a subrange of events with the same index, events should
+            // be sorted by time.
+            EXPECTS(std::is_sorted(&ev_time_[ev_begin_i], &ev_time_[ev_i]));
+            mark_[s] = ev_begin_i;
+            span_begin_[s] = ev_begin_i;
+            span_end_[s] = ev_i;
+            ev_begin_i = ev_i;
+        }
 
-        util::fill(span_, span_type(0u, 0u));
-        util::fill(mark_, 0u);
+        remaining_ = n_ev;
+    }
 
-        size_type si = 0;
-        for (size_type ev_i = 0; ev_i<ev_.size(); ++ev_i) {
-            size_type i = ev_[ev_i].handle.cell_index;
-            EXPECTS(i<n_streams());
-
-            if (si<i) {
-                // Moved on to a new cell: make empty spans for any skipped cells.
-                for (size_type j = si+1; j<i; ++j) {
-                    span_[j].second = span_[si].second;
-                }
-                si = i;
-            }
-            // Include event in this cell's span.
-            span_[si].second = ev_i+1;
-        }
+    // Designate for processing events `ev` at head of each event stream `i`
+    // until `event_time(ev)` > `t_until[i]`.
+    template <typename TimeSeq>
+    void mark_until_after(const TimeSeq& t_until) {
+        using ::nest::mc::event_time;
 
-        while (++si<n_streams()) {
-            span_[si].second = span_[si-1].second;
-        }
+        EXPECTS(n_streams()==util::size(t_until));
 
-        for (size_type i = 1u; i<n_streams(); ++i) {
-            mark_[i] = span_[i].first = span_[i-1].second;
-        }
+        // note: operation on each `i` is independent.
+        for (size_type i = 0; i<n_streams(); ++i) {
+            auto end = span_end_[i];
+            auto t = t_until[i];
 
-        remaining_ = ev_.size();
+            auto mark = span_begin_[i];
+            while (mark!=end && !(ev_time_[mark]>t)) {
+                ++mark;
+            }
+            mark_[i] = mark;
+        }
     }
 
     // Designate for processing events `ev` at head of each event stream `i`
-    // until `event_time(ev)` > `t_until[i]`.
+    // while `t_until[i]` > `event_time(ev)`.
     template <typename TimeSeq>
-    void mark_until_after(const TimeSeq& t_until) {
+    void mark_until(const TimeSeq& t_until) {
+        using ::nest::mc::event_time;
+
         EXPECTS(n_streams()==util::size(t_until));
 
         // note: operation on each `i` is independent.
         for (size_type i = 0; i<n_streams(); ++i) {
-            auto& span = span_[i];
-            auto& mark = mark_[i];
-            auto t = t_until[i]; 
+            auto end = span_end_[i];
+            auto t = t_until[i];
 
-            mark = span.first;
-            while (mark!=span.second && !(ev_[mark].time>t)) {
+            auto mark = span_begin_[i];
+            while (mark!=end && t>ev_time_[mark]) {
                 ++mark;
             }
+            mark_[i] = mark;
         }
     }
 
@@ -101,78 +136,73 @@ public:
     void drop_marked_events() {
         // note: operation on each `i` is independent.
         for (size_type i = 0; i<n_streams(); ++i) {
-            remaining_ -= (mark_[i]-span_[i].first);
-            span_[i].first = mark_[i];
+            remaining_ -= (mark_[i]-span_begin_[i]);
+            span_begin_[i] = mark_[i];
         }
     }
 
-    // Return range of marked events on stream `i`.
-    util::range<deliverable_event*> marked_events(size_type i) {
-        return {&ev_[span_[i].first], &ev_[mark_[i]]};
+    // Interface for access to marked events by mechanisms/kernels:
+    state marked_events() const {
+        return {n_streams(), ev_data_.data(), span_begin_.data(), mark_.data()};
     }
 
     // If the head of `i`th event stream exists and has time less than `t_until[i]`, set
     // `t_until[i]` to the event time.
     template <typename TimeSeq>
     void event_time_if_before(TimeSeq& t_until) {
+        using ::nest::mc::event_time;
+
         // note: operation on each `i` is independent.
         for (size_type i = 0; i<n_streams(); ++i) {
-            const auto& span = span_[i];
-            if (span.second==span.first) {
+            if (span_begin_[i]==span_end_[i]) {
                continue;
             }
 
-            auto ev_t = ev_[span.first].time;
+            auto ev_t = ev_time_[span_begin_[i]];
             if (t_until[i]>ev_t) {
                 t_until[i] = ev_t;
             }
         }
     }
 
-    friend std::ostream& operator<<(std::ostream& out, const multi_event_stream& m);
-private:
-    using span_type = std::pair<size_type, size_type>;
-
-    std::vector<deliverable_event> ev_;
-    std::vector<span_type> span_;
-    std::vector<size_type> mark_;
-    size_type remaining_ = 0;
-};
-
-
-inline std::ostream& operator<<(std::ostream& out, const multi_event_stream& m) {
-    auto n = m.n_streams();
+    friend std::ostream& operator<<(std::ostream& out, const multi_event_stream<Event>& m) {
+        auto n_ev = m.ev_data_.size();
+        auto n = m.n_streams();
 
-    out << "\n[";
-    unsigned i = 0;
-    for (unsigned ev_i = 0; ev_i<m.ev_.size(); ++ev_i) {
-        while (m.span_[i].second<=ev_i && i<n) ++i;
-        if (i<n) {
-            out << util::strprintf(" % 7d ", i);
+        out << "\n[";
+        unsigned i = 0;
+        for (unsigned ev_i = 0; ev_i<n_ev; ++ev_i) {
+            while (m.span_end_[i]<=ev_i && i<n) ++i;
+            out << (i<n? util::strprintf(" % 7d ", i): "      ?");
         }
-        else {
-            out << "      ?";
-        }
-    }
-    out << "\n[";
+        out << "]\n[";
 
-    i = 0;
-    for (unsigned ev_i = 0; ev_i<m.ev_.size(); ++ev_i) {
-        while (m.span_[i].second<=ev_i && i<n) ++i;
+        i = 0;
+        for (unsigned ev_i = 0; ev_i<n_ev; ++ev_i) {
+            while (m.span_end_[i]<=ev_i && i<n) ++i;
 
-        bool discarded = i<n && m.span_[i].first>ev_i;
-        bool marked = i<n && m.mark_[i]>ev_i;
+            bool discarded = i<n && m.span_begin_[i]>ev_i;
+            bool marked = i<n && m.mark_[i]>ev_i;
 
-        if (discarded) {
-            out << "        x";
-        }
-        else {
-            out << util::strprintf(" % 7.3f%c", m.ev_[ev_i].time, marked?'*':' ');
+            if (discarded) {
+                out << "        x";
+            }
+            else {
+                out << util::strprintf(" % 7.3f%c", m.ev_time_[ev_i], marked?'*':' ');
+            }
         }
+        out << "]\n";
+        return out;
     }
-    out << "]\n";
-    return out;
-}
+
+private:
+    std::vector<event_time_type> ev_time_;
+    std::vector<size_type> span_begin_;
+    std::vector<size_type> span_end_;
+    std::vector<size_type> mark_;
+    std::vector<event_data_type> ev_data_;
+    size_type remaining_ = 0;
+};
 
 } // namespace multicore
 } // namespace nest
diff --git a/src/common_types.hpp b/src/common_types.hpp
index 9a4530a82835873599791c2c80bc3f12c1ae2ca3..c0499a8eb07e9c693268b9756ba75a2e2434ae9a 100644
--- a/src/common_types.hpp
+++ b/src/common_types.hpp
@@ -59,6 +59,10 @@ using time_type = float;
 
 using probe_tag = int;
 
+// For holding counts and indexes into generated sample data.
+
+using sample_size_type = std::int32_t;
+
 // Enumeration used to indentify the cell type/kind, used by the model to
 // group equal kinds in the same cell group.
 
diff --git a/src/event_queue.hpp b/src/event_queue.hpp
index 113ac319e8efa06e6f0c07d0716db45fd916097e..7f1e8e3f211d7f46b2ceab069eecf5e692cbdcff 100644
--- a/src/event_queue.hpp
+++ b/src/event_queue.hpp
@@ -8,6 +8,7 @@
 #include <utility>
 
 #include "common_types.hpp"
+#include "generic_event.hpp"
 #include "util/meta.hpp"
 #include "util/optional.hpp"
 #include "util/range.hpp"
@@ -18,7 +19,7 @@ namespace mc {
 
 /* Event classes `Event` used with `event_queue` must be move and copy constructible,
  * and either have a public field `time` that returns the time value, or provide an
- * overload of `event_time(const Event&)` which returns this value.
+ * overload of `event_time(const Event&)` which returns this value (see generic_event.hpp).
  *
  * Time values must be well ordered with respect to `operator>`.
  */
@@ -38,34 +39,11 @@ struct postsynaptic_spike_event {
     }
 };
 
-struct sample_event {
-    using size_type = std::uint32_t;
-
-    size_type sampler_index;
-    time_type time;
-};
-
-// Configuration point: define `event_time(ev)` for event objects `ev`
-// that do not have the corresponding `time` member field.
-
-template <typename Event>
-auto event_time(const Event& ev) -> decltype(ev.time) {
-    return ev.time;
-}
-
-namespace impl {
-    using ::nest::mc::event_time;
-
-    // wrap in `impl::` namespace to obtain correct ADL for return type.
-    template <typename Event>
-    using event_time_type = decltype(event_time(std::declval<Event>()));
-}
-
 template <typename Event>
 class event_queue {
 public :
     using value_type = Event;
-    using event_time_type = impl::event_time_type<Event>;
+    using event_time_type = ::nest::mc::event_time_type<Event>;
 
     event_queue() {}
 
diff --git a/src/fvm_multicell.hpp b/src/fvm_multicell.hpp
index f74298e2f3cb388c72061983c3c86a809e605c3f..dad507944ed787b7cca37fc66affefa6fdfe038f 100644
--- a/src/fvm_multicell.hpp
+++ b/src/fvm_multicell.hpp
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include <algorithms.hpp>
+#include <backends/event.hpp>
 #include <backends/fvm_types.hpp>
 #include <cell.hpp>
 #include <compartment.hpp>
@@ -61,10 +62,17 @@ public:
     /// the container used for indexes
     using iarray = typename backend::iarray;
 
-    using probe_handle = std::pair<const array fvm_multicell::*, size_type>;
+    using view = typename array::view_type;
+    using const_view = typename array::const_view_type;
 
-    using target_handle = typename backend::target_handle;
-    using deliverable_event = typename backend::deliverable_event;
+    /// the type (view or copy) for a const host-side view of an array
+    using host_view = decltype(memory::on_host(std::declval<array>()));
+
+    // handles and events are currently common across implementations;
+    // re-expose definitions from `backends/event.hpp`.
+    using target_handle = ::nest::mc::target_handle;
+    using probe_handle = ::nest::mc::probe_handle;
+    using deliverable_event = ::nest::mc::deliverable_event;
 
     fvm_multicell() = default;
 
@@ -85,17 +93,24 @@ public:
 
     void reset();
 
-    // fvm_multicell::deliver_event is used only for testing
+    // fvm_multicell::deliver_event is used only for testing.
     void deliver_event(target_handle h, value_type weight) {
-        mechanisms_[h.mech_id]->net_receive(h.index, weight);
+        mechanisms_[h.mech_id]->net_receive(h.mech_index, weight);
     }
 
+    // fvm_multicell::probe is used only for testing.
     value_type probe(probe_handle h) const {
-        return (this->*h.first)[h.second];
+        return backend::dereference(h); // h is a pointer, but might be device-side.
     }
 
     // Initialize state prior to a sequence of integration steps.
-    void setup_integration(value_type tfinal, value_type dt_max) {
+    // `staged_events` and `staged_samples` are expected to be
+    // sorted by event time.
+    void setup_integration(
+        value_type tfinal, value_type dt_max,
+        const std::vector<deliverable_event>& staged_events,
+        const std::vector<sample_event>& staged_samples)
+    {
         EXPECTS(dt_max>0);
 
         tfinal_ = tfinal;
@@ -105,8 +120,16 @@ public:
 
         EXPECTS(!has_pending_events());
 
-        events_->init(std::move(staged_events_));
-        staged_events_.clear();
+        n_samples_ = staged_samples.size();
+
+        events_.init(staged_events);
+        sample_events_.init(staged_samples);
+
+        // Reallocate sample buffers if necessary.
+        if (sample_value_.size()<n_samples_) {
+            sample_value_ = array(n_samples_);
+            sample_time_ = array(n_samples_);
+        }
     }
 
     // Advance one integration step.
@@ -117,6 +140,19 @@ public:
         return min_remaining_steps_==0;
     }
 
+    // Access to sample data post-integration.
+    decltype(memory::make_const_view(std::declval<host_view>())) sample_value() const {
+        EXPECTS(sample_events_.empty());
+        host_sample_value_ = memory::on_host(sample_value_);
+        return host_sample_value_;
+    }
+
+    decltype(memory::make_const_view(std::declval<host_view>())) sample_time() const {
+        EXPECTS(sample_events_.empty());
+        host_sample_time_ = memory::on_host(sample_time_);
+        return host_sample_time_;
+    }
+
     // Query per-cell time state.
     // Placeholder: external time queries will no longer be required when
     // complete integration loop is in lowered cell.
@@ -149,12 +185,6 @@ public:
         invalidate_time_cache();
     }
 
-    /// Add an event for processing in next integration stage.
-    void add_event(value_type ev_time, target_handle h, value_type weight) {
-        EXPECTS(integration_complete());
-        staged_events_.push_back(deliverable_event(ev_time, h, weight));
-    }
-
     /// Following types and methods are public only for testing:
 
     /// the type used to store matrix information
@@ -173,11 +203,6 @@ public:
     using iview = typename backend::iview;
     using const_iview = typename backend::const_iview;
 
-    /// view into value container
-    using view = typename backend::view;
-    using const_view = typename backend::const_view;
-
-    /// which requires const_view in the vector library
     const matrix_type& jacobian() { return matrix_; }
 
     /// return list of CV areas in :
@@ -287,17 +312,26 @@ private:
         }
     }
 
-    /// events staged for upcoming integration stage
-    std::vector<deliverable_event> staged_events_;
-
     /// event queue for integration period
-    using multi_event_stream = typename backend::multi_event_stream;
-    std::unique_ptr<multi_event_stream> events_;
+    using deliverable_event_stream = typename backend::deliverable_event_stream;
+    deliverable_event_stream events_;
 
     bool has_pending_events() const {
-        return events_ && !events_->empty();
+        return !events_.empty();
     }
 
+    /// sample events for integration period
+    using sample_event_stream = typename backend::sample_event_stream;
+    sample_event_stream sample_events_;
+
+    /// sample buffers
+    size_type n_samples_ = 0;
+    array sample_value_;
+    array sample_time_;
+
+    mutable host_view host_sample_value_; // host-side views/copies of sample data
+    mutable host_view host_sample_time_;
+
     /// the linear system for implicit time stepping of cell state
     matrix_type matrix_;
 
@@ -592,7 +626,8 @@ void fvm_multicell<Backend>::initialize(
     std::vector<size_type> group_parent_index(ncomp);
 
     // setup per-cell event stores.
-    events_ = util::make_unique<multi_event_stream>(ncell_);
+    events_ = deliverable_event_stream(ncell_);
+    sample_events_ = sample_event_stream(ncell_);
 
     // Create each cell:
 
@@ -712,10 +747,10 @@ void fvm_multicell<Backend>::initialize(
 
             switch (where.kind) {
             case cell_probe_address::membrane_voltage:
-                handle = {&fvm_multicell::voltage_, comp};
+                handle = fvm_multicell::voltage_.data()+comp;
                 break;
             case cell_probe_address::membrane_current:
-                handle = {&fvm_multicell::current_, comp};
+                handle = fvm_multicell::current_.data()+comp;
                 break;
             default:
                 throw std::logic_error("unrecognized probeKind");
@@ -892,14 +927,13 @@ void fvm_multicell<Backend>::reset() {
     tfinal_ = 0;
     dt_max_ = 0;
     min_remaining_steps_ = 0;
-    staged_events_.clear();
-    events_->clear();
+    events_.clear();
+    sample_events_.clear();
 
     EXPECTS(integration_complete());
     EXPECTS(!has_pending_events());
 }
 
-
 template <typename Backend>
 void fvm_multicell<Backend>::step_integration() {
     EXPECTS(!integration_complete());
@@ -908,27 +942,33 @@ void fvm_multicell<Backend>::step_integration() {
     memory::fill(current_, 0.);
 
     // mark pending events for delivery
-    events_->mark_until_after(time_);
+    events_.mark_until_after(time_);
 
     // deliver pending events and update current contributions from mechanisms
-    for(auto& m: mechanisms_) {
+    for (auto& m: mechanisms_) {
         PE(m->name().c_str());
-        m->deliver_events(*events_);
+        m->deliver_events(events_.marked_events());
         m->nrn_current();
         PL();
     }
 
     // remove delivered events from queue and set time_to_
-    events_->drop_marked_events();
+    events_.drop_marked_events();
 
     backend::update_time_to(time_to_, time_, dt_max_, tfinal_);
     invalidate_time_cache();
-    events_->event_time_if_before(time_to_);
+    events_.event_time_if_before(time_to_);
     PL();
 
     // set per-cell and per-compartment dt (constant within a cell)
     backend::set_dt(dt_cell_, dt_comp_, time_to_, time_, cv_to_cell_);
 
+    // take samples if they lie within the integration step; they will be provided
+    // with the values (post-event delivery) at the beginning of the interval.
+    sample_events_.mark_until(time_to_);
+    backend::take_samples(sample_events_.marked_events(), time_, sample_time_, sample_value_);
+    sample_events_.drop_marked_events();
+
     // solve the linear system
     PE("matrix", "setup");
     matrix_.assemble(dt_cell_, voltage_, current_);
diff --git a/src/generic_event.hpp b/src/generic_event.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..f405261663cebb2a71268786439c04c3725c7bca
--- /dev/null
+++ b/src/generic_event.hpp
@@ -0,0 +1,100 @@
+#pragma once
+
+// Generic accessors for event types used in `event_queue` and
+// `multi_event_stream`.
+//
+// 1. event_time(const Event&):
+//
+//    Returns ordered type (typically `time_type`) representing
+//    the event time. Default implementation returns `e.time`
+//    for an event `e`.
+//
+// 2. event_index(const Event&):
+//
+//    Returns the stream index associated with the event (an
+//    unsigned index type), for use with `multi_event_stream`.
+//    Default implementation returns `e.index` for an event `e`.
+//
+// 3. event_data(const Event&):
+//
+//    Returns the event _payload_, viz. the event data that
+//    does not include (necessarily) the time or index. This
+//    is used with `multi_event_stream`.
+//    Default implementation returns `e.data` for an event `e`.
+//
+// The type aliases event_time_type<Event> and event_index_type<Event>
+// give the corresponding return types.
+//
+// The accessors act as customization points, in that they can be
+// specialized for a particular event class. In order for ADL
+// to work correctly across namespaces, the accessor functions
+// should be brought into scope with a `using` declaration.
+//
+// Example use:
+//
+// template <typename Event>
+// bool is_before(const Event& a, const Event& b) {
+//     using ::nest::mc::event_time;
+//     return event_time(a)<event_time(b);
+// }
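+//
+// An event type whose members do not match these defaults can supply its own
+// overloads instead; for a hypothetical event type this might look like:
+//
+//     struct my_event {
+//         time_type when;
+//         cell_size_type cell;
+//         float payload;
+//     };
+//
+//     time_type event_time(const my_event& ev) { return ev.when; }
+//     cell_size_type event_index(const my_event& ev) { return ev.cell; }
+//     float event_data(const my_event& ev) { return ev.payload; }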
+
+namespace nest {
+namespace mc {
+
+template <typename Event>
+auto event_time(const Event& ev) -> decltype(ev.time) {
+    return ev.time;
+}
+
+template <typename Event>
+auto event_index(const Event& ev) -> decltype(ev.index) {
+    return ev.index;
+}
+
+template <typename Event>
+auto event_data(const Event& ev) -> decltype(ev.data) {
+    return ev.data;
+}
+
+namespace impl {
+    // Wrap in `impl::` namespace to obtain correct ADL for return type.
+
+    using ::nest::mc::event_time;
+    using ::nest::mc::event_index;
+    using ::nest::mc::event_data;
+
+    template <typename Event>
+    using event_time_type = decltype(event_time(std::declval<Event>()));
+
+    template <typename Event>
+    using event_index_type = decltype(event_index(std::declval<Event>()));
+
+    template <typename Event>
+    using event_data_type = decltype(event_data(std::declval<Event>()));
+}
+
+template <typename Event>
+using event_time_type = impl::event_time_type<Event>;
+
+template <typename Event>
+using event_index_type = impl::event_index_type<Event>;
+
+template <typename Event>
+using event_data_type = impl::event_data_type<Event>;
+
+} // namespace mc
+} // namespace nest
+
diff --git a/src/mc_cell_group.hpp b/src/mc_cell_group.hpp
index 5fa97cc2e6632ab1069ea0c0a3154e0c1d7a688d..80793199762c2a53392545ee673e3f6b3aef7425 100644
--- a/src/mc_cell_group.hpp
+++ b/src/mc_cell_group.hpp
@@ -7,6 +7,7 @@
 #include <vector>
 
 #include <algorithms.hpp>
+#include <backends/event.hpp>
 #include <cell_group.hpp>
 #include <cell.hpp>
 #include <common_types.hpp>
@@ -82,107 +83,132 @@ public:
         binner_ = event_binner(policy, bin_interval);
     }
 
+
     void advance(time_type tfinal, time_type dt) override {
+        PE("advance");
         EXPECTS(lowered_.state_synchronized());
         time_type tstart = lowered_.min_time();
 
-        // Bin pending events and enqueue on lowered state.
-        time_type ev_min_time = lowered_.max_time(); // (but we're synchronized here)
+        PE("event-setup");
+        // Bin pending events and batch for lowered cell.
+        std::vector<deliverable_event> staged_events;
+        staged_events.reserve(events_.size());
+
         while (auto ev = events_.pop_if_before(tfinal)) {
-            auto handle = get_target_handle(ev->target);
-            auto binned_ev_time = binner_.bin(ev->target.gid, ev->time, ev_min_time);
-            lowered_.add_event(binned_ev_time, handle, ev->weight);
+            staged_events.emplace_back(
+                binner_.bin(ev->target.gid, ev->time, tstart),
+                get_target_handle(ev->target),
+                ev->weight);
         }
+        PL();
 
-        lowered_.setup_integration(tfinal, dt);
-
-        // Set up sample event queue.
-        struct sampler_entry {
-            typename lowered_cell_type::probe_handle handle;
-            probe_tag tag;
-            cell_member_type probe_id;
+        // Create sample events and delivery information.
+        //
+        // For each (schedule, sampler, probe set) in the sampler association
+        // map that will be triggered in this integration interval, create
+        // sample events for the lowered cell, one for each scheduled sample
+        // time and probe in the probe set.
+        //
+        // Each event is given an offset into the sample data and time buffers.
+        // Offsets are assigned contiguously, so that all samples belonging to
+        // one probe in one association are adjacent; a single call to a sampler
+        // callback can then be described by a `sampler_call_info` value as
+        // defined below.
+
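+        // Illustrative sketch of the resulting bookkeeping (hypothetical values):
+        // if one association has probes {P0, P1} and three sample times fall in
+        // this interval, six sample events are generated and the per-probe
+        // offsets are assigned contiguously, e.g.
+        //
+        //     call_info[0] == {sampler, P0, tag0, 0, 3}
+        //     call_info[1] == {sampler, P1, tag1, 3, 6}
+        //
+        // so that each callback later receives its three samples in one batch.
+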
+        struct sampler_call_info {
             sampler_function sampler;
+            cell_member_type probe_id;
+            probe_tag tag;
+
+            // Offsets are into the lowered cell's sample time and value arrays.
+            sample_size_type begin_offset;
+            sample_size_type end_offset;
         };
-        std::vector<sampler_entry> samplers;
 
-        for (auto &assoc: sampler_map_) {
-            auto ts = assoc.sched.events(tstart, tfinal);
-            if (ts.empty()) {
+        PE("sample-event-setup");
+        std::vector<sampler_call_info> call_info;
+
+        std::vector<sample_event> sample_events;
+        sample_size_type n_samples = 0;
+        sample_size_type max_samples_per_call = 0;
+
+        for (auto& sa: sampler_map_) {
+            auto sample_times = sa.sched.events(tstart, tfinal);
+            if (sample_times.empty()) {
                 continue;
             }
 
-            for (auto p: assoc.probe_ids) {
-                EXPECTS(probe_map_.count(p)>0);
-                sample_event::size_type idx = samplers.size();
-                auto pinfo = probe_map_[p];
+            sample_size_type n_times = sample_times.size();
+            max_samples_per_call = std::max(max_samples_per_call, n_times);
 
-                samplers.push_back({pinfo.handle, pinfo.tag, p, assoc.sampler});
+            for (cell_member_type pid: sa.probe_ids) {
+                auto cell_index = gid_to_index(pid.gid);
+                auto p = probe_map_[pid];
 
-                for (auto t: ts) {
-                    sample_events_.push({idx, t});
-                }
-            }
-        }
+                call_info.push_back({sa.sampler, pid, p.tag, n_samples, n_samples+n_times});
 
-        util::optional<time_type> first_sample_time = sample_events_.time_if_before(tfinal);
-        std::vector<sample_event> requeue_sample_events;
-        while (!lowered_.integration_complete()) {
-            // Take any pending samples.
-            // TODO: Placeholder: this will be replaced by a backend polling implementation.
-
-            if (first_sample_time) {
-                PE("sampling");
-                time_type cell_max_time = lowered_.max_time();
-
-                requeue_sample_events.clear();
-                while (auto m = sample_events_.pop_if_before(cell_max_time)) {
-                    auto& s = samplers[m->sampler_index];
-                    EXPECTS((bool)s.sampler);
-
-                    time_type cell_time = lowered_.time(gid_to_index(s.probe_id.gid));
-                    if (cell_time<m->time) {
-                        // This cell hasn't reached this sample time yet.
-                        requeue_sample_events.push_back(*m);
-                    }
-                    else {
-                        const double value = lowered_.probe(s.handle);
-                        sample_record record = {cell_time, &value};
-                        s.sampler(s.probe_id, s.tag, 1u, &record);
-                    }
+                for (auto t: sample_times) {
+                    sample_event ev{t, cell_index, {p.handle, n_samples++}};
+                    sample_events.push_back(ev);
                 }
-                for (auto& ev: requeue_sample_events) {
-                    sample_events_.push(std::move(ev));
-                }
-                first_sample_time = sample_events_.time_if_before(tfinal);
-                PL();
             }
+        }
 
-            // Ask lowered_ cell to integrate 'one step', delivering any
-            // events accordingly.
-            // TODO: Placeholder: with backend polling for samplers, we will
-            // request that the lowered cell perform the integration all the
-            // way to tfinal.
+        // Sample events must be ordered by time for the lowered cell.
+        util::sort_by(sample_events, [](const sample_event& ev) { return event_time(ev); });
+        PL();
 
+        // Run integration.
+        lowered_.setup_integration(tfinal, dt, std::move(staged_events), std::move(sample_events));
+        PE("integrator-steps");
+        while (!lowered_.integration_complete()) {
             lowered_.step_integration();
-
             if (util::is_debug_mode() && !lowered_.is_physical_solution()) {
                 std::cerr << "warning: solution out of bounds  at (max) t "
                           << lowered_.max_time() << " ms\n";
             }
         }
+        PL();
+
+
+        // For each sampler callback registered in `call_info`, construct the
+        // vector of sample records from the lowered cell's sample times and
+        // values, then invoke the callback.
+
+        PE("sample-deliver");
+        std::vector<sample_record> sample_records;
+        sample_records.reserve(max_samples_per_call);
+
+        auto sample_time = lowered_.sample_time();
+        auto sample_value = lowered_.sample_value();
+
+        for (auto& sc: call_info) {
+            sample_records.clear();
+            for (auto i = sc.begin_offset; i!=sc.end_offset; ++i) {
+                sample_records.push_back(sample_record{time_type(sample_time[i]), &sample_value[i]});
+            }
+
+            sc.sampler(sc.probe_id, sc.tag, sc.end_offset-sc.begin_offset, sample_records.data());
+        }
+        PL();
 
         // Copy out spike voltage threshold crossings from the back end, then
         // generate spikes with global spike source ids. The threshold crossings
         // record the local spike source index, which must be converted to a
         // global index for spike communication.
-        PE("events");
+
+        PE("spike-retrieve");
         for (auto c: lowered_.get_spikes()) {
             spikes_.push_back({spike_sources_[c.index], time_type(c.time)});
         }
+
         // Now that the spikes have been generated, clear the old crossings
         // to get ready to record spikes from the next integration period.
+
         lowered_.clear_spikes();
         PL();
+
+        PL();
     }
 
     void enqueue_events(const std::vector<postsynaptic_spike_event>& events) override {
diff --git a/src/mechanism.hpp b/src/mechanism.hpp
index b0bc86e6b7f81d973f452dffa5f33a6781466746..b900057395feac95c000314f2ba81a8c213517a7 100644
--- a/src/mechanism.hpp
+++ b/src/mechanism.hpp
@@ -4,6 +4,8 @@
 #include <memory>
 #include <string>
 
+#include <backends/event.hpp>
+#include <backends/multi_event_stream_state.hpp>
 #include <ion.hpp>
 #include <parameter_list.hpp>
 #include <util/indirect.hpp>
@@ -39,7 +41,7 @@ public:
 
     using ion_type = ion<backend>;
 
-    using multi_event_stream = typename backend::multi_event_stream;
+    using deliverable_event_stream_state = multi_event_stream_state<deliverable_event_data>;
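+    // The state type exposes, per cell, the contiguous range of marked
+    // deliverable_event_data entries that is passed to deliver_events() below.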
 
     mechanism(size_type mech_id, const_iview vec_ci, const_view vec_t, const_view vec_t_to, const_view vec_dt, view vec_v, view vec_i, iarray&& node_index):
         mech_id_(mech_id),
@@ -66,7 +68,7 @@ public:
     virtual void nrn_init()     = 0;
     virtual void nrn_state()    = 0;
     virtual void nrn_current()  = 0;
-    virtual void deliver_events(multi_event_stream& events) {};
+    virtual void deliver_events(const deliverable_event_stream_state& events) {};
     virtual bool uses_ion(ionKind) const = 0;
     virtual void set_ion(ionKind k, ion_type& i, const std::vector<size_type>& index) = 0;
     virtual mechanismKind kind() const = 0;
diff --git a/src/util/filter.hpp b/src/util/filter.hpp
index 25041e043e956560022eb21ca32e4190b2edf279..b28be5228266189763a709342b84cc1fa284e28e 100644
--- a/src/util/filter.hpp
+++ b/src/util/filter.hpp
@@ -110,6 +110,7 @@ public:
             inner_ = std::move(other.inner_);
             end_ = std::move(other.end_);
             ok_ = other.ok_;
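+            // Note: f_ appears to hold its functional in uninitialized storage,
+            // so destroy the current value before constructing from the
+            // moved-from iterator's functional (previously construct() was
+            // called over a live object).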
+            f_.destruct();
             f_.construct(std::move(other.f_.ref()));
         }
         return *this;
diff --git a/src/util/rangeutil.hpp b/src/util/rangeutil.hpp
index b5d4d040ebbc848e87f1b44d0926ca85602a5594..e7ee41eda2add841410c48f6e315da2391e918b3 100644
--- a/src/util/rangeutil.hpp
+++ b/src/util/rangeutil.hpp
@@ -215,7 +215,6 @@ bool any_of(const Seq& seq, const Predicate& pred) {
     return std::any_of(std::begin(canon), std::end(canon), pred);
 }
 
-
 // Accumulate by projection `proj`
 
 template <
@@ -320,6 +319,53 @@ std::pair<Value, Value> minmax_value(const Seq& seq, Compare cmp = Compare{}) {
 template <typename Map>
 auto keys(Map& m) DEDUCED_RETURN_TYPE(util::transform_view(m, util::first));
 
+// Test if a sequence is sorted after applying the projection `proj` to its elements.
+// (TODO: this will perform unnecessary copies if `proj` returns a reference;
+// specialize on this if it becomes an issue.)
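+//
+// Illustrative usage (hypothetical data):
+//
+//     std::vector<std::pair<int, double>> v = {{1, 0.5}, {2, 0.25}, {7, 0.125}};
+//     bool ordered = util::is_sorted_by(v,
+//         [](const std::pair<int, double>& p) { return p.first; });
+//     // ordered == true: the projected keys 1, 2, 7 are non-decreasing.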
+
+template <
+    typename Seq,
+    typename Proj,
+    typename Compare = std::less<typename std::result_of<Proj (typename sequence_traits<Seq>::value_type)>::type>
+>
+bool is_sorted_by(const Seq& seq, const Proj& proj, Compare cmp = Compare{}) {
+    auto i = std::begin(seq);
+    auto e = std::end(seq);
+
+    if (i==e) {
+        return true;
+    }
+
+    // Special one-element case for forward iterators.
+    if (is_forward_iterator<decltype(i)>::value) {
+        auto j = i;
+        if (++j==e) {
+            return true;
+        }
+    }
+
+    auto v = proj(*i++);
+
+    for (;;) {
+        if (i==e) {
+            return true;
+        }
+        auto u = proj(*i++);
+
+        if (cmp(u, v)) {
+            return false;
+        }
+
+        if (i==e) {
+            return true;
+        }
+        v = proj(*i++);
+
+        if (cmp(v, u)) {
+            return false;
+        }
+    }
+}
 
 template <typename C, typename Seq>
 C make_copy(Seq const& seq) {
diff --git a/tests/unit/test_fvm_multi.cpp b/tests/unit/test_fvm_multi.cpp
index 78057b856024884a3576f70a2c369a4c9dc2a3c6..97b9f2fa631eb067e0fa83be14f02801b548405d 100644
--- a/tests/unit/test_fvm_multi.cpp
+++ b/tests/unit/test_fvm_multi.cpp
@@ -83,7 +83,7 @@ TEST(fvm_multi, init)
 
     // test that the matrix is initialized with sensible values
     //J.build_matrix(0.01);
-    fvcell.setup_integration(0.01, 0.01);
+    fvcell.setup_integration(0.01, 0.01, {}, {});
     fvcell.step_integration();
 
     auto& mat = J.state_;
@@ -397,7 +397,7 @@ void run_target_handle_test(std::vector<handle_info> all_handles) {
             const auto& mech = fvcell.mechanisms()[targets[i].mech_id];
             const auto& cvidx = mech->node_index();
             EXPECT_EQ(h.mech, mech->name());
-            EXPECT_EQ(h.cv, cvidx[targets[i].index]);
+            EXPECT_EQ(h.cv, cvidx[targets[i].mech_index]);
             EXPECT_EQ(h.cell, targets[i].cell_index);
             ++i;
         }
diff --git a/tests/unit/test_mc_cell_group.cu b/tests/unit/test_mc_cell_group.cu
index 1abdbda3c908cb8cca22c2371ea8490b7d04d4fd..0bf73d492addf3503191acb956be9538179910a4 100644
--- a/tests/unit/test_mc_cell_group.cu
+++ b/tests/unit/test_mc_cell_group.cu
@@ -10,14 +10,10 @@
 #include "../simple_recipes.hpp"
 
 using namespace nest::mc;
+using fvm_cell = fvm::fvm_multicell<nest::mc::gpu::backend>;
 
-using fvm_cell =
-    fvm::fvm_multicell<nest::mc::gpu::backend>;
-
-nest::mc::cell make_cell() {
-    using namespace nest::mc;
-
-    cell c = make_cell_ball_and_stick();
+cell make_cell() {
+    auto c = make_cell_ball_and_stick();
 
     c.add_detector({0, 0}, 0);
     c.segment(1)->set_compartments(101);
@@ -25,7 +21,7 @@ nest::mc::cell make_cell() {
     return c;
 }
 
-TEST(cell_group, test)
+TEST(mc_cell_group, test)
 {
     mc_cell_group<fvm_cell> group({0u}, cable1d_recipe(make_cell()));
 
diff --git a/tests/unit/test_multi_event_stream.cpp b/tests/unit/test_multi_event_stream.cpp
index 754b9f7071fb9d7a6a28ecce9e101627689626ed..7b3c8738b8f171bc801871118f5668b54d33be17 100644
--- a/tests/unit/test_multi_event_stream.cpp
+++ b/tests/unit/test_multi_event_stream.cpp
@@ -3,6 +3,7 @@
 
 #include <backends/event.hpp>
 #include <backends/multicore/multi_event_stream.hpp>
+#include <util/rangeutil.hpp>
 
 using namespace nest::mc;
 
@@ -35,13 +36,22 @@ namespace common_events {
     };
 }
 
+namespace {
+    // convenience wrapper around marked_events:
+    template <typename MultiEventStream>
+    auto marked_range(const MultiEventStream& m, unsigned i)
+        DEDUCED_RETURN_TYPE(util::make_range(m.marked_events().begin_marked(i), m.marked_events().end_marked(i)))
+}
+
 TEST(multi_event_stream, init) {
-    using multi_event_stream = multicore::multi_event_stream;
+    using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
     using namespace common_events;
 
     multi_event_stream m(n_cell);
     EXPECT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
     EXPECT_FALSE(m.empty());
 
@@ -50,16 +60,18 @@ TEST(multi_event_stream, init) {
 }
 
 TEST(multi_event_stream, mark) {
-    using multi_event_stream = multicore::multi_event_stream;
+    using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
     using namespace common_events;
 
     multi_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
     for (cell_size_type i = 0; i<n_cell; ++i) {
-        EXPECT_TRUE(m.marked_events(i).empty());
+        EXPECT_TRUE(marked_range(m, i).empty());
     }
 
     std::vector<time_type> t_until(n_cell);
@@ -73,18 +85,18 @@ TEST(multi_event_stream, mark) {
     //     events[2] (with handle 3) at t=3.f on cell_3
 
     for (cell_size_type i = 0; i<n_cell; ++i) {
-        auto evs = m.marked_events(i);
+        auto evs = marked_range(m, i);
         auto n_marked = evs.size();
         switch (i) {
         case cell_2:
             EXPECT_EQ(1u, n_marked);
-            EXPECT_EQ(handle[1].mech_id, evs.front().handle.mech_id);
-            EXPECT_EQ(handle[1].index, evs.front().handle.index);
+            EXPECT_EQ(handle[1].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[1].mech_index, evs.front().mech_index);
             break;
         case cell_3:
             EXPECT_EQ(1u, n_marked);
-            EXPECT_EQ(handle[3].mech_id, evs.front().handle.mech_id);
-            EXPECT_EQ(handle[3].index, evs.front().handle.index);
+            EXPECT_EQ(handle[3].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[3].mech_index, evs.front().mech_index);
             break;
         default:
             EXPECT_EQ(0u, n_marked);
@@ -92,7 +104,7 @@ TEST(multi_event_stream, mark) {
         }
     }
 
-    // Drop these events and mark all events up to t=5.f.
+    // Drop these events and mark all events up to and including t=5.f.
     //     cell_1 should have one marked event (events[1], handle 0)
     //     cell_2 should have one marked event (events[3], handle 2)
 
@@ -101,18 +113,18 @@ TEST(multi_event_stream, mark) {
     m.mark_until_after(t_until);
 
     for (cell_size_type i = 0; i<n_cell; ++i) {
-        auto evs = m.marked_events(i);
+        auto evs = marked_range(m, i);
         auto n_marked = evs.size();
         switch (i) {
         case cell_1:
             EXPECT_EQ(1u, n_marked);
-            EXPECT_EQ(handle[0].mech_id, evs.front().handle.mech_id);
-            EXPECT_EQ(handle[0].index, evs.front().handle.index);
+            EXPECT_EQ(handle[0].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[0].mech_index, evs.front().mech_index);
             break;
         case cell_2:
             EXPECT_EQ(1u, n_marked);
-            EXPECT_EQ(handle[2].mech_id, evs.front().handle.mech_id);
-            EXPECT_EQ(handle[2].index, evs.front().handle.index);
+            EXPECT_EQ(handle[2].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[2].mech_index, evs.front().mech_index);
             break;
         default:
             EXPECT_EQ(0u, n_marked);
@@ -124,15 +136,49 @@ TEST(multi_event_stream, mark) {
     EXPECT_FALSE(m.empty());
     m.drop_marked_events();
     EXPECT_TRUE(m.empty());
+
+    // Confirm different semantics of `mark_until`.
+
+    m.init(events);
+    t_until[cell_1] = 3.1f;
+    t_until[cell_2] = 1.9f;
+    t_until[cell_3] = 3.f;
+    m.mark_until(t_until);
+
+    // Only one event should be marked:
+    //     events[1] (with handle 0) at t=3.f on cell_1
+    //
+    // events[2] (with handle 3) at t=3.f on cell_3 should not be marked (3.f is not less than 3.f);
+    // events[0] (with handle 1) at t=2.f on cell_2 should not be marked (2.f is not less than 1.9f).
+
+    for (cell_size_type i = 0; i<n_cell; ++i) {
+        auto evs = marked_range(m, i);
+        auto n_marked = evs.size();
+        switch (i) {
+        case cell_1:
+            EXPECT_EQ(1u, n_marked);
+            EXPECT_EQ(handle[0].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[0].mech_index, evs.front().mech_index);
+            break;
+        default:
+            EXPECT_EQ(0u, n_marked);
+            break;
+        }
+    }
+
 }
 
 TEST(multi_event_stream, time_if_before) {
-    using multi_event_stream = multicore::multi_event_stream;
+    using multi_event_stream = multicore::multi_event_stream<deliverable_event>;
     using namespace common_events;
 
     multi_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
     // Test times less than all event times (first event at t=2).
diff --git a/tests/unit/test_multi_event_stream.cu b/tests/unit/test_multi_event_stream.cu
index a6d5d3a5a5ec440b9f16a6fcd5efeff6f939eaa7..cb1f83e7fc5ffb5d0f3fb9fa1d308d2d8c727507 100644
--- a/tests/unit/test_multi_event_stream.cu
+++ b/tests/unit/test_multi_event_stream.cu
@@ -13,6 +13,8 @@
 
 using namespace nest::mc;
 
+using deliverable_event_stream = gpu::multi_event_stream<deliverable_event>;
+
 namespace common_events {
     // set up four targets across three streams and two mech ids.
 
@@ -43,12 +45,13 @@ namespace common_events {
 }
 
 TEST(multi_event_stream, init) {
-    using multi_event_stream = gpu::multi_event_stream;
     using namespace common_events;
 
-    multi_event_stream m(n_cell);
+    deliverable_event_stream m(n_cell);
     EXPECT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
     EXPECT_FALSE(m.empty());
 
@@ -56,17 +59,11 @@ TEST(multi_event_stream, init) {
     EXPECT_TRUE(m.empty());
 }
 
-struct ev_info {
-    unsigned mech_id;
-    unsigned index;
-    double weight;
-};
-
 __global__
 void copy_marked_events_kernel(
     unsigned ci,
-    gpu::multi_event_stream::span_state state,
-    ev_info* store,
+    deliverable_event_stream::state state,
+    deliverable_event_data* store,
     unsigned& count,
     unsigned max_ev)
 {
@@ -74,32 +71,35 @@ void copy_marked_events_kernel(
     if (threadIdx.x || blockIdx.x) return;
 
     unsigned k = 0;
-    for (auto j = state.span_begin[ci]; j<state.mark[ci]; ++j) {
+    auto begin = state.ev_data+state.begin_offset[ci];
+    auto end = state.ev_data+state.end_offset[ci];
+    for (auto p = begin; p<end; ++p) {
         if (k>=max_ev) break;
-        store[k++] = {state.ev_mech_id[j], state.ev_index[j], state.ev_weight[j]};
+        store[k++] = *p;
     }
     count = k;
 }
 
-std::vector<ev_info> copy_marked_events(int ci, gpu::multi_event_stream& m) {
+std::vector<deliverable_event_data> copy_marked_events(int ci, deliverable_event_stream& m) {
     unsigned max_ev = 1000;
-    memory::device_vector<ev_info> store(max_ev);
+    memory::device_vector<deliverable_event_data> store(max_ev);
     memory::device_vector<unsigned> counter(1);
 
-    copy_marked_events_kernel<<<1,1>>>(ci, m.delivery_data(), store.data(), *counter.data(), max_ev);
+    copy_marked_events_kernel<<<1,1>>>(ci, m.marked_events(), store.data(), *counter.data(), max_ev);
     unsigned n_ev = counter[0];
-    std::vector<ev_info> ev(n_ev);
+    std::vector<deliverable_event_data> ev(n_ev);
     memory::copy(store(0, n_ev), ev);
     return ev;
 }
 
 TEST(multi_event_stream, mark) {
-    using multi_event_stream = gpu::multi_event_stream;
     using namespace common_events;
 
-    multi_event_stream m(n_cell);
+    deliverable_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
     for (cell_size_type i = 0; i<n_cell; ++i) {
@@ -124,12 +124,12 @@ TEST(multi_event_stream, mark) {
         case cell_2:
             ASSERT_EQ(1u, n_marked);
             EXPECT_EQ(handle[1].mech_id, evs.front().mech_id);
-            EXPECT_EQ(handle[1].index, evs.front().index);
+            EXPECT_EQ(handle[1].mech_index, evs.front().mech_index);
             break;
         case cell_3:
             ASSERT_EQ(1u, n_marked);
             EXPECT_EQ(handle[3].mech_id, evs.front().mech_id);
-            EXPECT_EQ(handle[3].index, evs.front().index);
+            EXPECT_EQ(handle[3].mech_index, evs.front().mech_index);
             break;
         default:
             EXPECT_EQ(0u, n_marked);
@@ -152,12 +152,12 @@ TEST(multi_event_stream, mark) {
         case cell_1:
             ASSERT_EQ(1u, n_marked);
             EXPECT_EQ(handle[0].mech_id, evs.front().mech_id);
-            EXPECT_EQ(handle[0].index, evs.front().index);
+            EXPECT_EQ(handle[0].mech_index, evs.front().mech_index);
             break;
         case cell_2:
             ASSERT_EQ(1u, n_marked);
             EXPECT_EQ(handle[2].mech_id, evs.front().mech_id);
-            EXPECT_EQ(handle[2].index, evs.front().index);
+            EXPECT_EQ(handle[2].mech_index, evs.front().mech_index);
             break;
         default:
             EXPECT_EQ(0u, n_marked);
@@ -169,15 +169,47 @@ TEST(multi_event_stream, mark) {
     EXPECT_FALSE(m.empty());
     m.drop_marked_events();
     EXPECT_TRUE(m.empty());
+
+    // Confirm different semantics of `mark_until`.
+
+    m.init(events);
+    t_until[cell_1] = 3.1f;
+    t_until[cell_2] = 1.9f;
+    t_until[cell_3] = 3.f;
+    m.mark_until(t_until);
+
+    // Only one event should be marked:
+    //     events[1] (with handle 0) at t=3.f on cell_1
+    //
+    // events[2] (with handle 3) at t=3.f on cell_3 should not be marked (3.f is not less than 3.f);
+    // events[0] (with handle 1) at t=2.f on cell_2 should not be marked (2.f is not less than 1.9f).
+
+    for (cell_size_type i = 0; i<n_cell; ++i) {
+        auto evs = copy_marked_events(i, m);
+        auto n_marked = evs.size();
+        switch (i) {
+        case cell_1:
+            EXPECT_EQ(1u, n_marked);
+            EXPECT_EQ(handle[0].mech_id, evs.front().mech_id);
+            EXPECT_EQ(handle[0].mech_index, evs.front().mech_index);
+            break;
+        default:
+            EXPECT_EQ(0u, n_marked);
+            break;
+        }
+    }
 }
 
 TEST(multi_event_stream, time_if_before) {
-    using multi_event_stream = gpu::multi_event_stream;
     using namespace common_events;
 
-    multi_event_stream m(n_cell);
+    deliverable_event_stream m(n_cell);
     ASSERT_EQ(n_cell, m.n_streams());
 
+    auto events = common_events::events;
+    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
     m.init(events);
 
     // Test times less than all event times (first event at t=2).
diff --git a/tests/unit/test_probe.cpp b/tests/unit/test_probe.cpp
index 1caa2a53b0bbb6a52d9de8f7cb53e07a5ee04431..281304f61b9d859b5b1661c929cf85b2758221a0 100644
--- a/tests/unit/test_probe.cpp
+++ b/tests/unit/test_probe.cpp
@@ -24,7 +24,7 @@ TEST(probe, fvm_multicell)
 
     segment_location loc0{0, 0};
     segment_location loc1{1, 1};
-    segment_location loc2{1, 0.7};
+    segment_location loc2{1, 0.3};
 
     rec.add_probe(0, 10, cell_probe_address{loc0, cell_probe_address::membrane_voltage});
     rec.add_probe(0, 20, cell_probe_address{loc1, cell_probe_address::membrane_voltage});
@@ -47,19 +47,33 @@ TEST(probe, fvm_multicell)
     fvm_cell::probe_handle p1 = probe_map.at({0, 1}).handle;
     fvm_cell::probe_handle p2 = probe_map.at({0, 2}).handle;
 
-    // Know from implementation that probe_handle.second
-    // is a compartment index: expect probe values and
-    // direct queries of voltage and current to be equal in fvm_cell.
+    // Expect initial probe values to be the resting potential
+    // for the voltage probes (cell membrane potential should
+    // be constant), and zero for the current probe.
 
-    EXPECT_EQ(lcell.voltage()[p0.second], lcell.probe(p0));
-    EXPECT_EQ(lcell.voltage()[p1.second], lcell.probe(p1));
-    EXPECT_EQ(lcell.current()[p2.second], lcell.probe(p2));
+    auto resting = lcell.voltage()[0];
+    EXPECT_NE(0.0, resting);
 
-    lcell.setup_integration(0.05, 0.05);
+    EXPECT_EQ(resting, lcell.probe(p0));
+    EXPECT_EQ(resting, lcell.probe(p1));
+    EXPECT_EQ(0.0, lcell.probe(p2));
+
+    // After an integration step, expect voltage probe values
+    // to differ from resting, and between each other, and
+    // for there to be a non-zero current.
+    //
+    // First probe, at (0,0), should match voltage in first
+    // compartment.
+
+    lcell.setup_integration(0.1, 0.0025, {}, {});
     lcell.step_integration();
+    lcell.step_integration();
+
+    EXPECT_NE(resting, lcell.probe(p0));
+    EXPECT_NE(resting, lcell.probe(p1));
+    EXPECT_NE(lcell.probe(p0), lcell.probe(p1));
+    EXPECT_NE(0.0, lcell.probe(p2));
 
-    EXPECT_EQ(lcell.voltage()[p0.second], lcell.probe(p0));
-    EXPECT_EQ(lcell.voltage()[p1.second], lcell.probe(p1));
-    EXPECT_EQ(lcell.current()[p2.second], lcell.probe(p2));
+    EXPECT_EQ(lcell.voltage()[0], lcell.probe(p0));
 }
 
diff --git a/tests/unit/test_range.cpp b/tests/unit/test_range.cpp
index 232862e24946f3a2d5b98b88e8086c7c607acb93..bdec7c3e9afbcddb1d44ff0919e503b9cf36313e 100644
--- a/tests/unit/test_range.cpp
+++ b/tests/unit/test_range.cpp
@@ -2,9 +2,10 @@
 
 #include <algorithm>
 #include <iterator>
-#include <sstream>
+#include <functional>
 #include <list>
 #include <numeric>
+#include <sstream>
 #include <type_traits>
 #include <unordered_map>
 
@@ -24,6 +25,7 @@
 using namespace nest::mc;
 using testing::null_terminated;
 using testing::nocopy;
+using testing::nomove;
 
 TEST(range, list_iterator) {
     std::list<int> l = { 2, 4, 6, 8, 10 };
@@ -516,6 +518,88 @@ TEST(range, keys) {
     }
 }
 
+TEST(range, is_sorted_by) {
+    // Use the `nomove` wrapper to count copies: the implementation aims to
+    // minimize both copies of the projection's return value and invocations
+    // of the projection function.
+
+    struct cmp_nomove_ {
+        bool operator()(const nomove<int>& a, const nomove<int>& b) const {
+            return a.value<b.value;
+        }
+    } cmp_nomove;
+
+    int invocations = 0;
+    auto copies = []() { return nomove<int>::copy_ctor_count+nomove<int>::copy_assign_count; };
+    auto reset = [&]() { invocations = 0; nomove<int>::reset_counts(); };
+
+    auto id_copy = [&](const nomove<int>& x) -> const nomove<int>& { return ++invocations, x; };
+    auto get_value = [&](const nomove<int>& x) { return ++invocations, x.value; };
+
+    // 1. sorted non-empty vector
+
+    std::vector<nomove<int>> v_sorted = {10, 13, 13, 15, 16};
+    int n = v_sorted.size();
+
+    reset();
+    EXPECT_TRUE(util::is_sorted_by(v_sorted, get_value));
+    EXPECT_EQ(0, copies());
+    EXPECT_EQ(n, invocations);
+
+    reset();
+    EXPECT_TRUE(util::is_sorted_by(v_sorted, id_copy, cmp_nomove));
+    EXPECT_EQ(n, copies());
+    EXPECT_EQ(n, invocations);
+
+    // 2. empty vector
+
+    std::vector<nomove<int>> v_empty;
+
+    reset();
+    EXPECT_TRUE(util::is_sorted_by(v_empty, id_copy, cmp_nomove));
+    EXPECT_EQ(0, copies());
+    EXPECT_EQ(0, invocations);
+
+    // 3. one-element vector
+
+    std::vector<nomove<int>> v_single = {-44};
+
+    reset();
+    EXPECT_TRUE(util::is_sorted_by(v_single, id_copy, cmp_nomove));
+    EXPECT_EQ(0, copies());
+    EXPECT_EQ(0, invocations);
+
+    // 4. unsorted vectors at second, third, fourth elements.
+
+    std::vector<nomove<int>> v_unsorted_2 = {2, 1, 3, 4};
+
+    reset();
+    EXPECT_FALSE(util::is_sorted_by(v_unsorted_2, id_copy, cmp_nomove));
+    EXPECT_EQ(2, copies());
+    EXPECT_EQ(2, invocations);
+
+    std::vector<nomove<int>> v_unsorted_3 = {2, 3, 1, 4};
+
+    reset();
+    EXPECT_FALSE(util::is_sorted_by(v_unsorted_3, id_copy, cmp_nomove));
+    EXPECT_EQ(3, copies());
+    EXPECT_EQ(3, invocations);
+
+    std::vector<nomove<int>> v_unsorted_4 = {2, 3, 4, 1};
+
+    reset();
+    EXPECT_FALSE(util::is_sorted_by(v_unsorted_4, id_copy, cmp_nomove));
+    EXPECT_EQ(4, copies());
+    EXPECT_EQ(4, invocations);
+
+    // 5. sequence defined by input (not forward) iterator.
+
+    std::istringstream s_reversed("18 15 13 13 11");
+    auto seq = util::make_range(std::istream_iterator<int>(s_reversed), std::istream_iterator<int>());
+    EXPECT_FALSE(util::is_sorted_by(seq, [](int x) { return x+2; }));
+    EXPECT_TRUE(util::is_sorted_by(seq, [](int x) { return 2-x; }));
+    EXPECT_TRUE(util::is_sorted_by(seq, [](int x) { return x+2; }, std::greater<int>{}));
+}
+
 #ifdef NMC_HAVE_TBB
 
 TEST(range, tbb_split) {