diff --git a/src/backends/gpu/multi_event_stream.hpp b/src/backends/gpu/multi_event_stream.hpp
index 94fa0fd9428757e568500818c617cd15641d863f..c0e94a99d0c232b564e584aab4cfdb1def66bc7e 100644
--- a/src/backends/gpu/multi_event_stream.hpp
+++ b/src/backends/gpu/multi_event_stream.hpp
@@ -9,6 +9,7 @@
 #include <generic_event.hpp>
 #include <memory/array.hpp>
 #include <memory/copy.hpp>
+#include <profiling/profiler.hpp>
 #include <util/rangeutil.hpp>
 
 namespace arb {
@@ -43,8 +44,8 @@ public:
     // Remove marked events from front of each event stream.
     void drop_marked_events();
 
-    // If the head of `i`th event stream exists and has time less than `t_until[i]`, set
-    // `t_until[i]` to the event time.
+    // If the head of `i`th event stream exists and has time less than
+    // `t_until[i]`, set `t_until[i]` to the event time.
     void event_time_if_before(view t_until);
 
 protected:
@@ -58,18 +59,18 @@ protected:
         n_nonempty_stream_(1)
     {}
 
+    // The list of events must be sorted first by index and then by time.
     template <typename Event>
-    void init(std::vector<Event>& staged) {
+    void init(const std::vector<Event>& staged) {
         using ::arb::event_time;
         using ::arb::event_index;
 
+        PE("event-stream");
         if (staged.size()>std::numeric_limits<size_type>::max()) {
             throw std::range_error("too many events");
         }
 
-        // Sort by index (staged events should already be time-sorted).
-        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_time(ev); }));
-        util::stable_sort_by(staged, [](const Event& ev) { return event_index(ev); });
+        EXPECTS(util::is_sorted_by(staged, [](const Event& ev) { return event_index(ev); }));
 
         std::size_t n_ev = staged.size();
         tmp_ev_time_.clear();
@@ -102,6 +103,7 @@ protected:
         memory::copy(memory::make_view(tmp_divs_)(1,n_stream_+1), span_end_);
         memory::copy(span_begin_, mark_);
         n_nonempty_stream_[0] = n_nonempty;
+        PL();
     }
 
     size_type n_stream_;
@@ -131,8 +133,8 @@ public:
 
     // Initialize event streams from a vector of events, sorted first by index
     // and then by time.
-    void init(std::vector<Event> staged) {
-        multi_event_stream_base::init(staged); // reorders `staged` in place.
+    void init(const std::vector<Event>& staged) {
+        multi_event_stream_base::init(staged);
 
         tmp_ev_data_.clear();
         tmp_ev_data_.reserve(staged.size());
diff --git a/src/cell_group.hpp b/src/cell_group.hpp
index 9abb760bebcbc0fbdccecc77262964e19aab83ac..7e26da0f4af332993c8a70043b5353b9d2b7cc5c 100644
--- a/src/cell_group.hpp
+++ b/src/cell_group.hpp
@@ -1,15 +1,18 @@
 #pragma once
 
+#include <cstdint>
 #include <memory>
 #include <vector>
 
 #include <cell.hpp>
 #include <common_types.hpp>
+#include <epoch.hpp>
 #include <event_binner.hpp>
 #include <event_queue.hpp>
 #include <sampling.hpp>
 #include <schedule.hpp>
 #include <spike.hpp>
+#include <util/rangeutil.hpp>
 
 namespace arb {
 
@@ -21,8 +24,24 @@ public:
 
     virtual void reset() = 0;
     virtual void set_binning_policy(binning_kind policy, time_type bin_interval) = 0;
-    virtual void advance(time_type tfinal, time_type dt) = 0;
-    virtual void enqueue_events(const std::vector<postsynaptic_spike_event>& events) = 0;
+    virtual void advance(epoch epoch, time_type dt) = 0;
+
+    // Pass events to be delivered to targets in the cell group in a future epoch.
+    // events:
+    //    An unsorted vector of post-synaptic events is maintained for each gid
+    //    on the local domain. These event lists are stored in a vector, with one
+    //    entry for each gid. Event lists for a cell group are contiguous in the
+    //    vector, in the same order that the input gids were provided to the cell_group
+    //    constructor.
+    // epoch.tfinal:
+    //    The final time for the current integration epoch. This may be used
+    //    by the cell_group implementation to optimise event queue wrangling.
+    // epoch:
+    //    The current integration epoch. Events in events are due for delivery
+    //    in epoch+1 and later.
+    virtual void enqueue_events(
+            epoch epoch,
+            util::subrange_view_type<std::vector<std::vector<postsynaptic_spike_event>>> events) = 0;
     virtual const std::vector<spike>& spikes() const = 0;
     virtual void clear_spikes() = 0;
 
diff --git a/src/communication/communicator.hpp b/src/communication/communicator.hpp
index a34da7eb60098e5a12f1ee7a7dd1abd63d60a766..d1813e134baefa54a84d70652bfe2ae530668620 100644
--- a/src/communication/communicator.hpp
+++ b/src/communication/communicator.hpp
@@ -40,11 +40,7 @@ public:
     using communication_policy_type = CommunicationPolicy;
 
     /// per-cell group lists of events to be delivered
-    using event_queue =
-        std::vector<postsynaptic_spike_event>;
-
-    using gid_partition_type =
-        util::partition_range<std::vector<cell_gid_type>::const_iterator>;
+    using event_queue = std::vector<postsynaptic_spike_event>;
 
     communicator() {}
 
@@ -56,11 +52,11 @@ public:
         // For caching information about each cell
         struct gid_info {
             using connection_list = decltype(std::declval<recipe>().connections_on(0));
-            cell_gid_type gid;
-            cell_gid_type local_group;
-            connection_list conns;
-            gid_info(cell_gid_type g, cell_gid_type lg, connection_list c):
-                gid(g), local_group(lg), conns(std::move(c)) {}
+            cell_gid_type gid;              // global identifier of cell
+            cell_size_type index_on_domain; // index of cell in this domain
+            connection_list conns;          // list of connections terminating at this cell
+            gid_info(cell_gid_type g, cell_size_type di, connection_list c):
+                gid(g), index_on_domain(di), conns(std::move(c)) {}
         };
 
         // Make a list of local gid with their group index and connections
@@ -75,12 +71,13 @@ public:
         gid_infos.reserve(dom_dec.num_local_cells);
 
         cell_local_size_type n_cons = 0;
+        cell_local_size_type n_gid = 0;
         std::vector<unsigned> src_domains;
         std::vector<cell_size_type> src_counts(num_domains_);
-        for (auto i: make_span(0, num_local_groups_)) {
-            const auto& group = dom_dec.groups[i];
+        for (auto g: make_span(0, num_local_groups_)) {
+            const auto& group = dom_dec.groups[g];
             for (auto gid: group.gids) {
-                gid_info info(gid, i, rec.connections_on(gid));
+                gid_info info(gid, n_gid, rec.connections_on(gid));
                 n_cons += info.conns.size();
                 for (auto con: info.conns) {
                     const auto src = dom_dec.gid_domain(con.source.gid);
@@ -88,9 +85,12 @@ public:
                     src_counts[src]++;
                 }
                 gid_infos.push_back(std::move(info));
+                ++n_gid;
             }
         }
 
+        num_local_cells_ = n_gid;
+
         // Construct the connections.
         // The loop above gave the information required to construct in place
         // the connections as partitioned by the domain of their source gid.
@@ -101,21 +101,33 @@ public:
         for (const auto& cell: gid_infos) {
             for (auto c: cell.conns) {
                 const auto i = offsets[src_domains[pos]]++;
-                connections_[i] = {c.source, c.dest, c.weight, c.delay, cell.local_group};
+                connections_[i] = {c.source, c.dest, c.weight, c.delay, cell.index_on_domain};
                 ++pos;
             }
         }
 
+        // Build cell partition by group for passing events to cell groups
+        index_part_ = util::make_partition(index_divisions_,
+            util::transform_view(
+                dom_dec.groups,
+                [](const group_description& g){return g.gids.size();}));
+
         // Sort the connections for each domain.
         // This is num_domains_ independent sorts, so it can be parallelized trivially.
         const auto& cp = connection_part_;
         threading::parallel_for::apply(0, num_domains_,
-            [&](cell_gid_type i) {
+            [&](cell_size_type i) {
                 util::sort(util::subrange_view(connections_, cp[i], cp[i+1]));
             });
     }
 
-    /// the minimum delay of all connections in the global network.
+    /// The range of event queues that belong to cells in group i.
+    std::pair<cell_size_type, cell_size_type> group_queue_range(cell_size_type i) {
+        EXPECTS(i<num_local_groups_);
+        return index_part_[i];
+    }
+
+    /// The minimum delay of all connections in the global network.
     time_type min_delay() {
         auto local_min = std::numeric_limits<time_type>::max();
         for (auto& con : connections_) {
@@ -151,7 +163,7 @@ public:
         using util::make_span;
         using util::make_range;
 
-        auto queues = std::vector<event_queue>(num_local_groups_);
+        auto queues = std::vector<event_queue>(num_local_cells_);
         const auto& sp = global_spikes.partition();
         const auto& cp = connection_part_;
         for (auto dom: make_span(0, num_domains_)) {
@@ -165,14 +177,22 @@ public:
                     {return src<spk.source;}
             };
 
+            // We have a choice of whether to walk spikes or connections:
+            // i.e., we can iterate over the spikes, and for each spike search
+            // for connections that have the same source; or alternatively
+            // for each connection, we can search the list of spikes for spikes
+            // with the same source.
+            //
+            // We iterate over whichever set is the smallest, which has
+            // complexity of order max(S log(C), C log(S)), where S is the
+            // number of spikes, and C is the number of connections.
             if (cons.size()<spks.size()) {
                 auto sp = spks.begin();
                 auto cn = cons.begin();
                 while (cn!=cons.end() && sp!=spks.end()) {
                     auto sources = std::equal_range(sp, spks.end(), cn->source(), spike_pred());
-
-                    for (auto s: make_range(sources.first, sources.second)) {
-                        queues[cn->group_index()].push_back(cn->make_event(s));
+                    for (auto s: make_range(sources)) {
+                        queues[cn->index_on_domain()].push_back(cn->make_event(s));
                     }
 
                     sp = sources.first;
@@ -184,9 +204,8 @@ public:
                 auto sp = spks.begin();
                 while (cn!=cons.end() && sp!=spks.end()) {
                     auto targets = std::equal_range(cn, cons.end(), sp->source);
-
-                    for (auto c: make_range(targets.first, targets.second)) {
-                        queues[c.group_index()].push_back(c.make_event(*sp));
+                    for (auto c: make_range(targets)) {
+                        queues[c.index_on_domain()].push_back(c.make_event(*sp));
                     }
 
                     cn = targets.first;
@@ -210,10 +229,14 @@ public:
     }
 
 private:
+    cell_size_type num_local_cells_;
     cell_size_type num_local_groups_;
     cell_size_type num_domains_;
     std::vector<connection> connections_;
     std::vector<cell_size_type> connection_part_;
+    std::vector<cell_size_type> index_divisions_;
+    util::partition_view_type<std::vector<cell_size_type>> index_part_;
+
     communication_policy_type comms_;
     std::uint64_t num_spikes_ = 0u;
 };
diff --git a/src/connection.hpp b/src/connection.hpp
index 24b9af55e9be9d374760bbe81416200951672ff5..018c69b4551dc32e95f40f4f31407037d1a8e2bc 100644
--- a/src/connection.hpp
+++ b/src/connection.hpp
@@ -15,12 +15,12 @@ public:
                 cell_member_type dest,
                 float w,
                 time_type d,
-                cell_gid_type gidx=cell_gid_type(-1)):
+                cell_gid_type didx=cell_gid_type(-1)):
         source_(src),
         destination_(dest),
         weight_(w),
         delay_(d),
-        group_index_(gidx)
+        index_on_domain_(didx)
     {}
 
     float weight() const { return weight_; }
@@ -28,7 +28,7 @@ public:
 
     cell_member_type source() const { return source_; }
     cell_member_type destination() const { return destination_; }
-    cell_gid_type group_index() const { return group_index_; }
+    cell_size_type index_on_domain() const { return index_on_domain_; }
 
     postsynaptic_spike_event make_event(const spike& s) {
         return {destination_, s.time + delay_, weight_};
@@ -39,7 +39,7 @@ private:
     cell_member_type destination_;
     float weight_;
     time_type delay_;
-    cell_gid_type group_index_;
+    cell_size_type index_on_domain_;
 };
 
 // connections are sorted by source id
diff --git a/src/dss_cell_group.hpp b/src/dss_cell_group.hpp
index ffaecbe50933902edb3d43c5560de744b420f9e1..c3466572b26cc2b26eb493f78fca32c229d8c1d7 100644
--- a/src/dss_cell_group.hpp
+++ b/src/dss_cell_group.hpp
@@ -44,7 +44,7 @@ public:
 
     void set_binning_policy(binning_kind policy, time_type bin_interval) override {}
 
-    void advance(time_type tfinal, time_type dt) override {
+    void advance(epoch ep, time_type dt) override {
         for (auto i: util::make_span(0, not_emit_it_.size())) {
             // The first potential spike_time to emit for this cell
             auto spike_time_it = not_emit_it_[i];
@@ -52,7 +52,7 @@ public:
             // Find the first spike past tfinal
             not_emit_it_[i] = std::find_if(
                 spike_time_it, spike_times_[i].end(),
-                [tfinal](time_type t) {return t >= tfinal; }
+                [ep](time_type t) {return t >= ep.tfinal; }
             );
 
             // Loop over the range and create spikes
@@ -62,7 +62,7 @@ public:
         }
     };
 
-    void enqueue_events(const std::vector<postsynaptic_spike_event>& events) override {
+    void enqueue_events(epoch, util::subrange_view_type<std::vector<std::vector<postsynaptic_spike_event>>>) override {
         std::runtime_error("The dss_cells do not support incoming events!");
     }
 
diff --git a/src/epoch.hpp b/src/epoch.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b54101971a5ecc7685d4eeeb3ae2ce851326a6f9
--- /dev/null
+++ b/src/epoch.hpp
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <cstdint>
+
+#include <common_types.hpp>
+
+namespace arb {
+
+// Information about a current time integration epoch.
+// Each epoch has an integral id that is incremented for successive epochs.
+// Time is divided as follows, where tfinal_i is tfinal for epoch i:
+//
+// epoch_0 :             t < tfinal_0
+// epoch_1 : tfinal_0 <= t < tfinal_1
+// epoch_2 : tfinal_1 <= t < tfinal_2
+// epoch_3 : tfinal_2 <= t < tfinal_3
+//
+// At the end of epoch_i the solution is at tfinal_i, however events that are
+// due for delivery at tfinal_i are not delivered until epoch_i+1.
+
+struct epoch {
+    std::size_t id=0;
+    time_type tfinal=0;
+
+    epoch() = default;
+
+    epoch(std::size_t id, time_type tfinal): id(id), tfinal(tfinal) {}
+
+    void advance(time_type t) {
+        EXPECTS(t>=tfinal);
+        tfinal = t;
+        ++id;
+    }
+};
+
+} // namespace arb
diff --git a/src/event_binner.cpp b/src/event_binner.cpp
index c5e7bc4809e74deef63fe64387289510fcd735ae..0b380bd4fd5263eb8a9ddf9c3c6aac0affc027ab 100644
--- a/src/event_binner.cpp
+++ b/src/event_binner.cpp
@@ -12,10 +12,10 @@
 namespace arb {
 
 void event_binner::reset() {
-    last_event_times_.clear();
+    last_event_time_ = util::nothing;
 }
 
-time_type event_binner::bin(cell_gid_type id, time_type t, time_type t_min) {
+time_type event_binner::bin(time_type t, time_type t_min) {
     time_type t_binned = t;
 
     switch (policy_) {
@@ -27,12 +27,12 @@ time_type event_binner::bin(cell_gid_type id, time_type t, time_type t_min) {
         }
         break;
     case binning_kind::following:
-        if (auto last_t = last_event_time(id)) {
-            if (t-*last_t<bin_interval_) {
-                t_binned = *last_t;
+        if (last_event_time_) {
+            if (t-*last_event_time_<bin_interval_) {
+                t_binned = *last_event_time_;
             }
         }
-        update_last_event_time(id, t_binned);
+        last_event_time_ = t_binned;
         break;
     default:
         throw std::logic_error("unrecognized binning policy");
@@ -41,15 +41,5 @@ time_type event_binner::bin(cell_gid_type id, time_type t, time_type t_min) {
     return std::max(t_binned, t_min);
 }
 
-util::optional<time_type>
-event_binner::last_event_time(cell_gid_type id) {
-    auto it = last_event_times_.find(id);
-    return it==last_event_times_.end()? util::nothing: util::just(it->second);
-}
-
-void event_binner::update_last_event_time(cell_gid_type id, time_type t) {
-    last_event_times_[id] = t;
-}
-
 } // namespace arb
 
diff --git a/src/event_binner.hpp b/src/event_binner.hpp
index bcdce2ffb0669133ece48cc943dbe6f392837c35..f465327405b427cc670980d104cc9c94bf69143f 100644
--- a/src/event_binner.hpp
+++ b/src/event_binner.hpp
@@ -30,9 +30,7 @@ public:
     // Otherwise the returned binned time will be less than or equal to the parameter `t`,
     // and within `bin_interval_`.
 
-    time_type bin(cell_gid_type id,
-                  time_type t,
-                  time_type t_min = std::numeric_limits<time_type>::lowest());
+    time_type bin(time_type t, time_type t_min = std::numeric_limits<time_type>::lowest());
 
 private:
     binning_kind policy_;
@@ -40,12 +38,7 @@ private:
     // Interval in which event times can be aliased.
     time_type bin_interval_;
 
-    // (Consider replacing this with a vector-backed store.)
-    std::unordered_map<cell_gid_type, time_type> last_event_times_;
-
-    util::optional<time_type> last_event_time(cell_gid_type id);
-
-    void update_last_event_time(cell_gid_type id, time_type t);
+    util::optional<time_type> last_event_time_;
 };
 
 } // namespace arb
diff --git a/src/generic_event.hpp b/src/generic_event.hpp
index c8263c6ad3d1e2c960e6c7d0a65266e1dd024309..77ef4328e1301853c74a814802c169d64b7cd1f0 100644
--- a/src/generic_event.hpp
+++ b/src/generic_event.hpp
@@ -1,5 +1,8 @@
 #pragma once
 
+#include <utility>
+#include <type_traits>
+
 // Generic accessors for event types used in `event_queue` and
 // `multi_event_stream`.
 //
@@ -55,6 +58,18 @@ auto event_data(const Event& ev) -> decltype(ev.data) {
     return ev.data;
 }
 
+struct event_time_less {
+    template <typename T, typename Event, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+    bool operator() (T l, const Event& r) {
+        return l<event_time(r);
+    }
+
+    template <typename T, typename Event, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+    bool operator() (const Event& l, T r) {
+        return event_time(l)<r;
+    }
+};
+
 namespace impl {
     // Wrap in `impl::` namespace to obtain correct ADL for return type.
 
diff --git a/src/mc_cell_group.hpp b/src/mc_cell_group.hpp
index 65b2472ba460e8ded47fe998ed08dfa517a871d0..fde8129721f379aabe23445bba8e14112f392a40 100644
--- a/src/mc_cell_group.hpp
+++ b/src/mc_cell_group.hpp
@@ -18,6 +18,7 @@
 #include <sampling.hpp>
 #include <spike.hpp>
 #include <util/debug.hpp>
+#include <util/double_buffer.hpp>
 #include <util/filter.hpp>
 #include <util/partition.hpp>
 #include <util/range.hpp>
@@ -62,8 +63,14 @@ public:
                 spike_sources_.push_back({source_gid, lid});
             }
         }
-
         spike_sources_.shrink_to_fit();
+
+        // Create event lane buffers.
+        // There is one set for each epoch: current and next.
+        event_lanes_.resize(2);
+        // For each epoch there is one lane for each cell in the cell group.
+        event_lanes_[0].resize(gids_.size());
+        event_lanes_[1].resize(gids_.size());
     }
 
     cell_kind get_cell_kind() const override {
@@ -72,32 +79,41 @@ public:
 
     void reset() override {
         spikes_.clear();
-        events_.clear();
         reset_samplers();
-        binner_.reset();
+        for (auto& b: binners_) {
+            b.reset();
+        }
         lowered_.reset();
+        for (auto& lanes: event_lanes_) {
+            for (auto& lane: lanes) {
+                lane.clear();
+            }
+        }
     }
 
     void set_binning_policy(binning_kind policy, time_type bin_interval) override {
-        binner_ = event_binner(policy, bin_interval);
+        binners_.clear();
+        binners_.resize(gids_.size(), event_binner(policy, bin_interval));
     }
 
 
-    void advance(time_type tfinal, time_type dt) override {
+    void advance(epoch ep, time_type dt) override {
         PE("advance");
         EXPECTS(lowered_.state_synchronized());
         time_type tstart = lowered_.min_time();
 
         PE("event-setup");
-        // Bin pending events and batch for lowered cell.
-        std::vector<deliverable_event> staged_events;
-        staged_events.reserve(events_.size());
-
-        while (auto ev = events_.pop_if_before(tfinal)) {
-            staged_events.emplace_back(
-                binner_.bin(ev->target.gid, ev->time, tstart),
-                get_target_handle(ev->target),
-                ev->weight);
+        const auto& lc = event_lanes(ep.id);
+        staged_events_.clear();
+        for (auto lid: util::make_span(0, gids_.size())) {
+            auto& lane = lc[lid];
+            for (auto e: lane) {
+                if (e.time>=ep.tfinal) break;
+                e.time = binners_[lid].bin(e.time, tstart);
+                auto h = target_handles_[target_handle_divisions_[lid]+e.target.index];
+                auto ev = deliverable_event(e.time, h, e.weight);
+                staged_events_.push_back(ev);
+            }
         }
         PL();
 
@@ -132,7 +148,7 @@ public:
         sample_size_type max_samples_per_call = 0;
 
         for (auto& sa: sampler_map_) {
-            auto sample_times = sa.sched.events(tstart, tfinal);
+            auto sample_times = sa.sched.events(tstart, ep.tfinal);
             if (sample_times.empty()) {
                 continue;
             }
@@ -158,8 +174,9 @@ public:
         PL();
 
         // Run integration.
-        lowered_.setup_integration(tfinal, dt, std::move(staged_events), std::move(sample_events));
+        lowered_.setup_integration(ep.tfinal, dt, staged_events_, std::move(sample_events));
         PE("integrator-steps");
+
         while (!lowered_.integration_complete()) {
             lowered_.step_integration();
             if (util::is_debug_mode() && !lowered_.is_physical_solution()) {
@@ -210,9 +227,51 @@ public:
         PL();
     }
 
-    void enqueue_events(const std::vector<postsynaptic_spike_event>& events) override {
-        for (auto& e: events) {
-            events_.push(e);
+    void enqueue_events(
+            epoch ep,
+            util::subrange_view_type<std::vector<std::vector<postsynaptic_spike_event>>> events) override
+    {
+        using pse = postsynaptic_spike_event;
+
+        // Make convenience variables for event lanes:
+        //  lf: event lanes for the next epoch
+        //  lc: event lanes for the current epoch
+        auto& lf = event_lanes(ep.id+1);
+        const auto& lc = event_lanes(ep.id);
+
+        // For a cell, merge the incoming events with events in lc that are
+        // not to be delivered in this epoch, and store the result in lf.
+        auto merge_future_events = [&](unsigned l) {
+            PE("sort");
+            // STEP 1: sort events in place in events[l]
+            util::sort_by(events[l], [](const pse& e){return event_time(e);});
+            PL();
+
+            PE("merge");
+            // STEP 2: clear lf to store merged list
+            lf[l].clear();
+
+            // STEP 3: merge new events and future events from lc into lf
+            auto pos = std::lower_bound(lc[l].begin(), lc[l].end(), ep.tfinal, event_time_less());
+            lf[l].resize(events[l].size()+std::distance(pos, lc[l].end()));
+            std::merge(
+                events[l].begin(), events[l].end(), pos, lc[l].end(), lf[l].begin(),
+                [](const pse& l, const pse& r) {return l.time<r.time;});
+            PL();
+        };
+
+        // This operation is independent for each cell, so it can be performed
+        // in parallel if there are sufficient cells in the cell group.
+        // TODO: use parallel loop based on argument to this function? This would
+        //       allow the caller, which has more context, to decide whether a
+        //       parallel loop is beneficial.
+        if (gids_.size()>1) {
+            threading::parallel_for::apply(0, gids_.size(), merge_future_events);
+        }
+        else {
+            for (unsigned l=0; l<gids_.size(); ++l) {
+                merge_future_events(l);
+            }
         }
     }
 
@@ -248,6 +307,10 @@ public:
     }
 
 private:
+    std::vector<std::vector<postsynaptic_spike_event>>& event_lanes(std::size_t epoch_id) {
+        return event_lanes_[epoch_id%2];
+    }
+
     // List of the gids of the cells in the group.
     std::vector<cell_gid_type> gids_;
 
@@ -264,10 +327,11 @@ private:
     std::vector<spike> spikes_;
 
     // Event time binning manager.
-    event_binner binner_;
+    std::vector<event_binner> binners_;
 
     // Pending events to be delivered.
-    event_queue<postsynaptic_spike_event> events_;
+    std::vector<std::vector<std::vector<postsynaptic_spike_event>>> event_lanes_;
+    std::vector<deliverable_event> staged_events_;
 
     // Pending samples to be taken.
     event_queue<sample_event> sample_events_;
diff --git a/src/model.cpp b/src/model.cpp
index c46c52ed039644aa2f8654594a88f328247b6c93..42833540a8e66a330b41b8962323fc3646656656 100644
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -29,12 +29,6 @@ model::model(const recipe& rec, const domain_decomposition& decomp):
             cell_groups_[i] = cell_group_factory(rec, decomp.groups[i]);
             PL(2);
         });
-
-    // Allocate an empty queue buffer for each cell group
-    // These must be set initially to ensure that a queue is available for each
-    // cell group for the first time step.
-    current_events().resize(num_groups());
-    future_events().resize(num_groups());
 }
 
 void model::reset() {
@@ -45,13 +39,6 @@ void model::reset() {
 
     communicator_.reset();
 
-    for(auto& q: current_events()) {
-        q.clear();
-    }
-    for(auto& q: future_events()) {
-        q.clear();
-    }
-
     current_spikes().clear();
     previous_spikes().clear();
 
@@ -72,14 +59,11 @@ time_type model::run(time_type tfinal, time_type dt) {
     auto update_cells = [&] () {
         threading::parallel_for::apply(
             0u, cell_groups_.size(),
-             [&](unsigned i) {
+            [&](unsigned i) {
+                PE("stepping");
                 auto &group = cell_groups_[i];
 
-                PE("stepping","events");
-                group->enqueue_events(current_events()[i]);
-                PL();
-
-                group->advance(tuntil, dt);
+                group->advance(epoch_, dt);
 
                 PE("events");
                 current_spikes().insert(group->spikes());
@@ -105,17 +89,24 @@ time_type model::run(time_type tfinal, time_type dt) {
         global_export_callback_(global_spikes.values());
         PL();
 
-        PE("events");
-        future_events() = communicator_.make_event_queues(global_spikes);
+        PE("events","from-spikes");
+        auto events = communicator_.make_event_queues(global_spikes);
         PL();
 
+        PE("enqueue");
+        for (auto i: util::make_span(0, cell_groups_.size())) {
+            cell_groups_[i]->enqueue_events(
+                epoch_,
+                util::subrange_view(events, communicator_.group_queue_range(i)));
+        }
+        PL(2);
+
         PL(2);
     };
 
+    tuntil = std::min(t_+t_interval, tfinal);
+    epoch_ = epoch(0, tuntil);
     while (t_<tfinal) {
-        tuntil = std::min(t_+t_interval, tfinal);
-
-        event_queues_.exchange();
         local_spikes_.exchange();
 
         // empty the spike buffers for the current integration period.
@@ -130,11 +121,13 @@ time_type model::run(time_type tfinal, time_type dt) {
         g.wait();
 
         t_ = tuntil;
+
+        tuntil = std::min(t_+t_interval, tfinal);
+        epoch_.advance(tuntil);
     }
 
     // Run the exchange one last time to ensure that all spikes are output
     // to file.
-    event_queues_.exchange();
     local_spikes_.exchange();
     exchange();
 
diff --git a/src/model.hpp b/src/model.hpp
index b1477101efe49379e5a36d7ed95e1dc3fc8aa843..3c75c609c29be1bfcef07c64ba8f8b7f51b56b53 100644
--- a/src/model.hpp
+++ b/src/model.hpp
@@ -6,9 +6,10 @@
 #include <backends.hpp>
 #include <cell_group.hpp>
 #include <common_types.hpp>
-#include <domain_decomposition.hpp>
 #include <communication/communicator.hpp>
 #include <communication/global_policy.hpp>
+#include <domain_decomposition.hpp>
+#include <epoch.hpp>
 #include <recipe.hpp>
 #include <sampling.hpp>
 #include <thread_private_spike_store.hpp>
@@ -61,12 +62,12 @@ public:
 private:
     std::size_t num_groups() const;
 
+    // keep track of information about the current integration interval
+    epoch epoch_;
+
     time_type t_ = 0.;
     std::vector<cell_group_ptr> cell_groups_;
 
-    using event_queue_type = typename communicator_type::event_queue;
-    util::double_buffer<std::vector<event_queue_type>> event_queues_;
-
     using local_spike_store_type = thread_private_spike_store;
     util::double_buffer<local_spike_store_type> local_spikes_;
 
@@ -79,8 +80,8 @@ private:
 
     communicator_type communicator_;
 
-    // Convenience functions that map the spike buffers and event queues onto
-    // the appropriate integration interval.
+    // Convenience functions that map the spike buffers onto the appropriate
+    // integration interval.
     //
     // To overlap communication and computation, integration intervals of
     // size Delta/2 are used, where Delta is the minimum delay in the global
@@ -90,17 +91,10 @@ private:
     // Then we define the following :
     //      current_spikes : spikes generated in the current interval
     //      previous_spikes: spikes generated in the preceding interval
-    //      current_events : events to be delivered at the start of
-    //                       the current interval
-    //      future_events  : events to be delivered at the start of
-    //                       the next interval
 
     local_spike_store_type& current_spikes()  { return local_spikes_.get(); }
     local_spike_store_type& previous_spikes() { return local_spikes_.other(); }
 
-    std::vector<event_queue_type>& current_events()  { return event_queues_.get(); }
-    std::vector<event_queue_type>& future_events()   { return event_queues_.other(); }
-
     // Sampler associations handles are managed by a helper class.
     util::handle_set<sampler_association_handle> sassoc_handles_;
 };
diff --git a/src/profiling/meter_manager.cpp b/src/profiling/meter_manager.cpp
index c1c4723098e59b610944996f86fc05dfbf36f3fe..aa869108a74760dd757580a35c3fbcdeda372a49 100644
--- a/src/profiling/meter_manager.cpp
+++ b/src/profiling/meter_manager.cpp
@@ -120,10 +120,17 @@ meter_report make_meter_report(const meter_manager& manager) {
 
     // Gather a vector with the names of the node that each rank is running on.
     auto host = hostname();
-    report.hosts = gcom::gather(host? *host: "unknown", 0);
+    auto hosts = gcom::gather(host? *host: "unknown", 0);
+    report.hosts = hosts;
+
+    // Count the number of unique hosts.
+    // This is equivalent to the number of nodes on most systems.
+    util::sort(hosts);
+    auto num_hosts = std::distance(hosts.begin(), std::unique(hosts.begin(), hosts.end()));
 
     report.checkpoints = manager.checkpoint_names();
     report.num_domains = gcom::size();
+    report.num_hosts = num_hosts;
     report.communication_policy = gcom::kind();
 
     return report;
@@ -141,17 +148,20 @@ nlohmann::json to_json(const meter_report& report) {
 
 // Print easy to read report of meters to a stream.
 std::ostream& operator<<(std::ostream& o, const meter_report& report) {
-    o << "\n---- meters ------------------------------------------------------------\n";
-    o << strprintf("%21s", "");
+    o << "\n---- meters -------------------------------------------------------------------------------\n";
+    o << strprintf("meter%16s", "");
     for (auto const& m: report.meters) {
         if (m.name=="time") {
-            o << strprintf("%16s", "time (s)");
+            o << strprintf("%16s", "time(s)");
         }
         else if (m.name.find("memory")!=std::string::npos) {
-            o << strprintf("%16s", m.name+" (MB)");
+            o << strprintf("%16s", m.name+"(MB)");
+        }
+        else if (m.name.find("energy")!=std::string::npos) {
+            o << strprintf("%16s", m.name+"(kJ)");
         }
     }
-    o << "\n------------------------------------------------------------------------\n";
+    o << "\n-------------------------------------------------------------------------------------------\n";
     int cp_index = 0;
     for (auto name: report.checkpoints) {
         name.resize(20);
@@ -159,11 +169,17 @@ std::ostream& operator<<(std::ostream& o, const meter_report& report) {
         for (const auto& m: report.meters) {
             if (m.name=="time") {
                 std::vector<double> times = m.measurements[cp_index];
-                o << strprintf("%16.4f", algorithms::mean(times));
+                o << strprintf("%16.3f", algorithms::mean(times));
             }
             else if (m.name.find("memory")!=std::string::npos) {
                 std::vector<double> mem = m.measurements[cp_index];
-                o << strprintf("%16.4f", algorithms::mean(mem)*1e-6);
+                o << strprintf("%16.3f", algorithms::mean(mem)*1e-6);
+            }
+            else if (m.name.find("energy")!=std::string::npos) {
+                std::vector<double> e = m.measurements[cp_index];
+                // TODO: this is an approximation: better reduce a subset of measurements
+                auto doms_per_host = double(report.num_domains)/report.num_hosts;
+                o << strprintf("%16.3f", algorithms::sum(e)/doms_per_host*1e-3);
             }
         }
         o << "\n";
diff --git a/src/profiling/meter_manager.hpp b/src/profiling/meter_manager.hpp
index eba604e74b92e4c87ec277798a48037853ee694a..832f47b2fe8118cb62362105813aca2a7a22c601 100644
--- a/src/profiling/meter_manager.hpp
+++ b/src/profiling/meter_manager.hpp
@@ -53,6 +53,7 @@ public:
 struct meter_report {
     std::vector<std::string> checkpoints;
     unsigned num_domains;
+    unsigned num_hosts;
     arb::communication::global_policy_kind communication_policy;
     std::vector<measurement> meters;
     std::vector<std::string> hosts;
diff --git a/src/rss_cell_group.hpp b/src/rss_cell_group.hpp
index f69b7aaa2bf0ab92bddb32a1d13d096f970d5c4f..6d46a544267bd083ab12b3606bc2bd8cfef56030 100644
--- a/src/rss_cell_group.hpp
+++ b/src/rss_cell_group.hpp
@@ -34,20 +34,20 @@ public:
 
     void set_binning_policy(binning_kind policy, time_type bin_interval) override {}
 
-    void advance(time_type tfinal, time_type dt) override {
+    void advance(epoch ep, time_type dt) override {
         for (const auto& cell: cells_) {
             auto t = std::max(cell.start_time, time_);
-            auto t_end = std::min(cell.stop_time, tfinal);
+            auto t_end = std::min(cell.stop_time, ep.tfinal);
 
             while (t < t_end) {
                 spikes_.push_back({{cell.gid, 0}, t});
                 t += cell.period;
             }
         }
-        time_ = tfinal;
+        time_ = ep.tfinal;
     }
 
-    void enqueue_events(const std::vector<postsynaptic_spike_event>& events) override {
+    void enqueue_events(epoch, util::subrange_view_type<std::vector<std::vector<postsynaptic_spike_event>>>) override {
         std::logic_error("rss_cell cannot deliver events");
     }
 
diff --git a/src/util/partition.hpp b/src/util/partition.hpp
index a4138b66a658bcb883ca5e7578db7c50780c19a8..1eccafe479e2a3e598fb506e3c1728de84f5be42 100644
--- a/src/util/partition.hpp
+++ b/src/util/partition.hpp
@@ -170,6 +170,8 @@ make_partition(Part& divisions, const Sizes& sizes, T from=T{}) {
     return partition_view(divisions);
 }
 
+template <typename Part>
+using partition_view_type = partition_range<typename sequence_traits<Part>::const_iterator>;
 
 } // namespace util
 } // namespace arb
diff --git a/src/util/range.hpp b/src/util/range.hpp
index 75e2b5791062a33f2ea283ae40a63d16aec695d5..34eac57ab2b4ddf465ef5092fe0bf6d88824329b 100644
--- a/src/util/range.hpp
+++ b/src/util/range.hpp
@@ -150,6 +150,11 @@ range<U, V> make_range(const U& left, const V& right) {
     return range<U, V>(left, right);
 }
 
+template <typename U, typename V>
+range<U, V> make_range(const std::pair<U, V>& iterators) {
+    return range<U, V>(iterators.first, iterators.second);
+}
+
 // Present a possibly sentinel-terminated range as an STL-compatible sequence
 // using the sentinel_iterator adaptor.
 
diff --git a/src/util/rangeutil.hpp b/src/util/rangeutil.hpp
index ba2a3c0a3f66c8a5bce593447215e09ba067aa79..1556877a0966967d2f64fc47f5d55475c6d0ac91 100644
--- a/src/util/rangeutil.hpp
+++ b/src/util/rangeutil.hpp
@@ -66,6 +66,11 @@ subrange_view(Seq& seq, std::pair<Offset1, Offset2> index) {
     return subrange_view(seq, index.first, index.second);
 }
 
+// helper for determining the type of a subrange_view
+template <typename Seq>
+using subrange_view_type = decltype(subrange_view(std::declval<Seq&>(), 0, 0));
+
+
 // Fill container or range.
 
 template <typename Seq, typename V>
diff --git a/tests/ubench/CMakeLists.txt b/tests/ubench/CMakeLists.txt
index 5e64c23f21a4ddb97a76e1e589c95f1957d04209..75e4cf905ad2c058b9e1c5169195a52bde638e4d 100644
--- a/tests/ubench/CMakeLists.txt
+++ b/tests/ubench/CMakeLists.txt
@@ -5,6 +5,7 @@ include(ExternalProject)
 set(bench_sources
     accumulate_functor_values.cpp
     event_setup.cpp
+    event_binning.cpp
 )
 
 set(bench_sources_cuda
diff --git a/tests/ubench/event_binning.cpp b/tests/ubench/event_binning.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a807d08bb64acd17551ad8b41d68a16b2048c133
--- /dev/null
+++ b/tests/ubench/event_binning.cpp
@@ -0,0 +1,111 @@
+// Test overheads of the hashing function used in binning of events (following method).
+// Only tests hashing overhead, which isn't enough for proper analysis of costs.
+//
+// Keep this test as a prototype for testing, esp. when looking into binning.
+
+#include <random>
+#include <vector>
+
+#include <event_queue.hpp>
+#include <backends/event.hpp>
+
+#include <benchmark/benchmark.h>
+
+using namespace arb;
+
+using pse = postsynaptic_spike_event;
+
+std::vector<cell_gid_type> generate_gids(size_t n) {
+    std::mt19937 engine;
+    std::uniform_int_distribution<cell_gid_type> dist(1u, 3u);
+
+    std::vector<cell_gid_type> gids;
+    gids.reserve(n);
+
+    gids.push_back(0);
+    while (gids.size()<n) {
+        gids.push_back(gids.back()+dist(engine));
+    }
+
+    return gids;
+}
+
+std::vector<std::vector<pse>> generate_inputs(const std::vector<cell_gid_type>& gids, size_t ev_per_cell) {
+    auto ncells = gids.size();
+    std::vector<std::vector<pse>> input_events;
+
+
+    std::mt19937 gen;
+    std::uniform_int_distribution<cell_gid_type>
+        gid_dist(0u, ncells-1);
+
+    input_events.resize(ncells);
+    for (std::size_t i=0; i<ncells*ev_per_cell; ++i) {
+        postsynaptic_spike_event ev;
+        auto idx = gid_dist(gen);
+        auto gid = gids[idx];
+        auto t = 1.;
+        ev.target = {cell_gid_type(gid), cell_lid_type(0)};
+        ev.time = t;
+        ev.weight = 0;
+        input_events[idx].push_back(ev);
+    }
+
+    return input_events;
+}
+
+void no_hash(benchmark::State& state) {
+    const std::size_t ncells = state.range(0);
+    const std::size_t ev_per_cell = state.range(1);
+
+    // state
+    auto gids = generate_gids(ncells);
+    auto input_events = generate_inputs(gids, ev_per_cell);
+
+    std::vector<float> times(ncells);
+    while (state.KeepRunning()) {
+        for (size_t i=0; i<ncells; ++i) {
+            for (auto& ev: input_events[i]) {
+                times[i] += ev.time;
+            }
+        }
+
+        benchmark::ClobberMemory();
+    }
+}
+
+void yes_hash(benchmark::State& state) {
+    const std::size_t ncells = state.range(0);
+    const std::size_t ev_per_cell = state.range(1);
+
+    // state
+    auto gids = generate_gids(ncells);
+    auto input_events = generate_inputs(gids, ev_per_cell);
+
+    std::unordered_map<cell_gid_type, float> times;
+    for (auto gid: gids) {
+        times[gid] = 0;
+    }
+    while (state.KeepRunning()) {
+        for (size_t i=0; i<ncells; ++i) {
+            for (auto& ev: input_events[i]) {
+                times[gids[i]] += ev.time;
+            }
+        }
+
+        benchmark::ClobberMemory();
+    }
+}
+
+void run_custom_arguments(benchmark::internal::Benchmark* b) {
+    for (auto ncells: {1, 10, 100, 1000, 10000}) {
+        for (auto ev_per_cell: {128, 256, 512, 1024, 2048, 4096}) {
+            b->Args({ncells, ev_per_cell});
+        }
+    }
+}
+
+BENCHMARK(no_hash)->Apply(run_custom_arguments);
+BENCHMARK(yes_hash)->Apply(run_custom_arguments);
+
+BENCHMARK_MAIN();
diff --git a/tests/unit/test_dss_cell_group.cpp b/tests/unit/test_dss_cell_group.cpp
index d456225a066450810e6bbd79cf2f60e58d0d4130..4baa8d0c48f801aeb3e2ce31e5007fb3c56f6c01 100644
--- a/tests/unit/test_dss_cell_group.cpp
+++ b/tests/unit/test_dss_cell_group.cpp
@@ -18,13 +18,15 @@ TEST(dss_cell, basic_usage)
 
     // No spikes in this time frame.
     time_type dt = 0.01; // (note that dt is ignored in dss_cell_group).
-    sut.advance(0.09, dt);
+    epoch ep(0, 0.09);
+    sut.advance(ep, dt);
 
     auto spikes = sut.spikes();
     EXPECT_EQ(0u, spikes.size());
 
     // Only one in this time frame.
-    sut.advance(0.11, 0.01);
+    ep.advance(0.11);
+    sut.advance(ep, dt);
     spikes = sut.spikes();
     EXPECT_EQ(1u, spikes.size());
     ASSERT_FLOAT_EQ(spike_time, spikes[0].time);
@@ -35,7 +37,8 @@ TEST(dss_cell, basic_usage)
     EXPECT_EQ(0u, spikes.size());
 
     // No spike to be emitted.
-    sut.advance(0.12, dt);
+    ep.advance(0.12);
+    sut.advance(ep, dt);
     spikes = sut.spikes();
     EXPECT_EQ(0u, spikes.size());
 
@@ -43,7 +46,8 @@ TEST(dss_cell, basic_usage)
     sut.reset();
 
     // Expect to have the one spike again after reset.
-    sut.advance(0.2, dt);
+    ep.advance(0.2);
+    sut.advance(ep, dt);
     spikes = sut.spikes();
     EXPECT_EQ(1u, spikes.size());
     ASSERT_FLOAT_EQ(spike_time, spikes[0].time);
diff --git a/tests/unit/test_event_binner.cpp b/tests/unit/test_event_binner.cpp
index f43e01eb5c7010bd5debe9cab8e56d4af1a7ba70..0d8ddaae6b268c5bceccf87192d7b23b24346675 100644
--- a/tests/unit/test_event_binner.cpp
+++ b/tests/unit/test_event_binner.cpp
@@ -6,54 +6,6 @@
 
 using namespace arb;
 
-TEST(event_binner, basic) {
-    using testing::seq_almost_eq;
-
-    std::pair<cell_gid_type, float> binning_test_data[] = {
-        { 11, 0.50 },
-        { 12, 0.70 },
-        { 14, 0.73 },
-        { 11, 1.80 },
-        { 12, 1.83 },
-        { 11, 1.90 },
-        { 11, 2.00 },
-        { 14, 2.00 },
-        { 11, 2.10 },
-        { 14, 2.30 }
-    };
-
-    std::unordered_map<cell_gid_type, std::vector<float>> ev_times;
-    std::vector<float> expected;
-
-    auto run_binner = [&](event_binner&& binner) {
-        ev_times.clear();
-        for (auto p: binning_test_data) {
-            ev_times[p.first].push_back(binner.bin(p.first, p.second));
-        }
-    };
-
-    run_binner(event_binner{binning_kind::none, 0});
-
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[11], (float []){0.50, 1.80, 1.90, 2.00, 2.10}));
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[12], (float []){0.70, 1.83}));
-    EXPECT_TRUE(ev_times[13].empty());
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[14], (float []){0.73, 2.00, 2.30}));
-
-    run_binner(event_binner{binning_kind::regular, 0.25});
-
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[11], (float []){0.50, 1.75, 1.75, 2.00, 2.00}));
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[12], (float []){0.50, 1.75}));
-    EXPECT_TRUE(ev_times[13].empty());
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[14], (float []){0.50, 2.00, 2.25}));
-
-    run_binner(event_binner{binning_kind::following, 0.25});
-
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[11], (float []){0.50, 1.80, 1.80, 1.80, 2.10}));
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[12], (float []){0.70, 1.83}));
-    EXPECT_TRUE(ev_times[13].empty());
-    EXPECT_TRUE(seq_almost_eq<float>(ev_times[14], (float []){0.73, 2.00, 2.30}));
-}
-
 TEST(event_binner, with_min) {
     using testing::seq_almost_eq;
 
@@ -74,10 +26,10 @@ TEST(event_binner, with_min) {
         times.clear();
         for (auto p: test_data) {
             if (use_min) {
-                times.push_back(binner.bin(0, p.time, p.t_min));
+                times.push_back(binner.bin(p.time, p.t_min));
             }
             else {
-                times.push_back(binner.bin(0, p.time));
+                times.push_back(binner.bin(p.time));
             }
         }
     };
diff --git a/tests/unit/test_mc_cell_group.cpp b/tests/unit/test_mc_cell_group.cpp
index 151e8a0defa375208c1c16d595684cf54dced468..e926488d145946d28531a1a97db9f4fe7a239557 100644
--- a/tests/unit/test_mc_cell_group.cpp
+++ b/tests/unit/test_mc_cell_group.cpp
@@ -2,6 +2,7 @@
 
 #include <backends/multicore/fvm.hpp>
 #include <common_types.hpp>
+#include <epoch.hpp>
 #include <fvm_multicell.hpp>
 #include <mc_cell_group.hpp>
 #include <util/rangeutil.hpp>
@@ -32,7 +33,7 @@ TEST(mc_cell_group, get_kind) {
 TEST(mc_cell_group, test) {
     mc_cell_group<fvm_cell> group{{0}, cable1d_recipe(make_cell()) };
 
-    group.advance(50, 0.01);
+    group.advance(epoch(0, 50), 0.01);
 
     // the model is expected to generate 4 spikes as a result of the
     // fixed stimulus over 50 ms
diff --git a/tests/unit/test_mc_cell_group.cu b/tests/unit/test_mc_cell_group.cu
index 331a2e94f55ed5f5f91725ad04f3dd9fe75dfe0c..f2eb389391e72c2c73012a4e558528ccf51a0ccf 100644
--- a/tests/unit/test_mc_cell_group.cu
+++ b/tests/unit/test_mc_cell_group.cu
@@ -25,7 +25,7 @@ TEST(mc_cell_group, test)
 {
     mc_cell_group<fvm_cell> group({0u}, cable1d_recipe(make_cell()));
 
-    group.advance(50, 0.01);
+    group.advance(50, 0.01, 0);
 
     // the model is expected to generate 4 spikes as a result of the
     // fixed stimulus over 50 ms
diff --git a/tests/unit/test_multi_event_stream.cu b/tests/unit/test_multi_event_stream.cu
index 6a0e9997b3a5ff0640cc2223dfa3bc4b553890b1..7e1d171915295657a12cd7794f255d2097710678 100644
--- a/tests/unit/test_multi_event_stream.cu
+++ b/tests/unit/test_multi_event_stream.cu
@@ -13,6 +13,11 @@
 
 using namespace arb;
 
+namespace {
+    auto evtime = [](deliverable_event e) { return event_time(e); };
+    auto evindex = [](deliverable_event e) { return event_index(e); };
+}
+
 using deliverable_event_stream = gpu::multi_event_stream<deliverable_event>;
 
 namespace common_events {
@@ -51,7 +56,8 @@ TEST(multi_event_stream, init) {
     EXPECT_EQ(n_cell, m.n_streams());
 
     auto events = common_events::events;
-    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
+    ASSERT_TRUE(util::is_sorted_by(events, evtime));
+    util::stable_sort_by(events, evindex);
     m.init(events);
     EXPECT_FALSE(m.empty());
 
@@ -99,7 +105,8 @@ TEST(multi_event_stream, mark) {
     ASSERT_EQ(n_cell, m.n_streams());
 
     auto events = common_events::events;
-    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
+    ASSERT_TRUE(util::is_sorted_by(events, evtime));
+    util::stable_sort_by(events, evindex);
     m.init(events);
 
     for (cell_size_type i = 0; i<n_cell; ++i) {
@@ -209,7 +216,8 @@ TEST(multi_event_stream, time_if_before) {
     ASSERT_EQ(n_cell, m.n_streams());
 
     auto events = common_events::events;
-    ASSERT_TRUE(util::is_sorted_by(events, [](deliverable_event e) { return event_time(e); }));
+    ASSERT_TRUE(util::is_sorted_by(events, evtime));
+    util::stable_sort_by(events, evindex);
     m.init(events);
 
     // Test times less than all event times (first event at t=2).
diff --git a/tests/unit/test_rss_cell.cpp b/tests/unit/test_rss_cell.cpp
index 15e176494781a5a5146ef74f078eb2cc02313332..690fe258361bc5dae7b2eee9568b9f70a9a91810 100644
--- a/tests/unit/test_rss_cell.cpp
+++ b/tests/unit/test_rss_cell.cpp
@@ -19,19 +19,22 @@ TEST(rss_cell, basic_usage)
     rss_cell_group sut({0}, rss_recipe(1u, desc));
 
     // No spikes in this time frame.
-    sut.advance(0.1, dt);
+    epoch ep(0, 0.1);
+    sut.advance(ep, dt);
     EXPECT_EQ(0u, sut.spikes().size());
 
     // Only on in this time frame
     sut.clear_spikes();
-    sut.advance(0.127, dt);
+    ep.advance(0.127);
+    sut.advance(ep, dt);
     EXPECT_EQ(1u, sut.spikes().size());
 
     // Reset cell group state.
     sut.reset();
 
     // Expect 12 spikes excluding the 0.5 end point.
-    sut.advance(0.5, dt);
+    ep.advance(0.5);
+    sut.advance(ep, dt);
     EXPECT_EQ(12u, sut.spikes().size());
 }
 
@@ -43,19 +46,19 @@ TEST(rss_cell, poll_time_after_end_time)
     rss_cell_group sut({0}, rss_recipe(1u, desc));
 
     // Expect 12 spikes in this time frame.
-    sut.advance(0.7, dt);
+    sut.advance(epoch(0, 0.7), dt);
     EXPECT_EQ(12u, sut.spikes().size());
 
     // Now ask for spikes for a time slot already passed:
     // It should result in zero spikes because of the internal state!
     sut.clear_spikes();
-    sut.advance(0.2, dt);
+    sut.advance(epoch(0, 0.2), dt);
     EXPECT_EQ(0u, sut.spikes().size());
 
     sut.reset();
 
     // Expect 12 excluding the 0.5
-    sut.advance(0.5, dt);
+    sut.advance(epoch(0, 0.5), dt);
     EXPECT_EQ(12u, sut.spikes().size());
 }
 
diff --git a/tests/validation/validate_synapses.cpp b/tests/validation/validate_synapses.cpp
index 55fc0e62c28b10027438a8c977a72bb12f4249b1..633bd418676b87fb2c2a41a351e50f502c7bdf2a 100644
--- a/tests/validation/validate_synapses.cpp
+++ b/tests/validation/validate_synapses.cpp
@@ -42,11 +42,11 @@ void run_synapse_test(
     c.add_synapse({1, 0.5}, syn_default);
 
     // injected spike events
-    std::vector<postsynaptic_spike_event> synthetic_events = {
+    std::vector<std::vector<postsynaptic_spike_event>> synthetic_events = {{
         {{0u, 0u}, 10.0, 0.04},
         {{0u, 0u}, 20.0, 0.04},
         {{0u, 0u}, 40.0, 0.04}
-    };
+    }};
 
     // exclude points of discontinuity from linf analysis
     std::vector<float> exclude = {10.f, 20.f, 40.f};
@@ -75,7 +75,10 @@ void run_synapse_test(
 
         auto decomp = partition_load_balance(rec, nd);
         model m(rec, decomp);
-        m.group(0).enqueue_events(synthetic_events);
+
+        // Add events in an odd epoch (1), so that they are available during integration of epoch 0.
+        // This is a bit of a hack.
+        m.group(0).enqueue_events(epoch(1, 0.), util::subrange_view(synthetic_events, 0, 1));
 
         runner.run(m, ncomp, sample_dt, t_end, dt, exclude);
     }