diff --git a/miniapp/io.cpp b/miniapp/io.cpp
index ac7573ccec32a4036f49c515b80047ccb602a1e0..17756e2cf5fba0bafa70354b2f7ebe5294316422 100644
--- a/miniapp/io.cpp
+++ b/miniapp/io.cpp
@@ -11,11 +11,15 @@
 
 // Let TCLAP understand value arguments that are of an optional type.
 
+namespace TCLAP {
+
 template <typename V>
-struct TCLAP::ArgTraits<nest::mc::util::optional<V>> {
+struct ArgTraits<nest::mc::util::optional<V>> {
     using ValueCategory = ValueLike;
 };
 
+} // namespace TCLAP
+
 namespace nest {
 namespace mc {
 
diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp
index 3e9d4dd227efbb90823b99db2fedd00cc94e4f81..36c85ba1000b832019fe8cf792a4a14405cec19a 100644
--- a/miniapp/miniapp.cpp
+++ b/miniapp/miniapp.cpp
@@ -65,9 +65,11 @@ int main(int argc, char** argv) {
         model_type m(*recipe, cell_range.first, cell_range.second);
 
         // inject some artificial spikes, 1 per 20 neurons.
-        cell_gid_type spike_cell = 20*((cell_range.first+19)/20);
-        for (; spike_cell<cell_range.second; spike_cell+=20) {
-            m.add_artificial_spike({spike_cell,0u});
+        std::vector<cell_gid_type> local_sources;
+        cell_gid_type first_spike_cell = 20*((cell_range.first+19)/20);
+        for (auto c=first_spike_cell; c<cell_range.second; c+=20) {
+            local_sources.push_back(c);
+            m.add_artificial_spike({c, 0});
         }
 
         // attach samplers to all probes
@@ -79,13 +81,31 @@ int main(int argc, char** argv) {
             }
 
             traces.push_back(make_trace(probe.id, probe.probe));
-            m.attach_sampler(probe.id, make_trace_sampler(traces.back().get(),sample_dt));
+            m.attach_sampler(probe.id, make_trace_sampler(traces.back().get(), sample_dt));
         }
 
+        m.print_spikes();
+
+        // dummy run of the model for one step to ensure that profiling is consistent
+        m.run(options.dt, options.dt);
+
+        // reset the model
+        m.reset();
+        std::cout << "\n";
+        m.print_spikes();
+        // which requires resetting the sources
+        for (auto source : local_sources) {
+            m.add_artificial_spike({source, 0});
+        }
+        std::cout << "\n";
+        m.print_spikes();
+
         // run model
         m.run(options.tfinal, options.dt);
-        util::profiler_output(0.001);
 
+        // output profile and diagnostic feedback
+        auto const num_steps = options.tfinal / options.dt;
+        util::profiler_output(0.001, m.num_cells(), int(num_steps));
         std::cout << "there were " << m.num_spikes() << " spikes\n";
 
         // save traces
diff --git a/src/cell_group.hpp b/src/cell_group.hpp
index e70d4b0bb5826b6ccde449f63a7fad6dab29c783..f03d134a0aa57465781aa13ea3dd6028ded0bff5 100644
--- a/src/cell_group.hpp
+++ b/src/cell_group.hpp
@@ -56,7 +56,10 @@ public:
     }
 
     void reset() {
-        remove_samplers();
+        clear_spikes();
+        clear_events();
+        //remove_samplers();
+        reset_samplers();
         initialize_cells();
         for (auto& spike_source: spike_sources_) {
             spike_source.source.reset(cell_, 0.f);
@@ -140,6 +143,10 @@ public:
         spikes_.clear();
     }
 
+    void clear_events() {
+        events_.clear();
+    }
+
     void add_sampler(cell_member_type probe_id, sampler_function s, time_type start_time = 0) {
         auto sampler_index = uint32_t(samplers_.size());
         samplers_.push_back({probe_id, s});
@@ -151,6 +158,14 @@ public:
         samplers_.clear();
     }
 
+    void reset_samplers() {
+        // clear all pending sample events and reset to start at time 0
+        sample_events_.clear();
+        for(uint32_t i=0u; i<samplers_.size(); ++i) {
+            sample_events_.push({i, time_type(0)});
+        }
+    }
+
 private:
     void initialize_cells() {
         cell_.voltage()(memory::all) = -65.;
@@ -166,7 +181,7 @@ private:
     /// spike detectors attached to the cell
     std::vector<spike_source_type> spike_sources_;
 
-    //. spikes that are generated
+    /// spikes that are generated
     std::vector<spike<source_id_type, time_type>> spikes_;
 
     /// pending events to be delivered
diff --git a/src/communication/communicator.hpp b/src/communication/communicator.hpp
index 8c72430f5c4646ce7747c76cac088bee65a58df8..c2036d91b5091965417bb8ead1b2d0740911789b 100644
--- a/src/communication/communicator.hpp
+++ b/src/communication/communicator.hpp
@@ -128,6 +128,10 @@ public:
         return communication_policy_;
     }
 
+    void reset() {
+        num_spikes_ = 0;
+    }
+
 private:
     std::size_t cell_group_index(cell_gid_type cell_gid) const {
         // this will be more elaborate when there is more than one cell per cell group
diff --git a/src/model.hpp b/src/model.hpp
index 84dd84298a78418ce882b68a29f2211a6e50401b..9ebd642968ebd4a132e29ac569fae34797faec0b 100644
--- a/src/model.hpp
+++ b/src/model.hpp
@@ -80,7 +80,28 @@ public:
         for (auto& group: cell_groups_) {
             group.reset();
         }
+
         communicator_.reset();
+
+        current_events().clear();
+        future_events().clear();
+
+        current_spikes().clear();
+        previous_spikes().clear();
+
+        util::profilers_restart();
+    }
+
+    void print_spikes() {
+        std::cout << "CURRENT : ";
+        for(auto s : current_spikes().gather()) {
+            std::cout << s << " ";
+        }
+        std::cout << "\nPREVIOUS : ";
+        for(auto s : previous_spikes().gather()) {
+            std::cout << s << " ";
+        }
+        std::cout << "\n";
     }
 
     time_type run(time_type tfinal, time_type dt) {
@@ -141,6 +162,7 @@ public:
 
             t_ = tuntil;
         }
+
         return t_;
     }
 
@@ -164,8 +186,16 @@ public:
 
     const std::vector<probe_record>& probes() const { return probes_; }
 
-    std::size_t num_spikes() const { return communicator_.num_spikes(); }
-    std::size_t num_groups() const { return cell_groups_.size(); }
+    std::size_t num_spikes() const {
+        return communicator_.num_spikes();
+    }
+    std::size_t num_groups() const {
+        return cell_groups_.size();
+    }
+    std::size_t num_cells() const {
+        // TODO: fix when the assumption that there is one cell per cell group is no longer valid
+        return num_groups();
+    }
 
 private:
     cell_gid_type cell_from_;
@@ -174,11 +204,11 @@ private:
     std::vector<cell_group_type> cell_groups_;
     communicator_type communicator_;
     std::vector<probe_record> probes_;
-    using spike_type = typename communicator_type::spike_type;
 
     using event_queue_type = typename communicator_type::event_queue;
     util::double_buffer< std::vector<event_queue_type> > event_queues_;
 
+    using spike_type = typename communicator_type::spike_type;
     using local_spike_store_type = thread_private_spike_store<time_type>;
     util::double_buffer< local_spike_store_type > local_spikes_;
 
diff --git a/src/profiling/profiler.cpp b/src/profiling/profiler.cpp
index 1e30449ab4b225385570292bea527fbcc6981c3d..ca1394181836f63909073fb51fa265da57c5de57 100644
--- a/src/profiling/profiler.cpp
+++ b/src/profiling/profiler.cpp
@@ -1,9 +1,9 @@
 #include <numeric>
 
-#include "profiler.hpp"
-#include "util/debug.hpp"
-
+#include <common_types.hpp>
 #include <communication/global_policy.hpp>
+#include <profiling/profiler.hpp>
+#include <util/debug.hpp>
 
 namespace nest {
 namespace mc {
@@ -241,6 +241,17 @@ void profiler::stop() {
     deactivate();
 }
 
+void profiler::restart() {
+    if (!is_activated()) {
+        start();
+        return;
+    }
+    deactivate();
+    root_region_.clear();
+    start();
+}
+
+
 profiler_node profiler::performance_tree() {
     if (is_activated()) {
         stop();
@@ -281,14 +292,23 @@ void profiler_leave(int nlevels) {
 }
 
 // iterate over all profilers and ensure that they have the same start stop times
-void stop_profilers() {
+void profilers_stop() {
     for (auto& p : data::profilers_) {
         p.stop();
     }
 }
 
-void profiler_output(double threshold) {
-    stop_profilers();
+void profilers_restart() {
+    auto i = 0;
+    for (auto& p : data::profilers_) {
+        p.restart();
+        ++i;
+    }
+    std::cout << "just stopped " << i << " profilers" << std::endl;
+}
+
+void profiler_output(double threshold, cell_size_type num_local_cells, int num_steps) {
+    profilers_stop();
 
     // Find the earliest start time and latest stop time over all profilers
     // This can be used to calculate the wall time for this communicator.
@@ -323,6 +343,11 @@ void profiler_output(double threshold) {
     auto ncomms = communication::global_policy::size();
     auto comm_rank = communication::global_policy::id();
     bool print = comm_rank==0 ? true : false;
+
+    // calculate the throughput in terms of cell steps per second
+    auto local_throughput = num_local_cells*num_steps / wall_time;
+    auto global_throughput = communication::global_policy::sum(local_throughput);
+
     if(print) {
         std::cout << " ---------------------------------------------------- \n";
         std::cout << "|                      profiler                      |\n";
@@ -335,7 +360,6 @@ void profiler_output(double threshold) {
         std::snprintf(
             line, sizeof(line), "%-18s%10d\n",
             "communicators", int(ncomms));
-        std::cout << line;
         std::snprintf(
             line, sizeof(line), "%-18s%10d\n",
             "threads", int(nthreads));
@@ -345,6 +369,15 @@ void profiler_output(double threshold) {
             "thread efficiency", float(efficiency));
         std::cout << line << "\n";
         p.print(std::cout, threshold);
+        std::cout << "\n";
+        std::snprintf(
+            line, sizeof(line), "%-18s%10s%10s\n",
+            "", "local", "global");
+        std::cout << line;
+        std::snprintf(
+            line, sizeof(line), "%-18s%10d%10d\n",
+            "throughput", int(local_throughput), int(global_throughput));
+        std::cout << line;
 
         std::cout << "\n\n";
     }
@@ -354,6 +387,7 @@ void profiler_output(double threshold) {
     as_json["threads"] = nthreads;
     as_json["efficiency"] = efficiency;
     as_json["communicators"] = ncomms;
+    as_json["throughput"] = unsigned(local_throughput);
     as_json["rank"] = comm_rank;
     as_json["regions"] = p.as_json();
 
@@ -368,8 +402,9 @@ void profiler_stop() {}
 void profiler_enter(const char*) {}
 void profiler_leave() {}
 void profiler_leave(int) {}
-void stop_profilers() {}
-void profiler_output(double threshold) {}
+void profilers_stop() {}
+void profiler_output(double threshold, cell_size_type num_local_cells, int num_steps) {}
+void profilers_restart() {};
 #endif
 
 } // namespace util
diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp
index 4332925a5b3ee46c8c07164a80c4acf5abd72989..0830f854a6fca9de3a75096dbec5c877a1c8f66c 100644
--- a/src/profiling/profiler.hpp
+++ b/src/profiling/profiler.hpp
@@ -121,6 +121,11 @@ public:
 
     bool has_subregions() const { return subregions_.size() > 0; }
 
+    void clear() {
+        subregions_.clear();
+        start_time();
+    }
+
     size_t hash() const { return hash_; }
 
     region_type* subregion(const char* n);
@@ -170,6 +175,10 @@ public:
     /// stop (deactivate) the profiler
     void stop();
 
+    /// restart the profiler
+    /// remove all trace information and restart timer for the root region
+    void restart();
+
     /// the time stamp at which the profiler was started (avtivated)
     timer_type::time_point start_time() const { return start_time_; }
 
@@ -231,10 +240,14 @@ void profiler_leave();
 void profiler_leave(int nlevels);
 
 /// iterate and stop them
-void stop_profilers();
+void profilers_stop();
+
+/// reset profilers
+void profilers_restart();
 
 /// print the collated profiler to std::cout
-void profiler_output(double threshold);
+//void profiler_output(double threshold);
+void profiler_output(double threshold, cell_size_type num_local_cells, int num_steps);
 
 } // namespace util
 } // namespace mc