diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3853cf474977e6e7d2b3190c1dceaddb9f6e1298..c5e6e846b2606d636456acb8a83ea7255e0f5f31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -395,9 +395,6 @@ set(ARB_MODCC_FLAGS)
 if(ARB_VECTORIZE)
     list(APPEND ARB_MODCC_FLAGS "--simd")
 endif()
-if(ARB_WITH_PROFILING)
-    list(APPEND ARB_MODCC_FLAGS "--profile")
-endif()
 
 #----------------------------------------------------------
 # Set up install paths, permissions.
diff --git a/arbor/benchmark_cell_group.cpp b/arbor/benchmark_cell_group.cpp
index d5692531aef3ef7bfb8591155b1c19e787cde6d7..0069b40512ffc68e9c2f53baf6de7ff977d88067 100644
--- a/arbor/benchmark_cell_group.cpp
+++ b/arbor/benchmark_cell_group.cpp
@@ -61,7 +61,7 @@ void benchmark_cell_group::advance(epoch ep,
     using std::chrono::high_resolution_clock;
     using duration_type = std::chrono::duration<double, std::micro>;
 
-    PE(advance_bench_cell);
+    PE(advance:bench:cell);
     // Micro-seconds to advance in this epoch.
     auto us = 1e3*(ep.duration());
     for (auto i: util::make_span(0, gids_.size())) {
diff --git a/arbor/communication/communicator.cpp b/arbor/communication/communicator.cpp
index 8fa93e9b1b2504bb044b6c9c61c22bfeb6bc975e..9fa4a0d76424d9389ffaec3bb40b1db9d02df86a 100644
--- a/arbor/communication/communicator.cpp
+++ b/arbor/communication/communicator.cpp
@@ -139,12 +139,12 @@ time_type communicator::min_delay() {
 }
 
 gathered_vector<spike> communicator::exchange(std::vector<spike> local_spikes) {
-    PE(communication_exchange_sort);
+    PE(communication:exchange:sort);
     // sort the spikes in ascending order of source gid
     util::sort_by(local_spikes, [](spike s){return s.source;});
     PL();
 
-    PE(communication_exchange_gather);
+    PE(communication:exchange:gather);
     // global all-to-all to gather a local copy of the global spike list on each node.
     auto global_spikes = distributed_->gather_spikes(local_spikes);
     num_spikes_ += global_spikes.size();
diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index 5af43287e0609af9afa7b9ac024d2d99e1a8d0dc..e6f5143ba0b2968570bb5912098e234bbe046f49 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -198,7 +198,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
     set_gpu();
 
     // Integration setup
-    PE(advance_integrate_setup);
+    PE(advance:integrate:setup);
     threshold_watcher_.clear_crossings();
 
     auto n_samples = staged_samples.size();
@@ -227,11 +227,11 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         // Deliver events and accumulate mechanism current contributions.
 
-        PE(advance_integrate_events);
+        PE(advance:integrate:events);
         state_->deliverable_events.mark_until_after(state_->time);
         PL();
 
-        PE(advance_integrate_current_zero);
+        PE(advance:integrate:current:zero);
         state_->zero_currents();
         PL();
         for (auto& m: mechanisms_) {
@@ -245,7 +245,7 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
             m->update_current();
         }
 
-        PE(advance_integrate_events);
+        PE(advance:integrate:events);
         state_->deliverable_events.drop_marked_events();
 
         // Update event list and integration step times.
@@ -260,13 +260,13 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         // want to use mean current contributions as opposed to point
         // sample.)
 
-        PE(advance_integrate_stimuli)
+        PE(advance:integrate:stimuli)
         state_->add_stimulus_current();
         PL();
 
         // Take samples at cell time if sample time in this step interval.
 
-        PE(advance_integrate_samples);
+        PE(advance:integrate:samples);
         sample_events_.mark_until(state_->time_to);
         state_->take_samples(sample_events_.marked_events(), sample_time_, sample_value_);
         sample_events_.drop_marked_events();
@@ -274,10 +274,10 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         // Integrate voltage by matrix solve.
 
-        PE(advance_integrate_matrix_build);
+        PE(advance:integrate:matrix:build);
         matrix_.assemble(state_->dt_intdom, state_->voltage, state_->current_density, state_->conductivity);
         PL();
-        PE(advance_integrate_matrix_solve);
+        PE(advance:integrate:matrix:solve);
         matrix_.solve(state_->voltage);
         PL();
 
@@ -289,17 +289,17 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 
         // Update ion concentrations.
 
-        PE(advance_integrate_ionupdate);
+        PE(advance:integrate:ionupdate);
         update_ion_state();
         PL();
 
         // Update time and test for spike threshold crossings.
 
-        PE(advance_integrate_threshold);
+        PE(advance:integrate:threshold);
         threshold_watcher_.test(&state_->time_since_spike);
         PL();
 
-        PE(advance_integrate_post)
+        PE(advance:integrate:post)
         if (post_events_) {
             for (auto& m: mechanisms_) {
                 m->post_event();
@@ -313,14 +313,14 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
         // Check for non-physical solutions:
 
         if (check_voltage_mV_>0) {
-            PE(advance_integrate_physicalcheck);
+            PE(advance:integrate:physicalcheck);
             assert_voltage_bounded(check_voltage_mV_);
             PL();
         }
 
         // Check for end of integration.
 
-        PE(advance_integrate_stepsupdate);
+        PE(advance:integrate:stepsupdate);
         if (!--remaining_steps) {
             tmin_ = state_->time_bounds().first;
             remaining_steps = dt_steps(tmin_, tfinal, dt_max);
diff --git a/arbor/include/arbor/mechanism.hpp b/arbor/include/arbor/mechanism.hpp
index 62c90f99a6af5d59b9102684c4694c0ea7bc07e6..d5cc221dccb01b534f6355acc387af249f6d5388 100644
--- a/arbor/include/arbor/mechanism.hpp
+++ b/arbor/include/arbor/mechanism.hpp
@@ -9,6 +9,8 @@
 #include <arbor/fvm_types.hpp>
 #include <arbor/mechanism_abi.h>
 #include <arbor/mechinfo.hpp>
+#include <arbor/profile/profiler.hpp>
+#include <arbor/version.hpp>
 
 namespace arb {
 
@@ -32,6 +34,8 @@ public:
     mechanism(const arb_mechanism_type m,
               const arb_mechanism_interface& i): mech_{m}, iface_{i}, ppack_{} {
         if (mech_.abi_version != ARB_MECH_ABI_VERSION) throw unsupported_abi_error{mech_.abi_version};
+        state_prof_id   = profile::profiler_region_id("advance:integrate:state:"+internal_name());
+        current_prof_id = profile::profiler_region_id("advance:integrate:current:"+internal_name());
     }
     mechanism() = default;
     mechanism(const mechanism&) = delete;
@@ -55,8 +59,8 @@ public:
 
     // Forward to interface methods
     void initialize()     { ppack_.vec_t = *time_ptr_ptr; iface_.init_mechanism(&ppack_); }
-    void update_current() { ppack_.vec_t = *time_ptr_ptr; iface_.compute_currents(&ppack_); }
-    void update_state()   { ppack_.vec_t = *time_ptr_ptr; iface_.advance_state(&ppack_); }
+    void update_current() { prof_enter(current_prof_id); ppack_.vec_t = *time_ptr_ptr; iface_.compute_currents(&ppack_); prof_exit(); }
+    void update_state()   { prof_enter(state_prof_id);   ppack_.vec_t = *time_ptr_ptr; iface_.advance_state(&ppack_);    prof_exit(); }
     void update_ions()    { ppack_.vec_t = *time_ptr_ptr; iface_.write_ions(&ppack_); }
     void post_event()     { ppack_.vec_t = *time_ptr_ptr; iface_.post_event(&ppack_); }
     void deliver_events(arb_deliverable_event_stream& stream) { ppack_.vec_t  = *time_ptr_ptr; iface_.apply_events(&ppack_, &stream); }
@@ -68,6 +72,25 @@ public:
     arb_mechanism_interface iface_;
     arb_mechanism_ppack ppack_;
     arb_value_type** time_ptr_ptr = nullptr;
+
+private:
+    // Profiling hooks used by update_current() and update_state(): forward to
+    // the profiler when ARB_PROFILE_ENABLED is defined, otherwise do nothing.
+#ifdef ARB_PROFILE_ENABLED
+    void prof_enter(profile::region_id_type id) {
+        profile::profiler_enter(id);
+    }
+    void prof_exit() {
+        profile::profiler_leave();
+    }
+#else
+    void prof_enter(profile::region_id_type) {}
+    void prof_exit() {}
+#endif
+    // Profiler region ids, looked up in the (mech, iface) constructor; zero-initialised
+    // so that a default-constructed mechanism does not hold indeterminate values.
+    profile::region_id_type state_prof_id = 0;
+    profile::region_id_type current_prof_id = 0;
 };
 
 struct mechanism_layout {
diff --git a/arbor/include/arbor/profile/profiler.hpp b/arbor/include/arbor/profile/profiler.hpp
index 9dbd2c61e910311f5b4fc4eb242d303957077109..f812d13d82895d3303348ffa0107a5adcde536d9 100644
--- a/arbor/include/arbor/profile/profiler.hpp
+++ b/arbor/include/arbor/profile/profiler.hpp
@@ -39,7 +39,7 @@ void profiler_enter(std::size_t region_id);
 void profiler_leave();
 
 profile profiler_summary();
-std::size_t profiler_region_id(const char* name);
+std::size_t profiler_region_id(const std::string& name);
 
 std::ostream& operator<<(std::ostream&, const profile&);
 
diff --git a/arbor/lif_cell_group.cpp b/arbor/lif_cell_group.cpp
index 99182f3e4d6877666cd814f500c28e42cde64157..64ca1c364458adb06285d09d3a45398ea5e723c0 100644
--- a/arbor/lif_cell_group.cpp
+++ b/arbor/lif_cell_group.cpp
@@ -40,7 +40,7 @@ cell_kind lif_cell_group::get_cell_kind() const {
 }
 
 void lif_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& event_lanes) {
-    PE(advance_lif);
+    PE(advance:lif);
     if (event_lanes.size() > 0) {
         for (auto lid: util::make_span(gids_.size())) {
             // Advance each cell independently.
diff --git a/arbor/mc_cell_group.cpp b/arbor/mc_cell_group.cpp
index fa0f6a4d9fe790fa379f6eec03cce30e24ccbcee..8d140e6edc8db3eb2dcbe52852533aa6f894cdcc 100644
--- a/arbor/mc_cell_group.cpp
+++ b/arbor/mc_cell_group.cpp
@@ -394,7 +394,7 @@ void mc_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& e
 
     // Bin and collate deliverable events from event lanes.
 
-    PE(advance_eventsetup);
+    PE(advance:eventsetup);
     staged_events_.clear();
 
     // Skip event handling if nothing to deliver.
@@ -452,7 +452,7 @@ void mc_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& e
     // value as defined below, grouping together all the samples of the
     // same probe for this callback in this association.
 
-    PE(advance_samplesetup);
+    PE(advance:samplesetup);
     std::vector<sampler_call_info> call_info;
 
     std::vector<sample_event> sample_events;
@@ -533,7 +533,7 @@ void mc_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& e
     // vector of sample entries from the lowered cell sample times and values
     // and then call the callback.
 
-    PE(advance_sampledeliver);
+    PE(advance:sampledeliver);
     std::vector<sample_record> sample_records;
     sample_records.reserve(max_samples_per_call);
 
diff --git a/arbor/profile/profiler.cpp b/arbor/profile/profiler.cpp
index 9320a784cbc64a316c0544d4926a19149c1c8efc..03bfc75070706a37a8c6a4b0f4623e8d97240623 100644
--- a/arbor/profile/profiler.cpp
+++ b/arbor/profile/profiler.cpp
@@ -20,12 +20,12 @@ using util::make_span;
 namespace {
     // Check whether a string describes a valid profiler region name.
     bool is_valid_region_string(const std::string& s) {
-        if (s.size()==0u || s.front()=='_' || s.back()=='_') return false;
+        if (s.size()==0u || s.front()==':' || s.back()==':') return false;
         return s.find("__") == s.npos;
     }
 
     //
-    // Return a list of the words in the string, using '_' as the delimiter
+    // Return a list of the words in the string, using ':' as the delimiter
     // string, e.g.:
     //      "communicator"             -> {"communicator"}
-    //      "communicator_events"      -> {"communicator", "events"}
+    //      "communicator:events"      -> {"communicator", "events"}
@@ -33,11 +33,11 @@ namespace {
     std::vector<std::string> split(const std::string& str) {
         std::vector<std::string> cont;
         std::size_t first = 0;
-        std::size_t last = str.find('_');
+        std::size_t last = str.find(':');
         while (last != std::string::npos) {
             cont.push_back(str.substr(first, last - first));
             first = last + 1;
-            last = str.find('_', first);
+            last = str.find(':', first);
         }
         cont.push_back(str.substr(first, last - first));
         return cont;
@@ -91,7 +91,7 @@ class profiler {
     // The regions are assigned consecutive indexes in the order that they are
     // added to the profiler with calls to `region_index()`, with the first
     // region numbered zero.
-    std::unordered_map<const char*, region_id_type> name_index_;
+    std::unordered_map<std::string, region_id_type> name_index_;
 
     // The name of each region being recorded, with index stored in name_index_
     // is used to index into region_names_.
@@ -108,10 +108,10 @@ public:
 
     void initialize(task_system_handle& ts);
     void enter(region_id_type index);
-    void enter(const char* name);
+    void enter(const std::string& name);
     void leave();
     const std::vector<std::string>& regions() const;
-    region_id_type region_index(const char* name);
+    region_id_type region_index(const std::string& name);
     profile results() const;
 
     static profiler& get_global_profiler() {
@@ -186,7 +186,7 @@ void profiler::enter(region_id_type index) {
     recorders_[thread_ids_.at(std::this_thread::get_id())].enter(index);
 }
 
-void profiler::enter(const char* name) {
+void profiler::enter(const std::string& name) {
     if (!init_) return;
     const auto index = region_index(name);
     recorders_[thread_ids_.at(std::this_thread::get_id())].enter(index);
@@ -197,7 +197,7 @@ void profiler::leave() {
     recorders_[thread_ids_.at(std::this_thread::get_id())].leave();
 }
 
-region_id_type profiler::region_index(const char* name) {
+region_id_type profiler::region_index(const std::string& name) {
     // The name_index_ hash table is shared by all threads, so all access
     // has to be protected by a mutex.
     std::lock_guard<std::mutex> guard(mutex_);
@@ -249,6 +249,20 @@ profile profiler::results() const {
 
     p.num_threads = recorders_.size();
 
+    // Remove elements with count == 0
+    for(unsigned i=0; i<p.counts.size();) {
+        if (p.counts[i] != 0) {
+            ++i;
+            continue;
+        }
+        std::swap(p.counts[i], p.counts.back());
+        std::swap(p.times[i],  p.times.back());
+        std::swap(p.names[i],  p.names.back());
+        p.counts.pop_back();
+        p.times.pop_back();
+        p.names.pop_back();
+    }
+
     return p;
 }
 
@@ -332,7 +346,7 @@ void profiler_leave() {
     profiler::get_global_profiler().leave();
 }
 
-region_id_type profiler_region_id(const char* name) {
+region_id_type profiler_region_id(const std::string& name) {
     if (!is_valid_region_string(name)) {
         throw std::runtime_error(std::string("'")+name+"' is not a valid profiler region name.");
     }
@@ -370,7 +384,7 @@ void profiler_enter(region_id_type) {}
 profile profiler_summary();
 void profiler_print(const profile& prof, float threshold) {};
 profile profiler_summary() {return profile();}
-region_id_type profiler_region_id(const char*) {return 0;}
+region_id_type profiler_region_id(const std::string&) {return 0;}
 std::ostream& operator<<(std::ostream& o, const profile&) {return o;}
 
 #endif // ARB_HAVE_PROFILING
diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp
index b16e7a26e3ffd4cea3d1ec58e048ed322d28cf83..daa22c3ecca6a0294659b119400a3fc17a26865d 100644
--- a/arbor/simulation.cpp
+++ b/arbor/simulation.cpp
@@ -44,13 +44,13 @@ void merge_cell_events(
     std::vector<event_generator>& generators,
     pse_vector& new_events)
 {
-    PE(communication_enqueue_setup);
+    PE(communication:enqueue:setup);
     new_events.clear();
     old_events = split_sorted_range(old_events, t_from, event_time_less()).second;
     PL();
 
     if (!generators.empty()) {
-        PE(communication_enqueue_setup);
+        PE(communication:enqueue:setup);
         // Tree-merge events in [t_from, t_to) from old, pending and generator events.
 
         std::vector<event_span> spanbuf;
@@ -70,7 +70,7 @@ void merge_cell_events(
         }
         PL();
 
-        PE(communication_enqueue_tree);
+        PE(communication:enqueue:tree);
         tree_merge_events(spanbuf, new_events);
         PL();
 
@@ -79,7 +79,7 @@ void merge_cell_events(
     }
 
     // Merge (remaining) old and pending events.
-    PE(communication_enqueue_merge);
+    PE(communication:enqueue:merge);
     auto n = new_events.size();
     new_events.resize(n+pending.size()+old_events.size());
     std::merge(pending.begin(), pending.end(), old_events.begin(), old_events.end(), new_events.begin()+n);
@@ -350,7 +350,7 @@ time_type simulation_state::run(time_type tfinal, time_type dt) {
                 auto queues = util::subrange_view(event_lanes(current.id), communicator_.group_queue_range(i));
                 group->advance(current, dt, queues);
 
-                PE(advance_spikes);
+                PE(advance:spikes);
                 local_spikes(current.id).insert(group->spikes());
                 group->clear_spikes();
                 PL();
@@ -361,14 +361,14 @@ time_type simulation_state::run(time_type tfinal, time_type dt) {
     // post-synaptic spike events to per-cell pending event vectors.
     auto exchange = [this](epoch prev) {
         // Collate locally generated spikes.
-        PE(communication_exchange_gatherlocal);
+        PE(communication:exchange:gatherlocal);
         auto all_local_spikes = local_spikes(prev.id).gather();
         PL();
         // Gather generated spikes across all ranks.
         auto global_spikes = communicator_.exchange(all_local_spikes);
 
         // Present spikes to user-supplied callbacks.
-        PE(communication_spikeio);
+        PE(communication:spikeio);
         if (local_export_callback_) {
             local_export_callback_(all_local_spikes);
         }
@@ -378,7 +378,7 @@ time_type simulation_state::run(time_type tfinal, time_type dt) {
         PL();
 
         // Append events formed from global spikes to per-cell pending event queues.
-        PE(communication_walkspikes);
+        PE(communication:walkspikes);
         communicator_.make_event_queues(global_spikes, pending_events_);
         PL();
     };
@@ -388,7 +388,7 @@ time_type simulation_state::run(time_type tfinal, time_type dt) {
     auto enqueue = [this](epoch next) {
         foreach_cell(
             [&](cell_size_type i) {
-                PE(communication_enqueue_sort);
+                PE(communication:enqueue:sort);
                 util::sort(pending_events_[i]);
                 PL();
 
diff --git a/arbor/spike_source_cell_group.cpp b/arbor/spike_source_cell_group.cpp
index ee3be61b489c84759af8ec4301588f99537a8f91..82d1062ba537ec807ca0591eda67a2ff175d9566 100644
--- a/arbor/spike_source_cell_group.cpp
+++ b/arbor/spike_source_cell_group.cpp
@@ -46,7 +46,7 @@ cell_kind spike_source_cell_group::get_cell_kind() const {
 }
 
 void spike_source_cell_group::advance(epoch ep, time_type dt, const event_lane_subrange& event_lanes) {
-    PE(advance_sscell);
+    PE(advance:sscell);
 
     for (auto i: util::count_along(gids_)) {
         const auto gid = gids_[i];
diff --git a/doc/cpp/profiler.rst b/doc/cpp/profiler.rst
index df39c429115fdf77faf512598c6e923529609404..7b191972d5b45dd24fa386331b23e4b25e3ca6a2 100644
--- a/doc/cpp/profiler.rst
+++ b/doc/cpp/profiler.rst
@@ -79,7 +79,7 @@ For example, network simulations have two main regions of code to profile: those
 We would like to break these regions down further, e.g. break the `communication` time into time spent performing `spike exchange` and `event binning`.
 
 The subdivision of profiling regions is encoded in the region names.
-For example, ``PE(communication_exchange)`` indicates that we are profiling the ``exchange`` sub-region of the top level ``communication`` region.
+For example, ``PE(communication:exchange)`` indicates that we are profiling the ``exchange`` sub-region of the top level ``communication`` region.
 
 Below is an example of using sub-regions:
 
@@ -95,26 +95,26 @@ Below is an example of using sub-regions:
         int num_cells = 100;
 
         void communicate() {
-            PE(communication_sortspikes);
+            PE(communication:sortspikes);
             auto local_spikes = get_local_spikes();
             sort(local_spikes);
             PL();
 
-            PE(communication_exchange);
+            PE(communication:exchange);
             global_spikes = exchange_spikes(local_spikes);
             PL();
         }
 
         void update_cell(int i) {
-            PE(update_setup);
+            PE(update:setup);
             setup_events(i);
             PL();
 
-            PE(update_advance_state);
+            PE(update:advance:state);
             update_cell_states(i);
             PL();
 
-            PE(update_advance_current);
+            PE(update:advance:current);
             update_cell_current(i);
             PL();
         }
diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp
index 4cb4cb3a87035739bf2ebb4d9ef43a66852cf407..6e9b51ba7fbe3478b2899d5c2eba5ef65a557340 100644
--- a/modcc/modcc.cpp
+++ b/modcc/modcc.cpp
@@ -106,12 +106,10 @@ std::ostream& operator<<(std::ostream& out, const Options& opt) {
 }
 
 std::ostream& operator<<(std::ostream& out, const printer_options& popt) {
-    static const char* noyes[2] = {"no", "yes"};
     static const std::string line_end = cyan(" |") + "\n";
 
     return out <<
         table_prefix{"namespace"} << popt.cpp_namespace << line_end <<
-        table_prefix{"profile"} << noyes[popt.profile] << line_end <<
         table_prefix{"simd"} << popt.simd << line_end;
 }
 
@@ -138,7 +136,6 @@ const char* usage_str =
         "-t|--target            [Build module for target; Avaliable targets: 'cpu', 'gpu']\n"
         "-s|--simd              [Generate code with explicit SIMD vectorization]\n"
         "-S|--simd-abi          [Override SIMD ABI in generated code. Use /n suffix to force SIMD width to be size n. Examples: 'avx2', 'native/4', ...]\n"
-        "-P|--profile           [Build with profiled kernels]\n"
         "-V|--verbose           [Toggle verbose mode]\n"
         "-A|--analyse           [Toggle analysis mode]\n"
         "-T|--trace-codegen     [Leave trace marks in generated source]\n"
@@ -172,7 +169,6 @@ int main(int argc, char **argv) {
                 { to::set(opt.verbose),  to::flag,                       "-V", "--verbose" },
                 { to::set(opt.analysis), to::flag,                       "-A", "--analyse" },
                 { opt.modulename,                                        "-m", "--module" },
-                { to::set(popt.profile), to::flag,                       "-P", "--profile" },
                 { popt.cpp_namespace,                                    "-N", "--namespace" },
                 { to::action(enable_simd), to::flag,                     "-s", "--simd" },
                 { popt.simd,                                             "-S", "--simd-abi" },
diff --git a/modcc/printer/cprinter.cpp b/modcc/printer/cprinter.cpp
index bd7f5bb9ebf4ae951b15df8188dc879ec8c8d426..872f3d4344ffe1affbc2e56ac1e2d3cbcd34e10f 100644
--- a/modcc/printer/cprinter.cpp
+++ b/modcc/printer/cprinter.cpp
@@ -144,28 +144,6 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     auto ion_deps = module_.ion_deps();
     std::string fingerprint = "<placeholder>";
 
-    auto profiler_enter = [name, opt](const char* region_prefix) -> std::string {
-        static std::regex invalid_profile_chars("[^a-zA-Z0-9]");
-
-        if (opt.profile) {
-            std::string region_name = region_prefix;
-            region_name += '_';
-            region_name += std::regex_replace(name, invalid_profile_chars, "");
-
-            return
-                "{\n"
-                "    static auto id = ::arb::profile::profiler_region_id(\""
-                + region_name + "\");\n"
-                "    ::arb::profile::profiler_enter(id);\n"
-                "}\n";
-        }
-        else return "";
-    };
-
-    auto profiler_leave = [opt]() -> std::string {
-        return opt.profile? "::arb::profile::profiler_leave();\n": "";
-    };
-
     io::pfxstringstream out;
 
     ENTER(out);
@@ -177,9 +155,6 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
         "#include <"  << arb_header_prefix() << "mechanism_abi.h>\n"
         "#include <" << arb_header_prefix() << "math.hpp>\n";
 
-    opt.profile &&
-        out << "#include <" << arb_header_prefix() << "profile/profiler.hpp>\n";
-
     if (with_simd) {
         out << "#include <" << arb_header_prefix() << "simd/simd.hpp>\n";
         out << "#undef NDEBUG\n";
@@ -343,15 +318,11 @@ std::string emit_cpp_source(const Module& module_, const printer_options& opt) {
     out << popindent << "}\n\n";
 
     out << "static void advance_state(arb_mechanism_ppack* pp) {\n" << indent;
-    out << profiler_enter("advance_integrate_state");
     emit_body(state_api);
-    out << profiler_leave();
     out << popindent << "}\n\n";
 
     out << "static void compute_currents(arb_mechanism_ppack* pp) {\n" << indent;
-    out << profiler_enter("advance_integrate_current");
     emit_body(current_api);
-    out << profiler_leave();
     out << popindent << "}\n\n";
 
     out << "static void write_ions(arb_mechanism_ppack* pp) {\n" << indent;
diff --git a/modcc/printer/printeropt.hpp b/modcc/printer/printeropt.hpp
index 72130dc3221ebf7d47e4a949aca50613611d25d7..a1a20f6c081832589f2049638598b979b2680c13 100644
--- a/modcc/printer/printeropt.hpp
+++ b/modcc/printer/printeropt.hpp
@@ -14,9 +14,5 @@ struct printer_options {
     // Explicit vectorization (C printer only)? Default is none.
     simd_spec simd;
 
-    // Instrument kernels? True => use ::arb::profile regions.
-    // Currently only supported for C printer.
-
-    bool profile = false;
     bool trace_codegen = false;
 };
diff --git a/test/unit/test_abi.cpp b/test/unit/test_abi.cpp
index 29e533a8578210e57be06798bc73549cb2783461..481c0e1803fefb2c1b0340e59de3f89d8a9866b8 100644
--- a/test/unit/test_abi.cpp
+++ b/test/unit/test_abi.cpp
@@ -25,6 +25,7 @@ TEST(abi, multicore_initialisation) {
 
     arb_mechanism_type type{};
     type.abi_version = ARB_MECH_ABI_VERSION;
+    type.name       = "dummy";
     type.globals    = globals.data(); type.n_globals    = globals.size();
     type.parameters = params.data();  type.n_parameters = params.size();
     type.state_vars = states.data();  type.n_state_vars = states.size();
@@ -102,6 +103,7 @@ TEST(abi, multicore_null) {
 
     arb_mechanism_type type{};
     type.abi_version = ARB_MECH_ABI_VERSION;
+    type.name       = "dummy";
     type.globals    = globals.data(); type.n_globals    = globals.size();
     type.parameters = params.data();  type.n_parameters = params.size();
     type.state_vars = states.data();  type.n_state_vars = states.size();
@@ -166,6 +168,7 @@ TEST(abi, gpu_initialisation) {
 
     arb_mechanism_type type{};
     type.abi_version = ARB_MECH_ABI_VERSION;
+    type.name       = "dummy";
     type.globals    = globals.data(); type.n_globals    = globals.size();
     type.parameters = params.data();  type.n_parameters = params.size();
     type.state_vars = states.data();  type.n_state_vars = states.size();
@@ -242,6 +245,7 @@ TEST(abi, gpu_null) {
 
     arb_mechanism_type type{};
     type.abi_version = ARB_MECH_ABI_VERSION;
+    type.name       = "dummy";
     type.globals    = globals.data(); type.n_globals    = globals.size();
     type.parameters = params.data();  type.n_parameters = params.size();
     type.state_vars = states.data();  type.n_state_vars = states.size();
diff --git a/test/unit/test_domain_decomposition.cpp b/test/unit/test_domain_decomposition.cpp
index 0db031e1c621b7c923ccb6977a218c7ca905ea9e..6f701102c13d7e03d835c016fdf12da2b533198f 100644
--- a/test/unit/test_domain_decomposition.cpp
+++ b/test/unit/test_domain_decomposition.cpp
@@ -6,6 +6,7 @@
 #include <arbor/domdecexcept.hpp>
 #include <arbor/domain_decomposition.hpp>
 #include <arbor/load_balance.hpp>
+#include <arbor/version.hpp>
 
 #include <arborenv/default_env.hpp>
 
@@ -532,7 +533,7 @@ TEST(domain_decomposition, partition_by_groups) {
 TEST(domain_decomposition, invalid) {
     proc_allocation resources;
     resources.num_threads = 1;
-    resources.gpu_id = -1; // disable GPU if available
+    resources.gpu_id = arbenv::default_gpu();
     auto ctx = make_context(resources);
 
     {