diff --git a/src/backends/fvm_gpu.hpp b/src/backends/fvm_gpu.hpp
index 0718db34a4466d4288a7ccf75efd10bc3335ae11..6572a7c6b113ac22e9638178240a942631e7dcd5 100644
--- a/src/backends/fvm_gpu.hpp
+++ b/src/backends/fvm_gpu.hpp
@@ -8,6 +8,8 @@
 #include <memory/memory.hpp>
 #include <util/span.hpp>
 
+#include "stimulus_gpu.hpp"
+
 namespace nest {
 namespace mc {
 namespace gpu {
@@ -154,6 +156,8 @@ struct backend {
 
     using mechanism = mechanisms::mechanism_ptr<backend>;
 
+    using stimulus = mechanisms::gpu::stimulus<backend>;
+
     static mechanism make_mechanism(
         const std::string& name,
         view vec_v, view vec_i,
@@ -236,7 +240,6 @@ void assemble_matrix(matrix_update_param_pack<T, I> params, T dt) {
     }
 }
 
-
 } // namespace multicore
 } // namespace mc
 } // namespace nest
diff --git a/src/backends/fvm_multicore.hpp b/src/backends/fvm_multicore.hpp
index 8166525ee7d01c89bf77218caf21513a2d2dc422..a82229e7e2078e0d921d04c2d8f0e5091ba5eef4 100644
--- a/src/backends/fvm_multicore.hpp
+++ b/src/backends/fvm_multicore.hpp
@@ -7,6 +7,8 @@
 #include <memory/memory.hpp>
 #include <util/span.hpp>
 
+#include "stimulus_multicore.hpp"
+
 namespace nest {
 namespace mc {
 namespace multicore {
@@ -117,6 +119,8 @@ struct backend {
 
     using mechanism = mechanisms::mechanism_ptr<backend>;
 
+    using stimulus = mechanisms::multicore::stimulus<backend>;
+
     static mechanism make_mechanism(
         const std::string& name,
         view vec_v, view vec_i,
@@ -153,3 +157,4 @@ private:
 } // namespace multicore
 } // namespace mc
 } // namespace nest
+
diff --git a/src/backends/stimulus_gpu.hpp b/src/backends/stimulus_gpu.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..3f3cfa70e4b9a45bdfb7693353c7b241d5339451
--- /dev/null
+++ b/src/backends/stimulus_gpu.hpp
@@ -0,0 +1,149 @@
+#pragma once
+
+#include <cmath>
+#include <limits>
+
+#include <mechanism.hpp>
+#include <algorithms.hpp>
+#include <util/pprintf.hpp>
+
+namespace nest{
+namespace mc{
+namespace mechanisms {
+namespace gpu {
+
+namespace kernels {
+    __device__
+    inline double atomicAdd(double* address, double val) {
+        using I = unsigned long long int;
+        I* address_as_ull = (I*)address;
+        I old = *address_as_ull, assumed;
+        do {
+            assumed = old;
+            old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed)));
+        } while (assumed != old);
+        return __longlong_as_double(old);
+    }
+
+    template <typename T, typename I>
+    __global__
+    void stim_current(
+        const T* delay, const T* duration, const T* amplitude,
+        const I* node_index, int n, T t, T* current)
+    {
+        using value_type = T;
+        using iarray = I;
+
+        auto i = threadIdx.x + blockDim.x*blockIdx.x;
+
+        if (i<n) {
+            if (t>=delay[i] && t<(delay[i]+duration[i])) {
+                // use subtraction because the electrode currents are specified
+                // in terms of current into the compartment
+                atomicAdd(current+node_index[i], -amplitude[i]);
+            }
+        }
+    }
+} // namespace kernels
+
+template<class Backend>
+class stimulus : public mechanism<Backend> {
+public:
+    using base = mechanism<Backend>;
+    using value_type  = typename base::value_type;
+    using size_type   = typename base::size_type;
+
+    using array = typename base::array;
+    using iarray  = typename base::iarray;
+    using view   = typename base::view;
+    using iview  = typename base::iview;
+    using const_iview = typename base::const_iview;
+    using indexed_view_type= typename base::indexed_view_type;
+    using ion_type = typename base::ion_type;
+
+    stimulus(view vec_v, view vec_i, iarray&& node_index):
+        base(vec_v, vec_i, std::move(node_index))
+    {}
+
+    using base::size;
+
+    std::size_t memory() const override {
+        return 0;
+    }
+
+    void set_params(value_type t_, value_type dt_) override {
+        t = t_;
+        dt = dt_;
+    }
+
+    std::string name() const override {
+        return "stimulus";
+    }
+
+    mechanismKind kind() const override {
+        return mechanismKind::point;
+    }
+
+    bool uses_ion(ionKind k) const override {
+        return false;
+    }
+
+    void set_ion(ionKind k, ion_type& i, std::vector<size_type>const& index) override {
+        throw std::domain_error(
+            nest::mc::util::pprintf("mechanism % does not support ion type\n", name()));
+    }
+
+    void nrn_init() override {}
+    void nrn_state() override {}
+
+    void net_receive(int i_, value_type weight) override {
+        throw std::domain_error("stimulus mechanism should never receive an event\n");
+    }
+
+    void set_parameters(
+        const std::vector<value_type>& amp,
+        const std::vector<value_type>& dur,
+        const std::vector<value_type>& del)
+    {
+        amplitude = memory::on_gpu(amp);
+        duration = memory::on_gpu(dur);
+        delay = memory::on_gpu(del);
+    }
+
+    void nrn_current() override {
+        if (amplitude.size() != size()) {
+            throw std::domain_error("stimulus called with mismatched parameter size\n");
+        }
+
+        // don't launch a kernel if there are no stimuli
+        if (!size()) return;
+
+        auto n = size();
+        auto thread_dim = 192;
+        dim3 dim_block(thread_dim);
+        dim3 dim_grid((n+thread_dim-1)/thread_dim );
+
+        kernels::stim_current<value_type, size_type><<<dim_grid, dim_block>>>(
+            delay.data(), duration.data(), amplitude.data(),
+            node_index_.data(), n, t,
+            vec_i_.data()
+        );
+
+    }
+
+    value_type dt = 0;
+    value_type t = 0;
+
+    array amplitude;
+    array duration;
+    array delay;
+
+    using base::vec_v_;
+    using base::vec_i_;
+    using base::node_index_;
+};
+
+} // namespace gpu
+} // namespace mechanisms
+} // namespace mc
+} // namespace nest
diff --git a/src/backends/stimulus_multicore.hpp b/src/backends/stimulus_multicore.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..59deb6e108ec46c7ff7d390f508af6e4290003ae
--- /dev/null
+++ b/src/backends/stimulus_multicore.hpp
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <cmath>
+#include <limits>
+
+#include <mechanism.hpp>
+#include <algorithms.hpp>
+#include <util/pprintf.hpp>
+
+namespace nest{
+namespace mc{
+namespace mechanisms{
+namespace multicore{
+
+template<class Backend>
+class stimulus : public mechanisms::mechanism<Backend> {
+public:
+    using base = mechanisms::mechanism<Backend>;
+    using value_type  = typename base::value_type;
+    using size_type   = typename base::size_type;
+
+    using array = typename base::array;
+    using iarray  = typename base::iarray;
+    using view   = typename base::view;
+    using iview  = typename base::iview;
+    using const_iview = typename base::const_iview;
+    using indexed_view_type= typename base::indexed_view_type;
+    using ion_type = typename base::ion_type;
+
+    stimulus(view vec_v, view vec_i, iarray&& node_index):
+        base(vec_v, vec_i, std::move(node_index))
+    {}
+
+    using base::size;
+
+    std::size_t memory() const override {
+        return 0;
+    }
+
+    void set_params(value_type t_, value_type dt_) override {
+        t = t_;
+        dt = dt_;
+    }
+
+    std::string name() const override {
+        return "stimulus";
+    }
+
+    mechanisms::mechanismKind kind() const override {
+        return mechanisms::mechanismKind::point;
+    }
+
+    bool uses_ion(mechanisms::ionKind k) const override {
+        return false;
+    }
+
+    void set_ion(mechanisms::ionKind k, ion_type& i, std::vector<size_type>const& index) override {
+        throw std::domain_error(
+                nest::mc::util::pprintf("mechanism % does not support ion type\n", name()));
+    }
+
+    void nrn_init() override {}
+    void nrn_state() override {}
+
+    void net_receive(int i_, value_type weight) override {
+        throw std::domain_error("stimulus mechanism should never receive an event\n");
+    }
+
+    void set_parameters(
+        const std::vector<value_type>& amp,
+        const std::vector<value_type>& dur,
+        const std::vector<value_type>& del)
+    {
+        amplitude = amp;
+        duration = dur;
+        delay = del;
+    }
+
+    void nrn_current() override {
+        if (amplitude.size() != size()) {
+            throw std::domain_error("stimulus called with mismatched parameter size\n");
+        }
+        indexed_view_type vec_i(vec_i_, node_index_);
+        int n = size();
+        for(int i=0; i<n; ++i) {
+            if (t>=delay[i] && t<(delay[i]+duration[i])) {
+                // use subtraction because the electrod currents are specified
+                // in terms of current into the compartment
+                vec_i[i] -= amplitude[i];
+            }
+        }
+    }
+
+    value_type dt = 0;
+    value_type t = 0;
+
+    std::vector<value_type> amplitude;
+    std::vector<value_type> duration;
+    std::vector<value_type> delay;
+
+    using base::vec_v_;
+    using base::vec_i_;
+    using base::node_index_;
+};
+
+} // namespace multicore
+} // namespace mechanisms
+} // namespace mc
+} // namespace nest
+
diff --git a/src/fvm_multicell.hpp b/src/fvm_multicell.hpp
index 3719648a49ad3de6db1cf2c8fa98815bc999e1ec..4dc1966fd6e92733b7392698e758281fe61cc397 100644
--- a/src/fvm_multicell.hpp
+++ b/src/fvm_multicell.hpp
@@ -66,8 +66,6 @@ public:
     using target_handle = std::pair<size_type, size_type>;
     using probe_handle = std::pair<const array fvm_multicell::*, size_type>;
 
-    using stimulus_store_type = std::vector<std::pair<size_type, i_clamp>>;
-
     fvm_multicell() = default;
 
     void resting_potential(value_type potential_mV) {
@@ -106,6 +104,9 @@ public:
     /// mechanism type
     using mechanism = typename backend::mechanism;
 
+    /// stimulus type
+    using stimulus = typename backend::stimulus;
+
     /// ion species storage
     using ion = typename backend::ion;
 
@@ -169,13 +170,14 @@ public:
         return (v>-1000.) && (v<1000.);
     }
 
-    /// return reference to the stimuli
-    stimulus_store_type& stimuli() {
-        return stimuli_;
-    }
-
-    stimulus_store_type const& stimuli() const {
-        return stimuli_;
+    /// Return reference to the mechanism that matches name.
+    /// The reference is const, because it this information should not be
+    /// modified by the caller, however it is needed for unit testing.
+    util::optional<const mechanism&> find_mechanism(const std::string& name) const {
+        auto it = std::find_if(
+            std::begin(mechanisms_), std::end(mechanisms_),
+            [&name](const mechanism& m) {return m->name()==name;});
+        return it==mechanisms_.end() ? util::nothing: util::just(*it);
     }
 
     value_type time() const { return t_; }
@@ -219,8 +221,6 @@ private:
     /// the ion species
     std::map<mechanisms::ionKind, ion> ions_;
 
-    stimulus_store_type stimuli_;
-
     std::vector<std::pair<const array fvm_multicell::*, size_type>> probes_;
 
     /// Compact representation of the control volumes into which a segment is
@@ -518,10 +518,35 @@ void fvm_multicell<Backend>::initialize(
             map_entry.push_back(syn_cv);
         }
 
+        //
         // add the stimuli
+        //
+
+        // step 1: pack the index and parameter information into flat vectors
+        std::vector<size_type> stim_index;
+        std::vector<value_type> stim_durations;
+        std::vector<value_type> stim_delays;
+        std::vector<value_type> stim_amplitudes;
         for (const auto& stim: c.stimuli()) {
             auto idx = comp_ival.first+find_cv_index(stim.location, graph);
-            stimuli_.push_back({idx, stim.clamp});
+            stim_index.push_back(idx);
+            stim_durations.push_back(stim.clamp.duration());
+            stim_delays.push_back(stim.clamp.delay());
+            stim_amplitudes.push_back(stim.clamp.amplitude());
+        }
+
+        // step 2: create the stimulus mechanism and initialize the stimulus
+        //         parameters
+        // NOTE: the indexes and associated metadata (durations, delays,
+        //       amplitudes) have not been permuted to ascending cv index order,
+        //       as is the case with other point processes.
+        //       This is because the hard-coded stimulus mechanism makes no
+        //       optimizations that rely on this assumption.
+        if (stim_index.size()) {
+            auto stim = new stimulus(
+                voltage_, current_, memory::make_const_view(stim_index));
+            stim->set_parameters(stim_amplitudes, stim_durations, stim_delays);
+            mechanisms_.push_back(mechanism(stim));
         }
 
         // detector handles are just their corresponding compartment indices
@@ -739,17 +764,6 @@ void fvm_multicell<Backend>::advance(double dt) {
         m->nrn_current();
         PL();
     }
-
-    // add current contributions from stimuli
-    for (auto& stim : stimuli_) {
-        auto ie = stim.second.amplitude(t_); // [nA]
-        auto loc = stim.first;
-
-        // note: current_ and ie have the same units [nA]
-        if (ie!=0.) {
-            current_[loc] = current_[loc] - ie;
-        }
-    }
     PL();
 
     // solve the linear system
diff --git a/tests/unit/test_fvm_multi.cpp b/tests/unit/test_fvm_multi.cpp
index 308081063214ad2d4f58859ebb96dbf9fffcc242..a4d7e90243c51a05fa11b532ee1a6f11a3004484 100644
--- a/tests/unit/test_fvm_multi.cpp
+++ b/tests/unit/test_fvm_multi.cpp
@@ -181,8 +181,11 @@ TEST(fvm_multi, stimulus)
     // delay     |   5  |    1
     // duration  |  80  |    2
     // amplitude | 0.3  |  0.1
-    // compmnt   |   4  |    0
-
+    // CV        |   4  |    0
+    //
+    // The implementation of the stimulus is tested by creating a lowered cell, then
+    // testing that the correct currents are injected at the correct control volumes
+    // as during the stimulus windows.
 
     std::vector<fvm_cell::target_handle> targets;
     std::vector<fvm_cell::detector_handle> detectors;
@@ -191,18 +194,44 @@ TEST(fvm_multi, stimulus)
     fvm_cell fvcell;
     fvcell.initialize(singleton_view(cell), detectors, targets, probes);
 
-    auto& stim = fvcell.stimuli();
-    EXPECT_EQ(stim.size(), 2u);
+    auto ref = fvcell.find_mechanism("stimulus");
+    ASSERT_TRUE(ref) << "no stimuli retrieved from lowered fvm cell: expected 2";
+
+    auto& stims = ref.get();
+    EXPECT_EQ(stims->size(), 2u);
 
-    EXPECT_EQ(stim[0].first, 4u);
-    EXPECT_EQ(stim[1].first, 0u);
+    auto I = fvcell.current();
+
+    auto soma_idx = 0u;
+    auto dend_idx = 4u;
+
+    // test 1: Test that no current is injected at t=0
+    memory::fill(I, 0.);
+    stims->set_params(0, 0.1);
+    stims->nrn_current();
+    for (auto i: I) {
+        EXPECT_EQ(i, 0.);
+    }
 
-    EXPECT_EQ(stim[0].second.delay(), 5.);
-    EXPECT_EQ(stim[1].second.delay(), 1.);
-    EXPECT_EQ(stim[0].second.duration(), 80.);
-    EXPECT_EQ(stim[1].second.duration(),  2.);
-    EXPECT_EQ(stim[0].second.amplitude(), 0.3);
-    EXPECT_EQ(stim[1].second.amplitude(), 0.1);
+    // test 2: Test that current is injected at soma at t=1
+    stims->set_params(1, 0.1);
+    stims->nrn_current();
+    EXPECT_EQ(I[soma_idx], -0.1);
+
+    // test 3: Test that current is still injected at soma at t=1.5.
+    //         Note that we test for injection of -0.2, because the
+    //         current contributions are accumulative, and the current
+    //         values have not been cleared since the last update.
+    stims->set_params(1.5, 0.1);
+    stims->nrn_current();
+    EXPECT_EQ(I[soma_idx], -0.2);
+
+    // test 4: test at t=10ms, when the the soma stim is not active, and
+    //         dendrite stimulus is injecting a current of 0.3 nA
+    stims->set_params(10, 0.1);
+    stims->nrn_current();
+    EXPECT_EQ(I[soma_idx], -0.2);
+    EXPECT_EQ(I[dend_idx], -0.3);
 }
 
 // test that mechanism indexes are computed correctly