diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp
index f2a454852c33d169d2013f09795251a5f56aff17..d033f464f343d52bada8c2984d761311852faa71 100644
--- a/arbor/fvm_lowered_cell_impl.hpp
+++ b/arbor/fvm_lowered_cell_impl.hpp
@@ -114,6 +114,14 @@ private:
     static unsigned dt_steps(value_type t0, value_type t1, value_type dt) {
         return t0>=t1? 0: 1+(unsigned)((t1-t0)/dt);
     }
+
+    // Sets the GPU used for CUDA calls made from the calling thread.
+    // The GPU is the one in the execution context context_.
+    // If this is not called, the thread may attempt to launch kernels on a
+    // different GPU, leading to crashes.
+    void set_gpu() {
+        if (context_.gpu->has_gpu()) context_.gpu->set_gpu();
+    }
 };
 
 template <typename Backend>
@@ -152,6 +160,8 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate(
 {
     using util::as_const;
 
+    set_gpu();
+
     // Integration setup
     PE(advance_integrate_setup);
     threshold_watcher_.clear_crossings();
@@ -301,6 +311,8 @@ void fvm_lowered_cell_impl<B>::initialize(
     using util::value_by_key;
     using util::keys;
 
+    set_gpu();
+
     std::vector<mc_cell> cells;
     const std::size_t ncell = gids.size();
 
diff --git a/arbor/gpu_context.cpp b/arbor/gpu_context.cpp
index 34b078eab9fa066e70250e8ade4266a32cf42b2e..a71b5a902773a3c29a89a80a0ffc4cbf8831d33d 100644
--- a/arbor/gpu_context.cpp
+++ b/arbor/gpu_context.cpp
@@ -38,7 +38,12 @@ bool gpu_context::has_gpu() const {
 
 #ifndef ARB_HAVE_GPU
 
+void gpu_context::set_gpu() const {
+    throw arbor_exception("Arbor must be compiled with CUDA support to set a GPU.");
+}
+
 void gpu_context::synchronize_for_managed_access() const {}
+
 gpu_context::gpu_context(int) {
     throw arbor_exception("Arbor must be compiled with CUDA support to select a GPU.");
 }
@@ -52,13 +57,12 @@ gpu_context::gpu_context(int gpu_id) {
         throw arbor_exception("Invalid GPU id " + std::to_string(gpu_id));
     }
 
-    // Set the device
-    status = cudaSetDevice(gpu_id);
-    if (status!=cudaSuccess) {
-        throw arbor_exception("Unable to select GPU id " + std::to_string(gpu_id));
-    }
-
+    // Set the device.
+    // The device must also be set for every host thread that uses the GPU;
+    // however, performing the call here is a useful early check that the GPU
+    // can be selected and initialized.
     id_ = gpu_id;
+    set_gpu();
 
     // Record the device attributes
     attributes_ = 0;
@@ -76,6 +80,19 @@ void gpu_context::synchronize_for_managed_access() const {
     }
 }
 
+void gpu_context::set_gpu() const {
+    if (!has_gpu()) {
+        throw arbor_exception(
+            "Call to gpu_context::set_gpu() when there is no GPU selected.");
+    }
+    auto status = cudaSetDevice(id_);
+    if (status != cudaSuccess) {
+        throw arbor_exception(
+            "Unable to select GPU id " + std::to_string(id_)
+            + ": " + cudaGetErrorName(status));
+    }
+}
+
 #endif
 
 } // namespace arb
diff --git a/arbor/gpu_context.hpp b/arbor/gpu_context.hpp
index 2b1e4f496a2d47c3a848e2bd08f1c58a5f02ff0e..44de47493c42614ce82f3c4d9ef4e6dec8585a8d 100644
--- a/arbor/gpu_context.hpp
+++ b/arbor/gpu_context.hpp
@@ -17,6 +17,9 @@ public:
     bool has_atomic_double() const;
     void synchronize_for_managed_access() const;
     bool has_gpu() const;
+    // Calls cudaSetDevice(id_), so that GPU calls made by the calling thread
+    // will be executed on this context's GPU.
+    void set_gpu() const;
 };
 
 using gpu_context_handle = std::shared_ptr<gpu_context>;
diff --git a/arbor/memory/cuda_wrappers.cpp b/arbor/memory/cuda_wrappers.cpp
index 7e694d7e347bb2847987444ee9d7b2ee6b680af7..d31c82b40cdb3c44b9827a3f79596c443535018f 100644
--- a/arbor/memory/cuda_wrappers.cpp
+++ b/arbor/memory/cuda_wrappers.cpp
@@ -1,6 +1,8 @@
 #include <cstdlib>
 #include <string>
 
+#include <arbor/arbexcept.hpp>
+
 #include "util.hpp"
 
 #ifdef ARB_HAVE_GPU
@@ -8,8 +10,8 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 
-#define LOG_CUDA_ERROR(error, msg)\
-LOG_ERROR("memory:: "+std::string(__func__)+" "+std::string((msg))+": "+cudaGetErrorString(error))
+#define HANDLE_CUDA_ERROR(error, msg)\
+throw arbor_exception("CUDA memory:: "+std::string(__func__)+" "+std::string((msg))+": "+cudaGetErrorString(error))
 
 namespace arb {
 namespace memory {
@@ -18,29 +20,25 @@ using std::to_string;
 
 void cuda_memcpy_d2d(void* dest, const void* src, std::size_t n) {
     if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyDeviceToDevice)) {
-        LOG_CUDA_ERROR(error, "n="+to_string(n));
-        abort();
+        HANDLE_CUDA_ERROR(error, "n="+to_string(n));
     }
 }
 
 void cuda_memcpy_d2h(void* dest, const void* src, std::size_t n) {
     if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyDeviceToHost)) {
-        LOG_CUDA_ERROR(error, "n="+to_string(n));
-        abort();
+        HANDLE_CUDA_ERROR(error, "n="+to_string(n));
     }
 }
 
 void cuda_memcpy_h2d(void* dest, const void* src, std::size_t n) {
     if (auto error = cudaMemcpy(dest, src, n, cudaMemcpyHostToDevice)) {
-        LOG_CUDA_ERROR(error, "n="+to_string(n));
-        abort();
+        HANDLE_CUDA_ERROR(error, "n="+to_string(n));
    }
 }
 
 void* cuda_host_register(void* ptr, std::size_t size) {
     if (auto error = cudaHostRegister(ptr, size, cudaHostRegisterPortable)) {
-        LOG_CUDA_ERROR(error, "unable to register host memory");
-        return nullptr;
+        HANDLE_CUDA_ERROR(error, "unable to register host memory");
     }
     return ptr;
 }
@@ -53,8 +51,7 @@ void* cuda_malloc(std::size_t n) {
     void* ptr;
 
     if (auto error = cudaMalloc(&ptr, n)) {
-        LOG_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes");
-        ptr = nullptr;
+        HANDLE_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes");
     }
     return ptr;
 }
@@ -63,15 +60,14 @@ void* cuda_malloc_managed(std::size_t n) {
     void* ptr;
 
     if (auto error = cudaMallocManaged(&ptr, n)) {
-        LOG_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes");
-        ptr = nullptr;
+        HANDLE_CUDA_ERROR(error, "unable to allocate "+to_string(n)+" bytes of managed memory");
     }
     return ptr;
 }
 
 void cuda_free(void* ptr) {
     if (auto error = cudaFree(ptr)) {
-        LOG_CUDA_ERROR(error, "");
+        HANDLE_CUDA_ERROR(error, "");
     }
 }
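
For context, CUDA's current device is per host thread: a cudaSetDevice() call on one thread does not affect any other, so a task-system worker that issues GPU work without first selecting the device will run on device 0. The sketch below is not part of the patch; it only illustrates the failure mode that set_gpu() guards against, with gpu_id as a hypothetical stand-in for gpu_context::id_.

// Sketch only: demonstrates that CUDA device selection is per host thread,
// which is why fvm_lowered_cell_impl calls set_gpu() at the start of
// initialize() and integrate().
#include <cstdio>
#include <thread>
#include <cuda_runtime.h>

int main() {
    const int gpu_id = 1;      // hypothetical; assumes a multi-GPU node
    cudaSetDevice(gpu_id);     // affects the main thread only

    std::thread worker([=] {
        int current = -1;
        cudaGetDevice(&current);
        // A fresh thread starts on the default device (0), not gpu_id.
        std::printf("worker starts on device %d, wanted %d\n", current, gpu_id);

        // Equivalent of gpu_context::set_gpu(): select the context's GPU
        // before any allocation or kernel launch from this thread.
        if (cudaSetDevice(gpu_id) != cudaSuccess) {
            std::fprintf(stderr, "unable to select GPU %d\n", gpu_id);
            return;
        }
        void* p = nullptr;
        if (cudaMalloc(&p, 1024) == cudaSuccess) {  // now lands on gpu_id
            cudaFree(p);
        }
    });
    worker.join();
    return 0;
}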