From 2c135d753e90ca4abbe027ef06063cbdcc673ef0 Mon Sep 17 00:00:00 2001 From: noraabiakar <nora.abiakar@gmail.com> Date: Wed, 22 Aug 2018 18:53:44 +0200 Subject: [PATCH] Create gpu_context and manage it as part of execution_context (#566) * Add gpu_context as part of the execution context, containing information about GPU availability, managed memory synchronization, and atomic double availability. * Choose between ON and OFF for ARB_WITH_GPU in CMake. If ON, compile for K20, K80, and P100. Note that we still need compile-time information about the GPU in cuda_atomic.hpp for atomicAdd(double*, double). This is because the function is only defined when the program is compiled for sm_60 or higher. (Sketches illustrating both points are appended after the patch.) --- CMakeLists.txt | 28 ++------- arbor/CMakeLists.txt | 2 +- arbor/backends/gpu/fvm.hpp | 6 +- arbor/backends/gpu/managed_ptr.cpp | 12 ---- arbor/backends/gpu/managed_ptr.hpp | 35 +----------- arbor/backends/gpu/stack.hpp | 15 +++-- arbor/backends/gpu/threshold_watcher.hpp | 7 ++- arbor/backends/multicore/fvm.hpp | 7 ++- .../backends/multicore/threshold_watcher.hpp | 6 +- arbor/cell_group_factory.cpp | 6 +- arbor/cell_group_factory.hpp | 7 ++- arbor/fvm_lowered_cell.hpp | 3 +- arbor/fvm_lowered_cell_impl.cpp | 6 +- arbor/fvm_lowered_cell_impl.hpp | 7 ++- arbor/gpu_context.cpp | 11 ++++ arbor/gpu_context.hpp | 57 +++++++++++++++++++ arbor/partition_load_balance.cpp | 4 +- arbor/simulation.cpp | 2 +- include/arbor/execution_context.hpp | 9 ++- test/unit/CMakeLists.txt | 1 + test/unit/test_backend.cpp | 5 +- test/unit/test_fvm_lowered.cpp | 20 +++++-- test/unit/test_gpu_stack.cu | 16 ++++-- test/unit/test_mc_cell_group.cpp | 4 +- test/unit/test_mc_cell_group_gpu.cpp | 3 +- test/unit/test_probe.cpp | 4 +- test/unit/test_spikes.cpp | 3 +- 27 files changed, 176 insertions(+), 110 deletions(-) delete mode 100644 arbor/backends/gpu/managed_ptr.cpp create mode 100644 arbor/gpu_context.cpp create mode 100644 arbor/gpu_context.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b4fc22b8..f21e36d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,11 +8,6 @@ enable_language(CXX) # Configure-time build options for Arbor: #---------------------------------------------------------- -# Enable CUDA support with ARB_GPU_MODEL. - -set(ARB_GPU_MODEL "none" CACHE STRING "The target GPU architecture: one of {none,K20,K80,P100}") -set_property(CACHE ARB_GPU_MODEL PROPERTY STRINGS none K20 K80 P100 ) - # Specify target archiecture. set(ARB_ARCH "" CACHE STRING "Target architecture for arbor libraries") @@ -37,6 +32,7 @@ set(ARB_VALIDATION_DATA_DIR "${PROJECT_SOURCE_DIR}/validation/data" CACHE PATH #---------------------------------------------------------- # Configure-time features for Arbor: #---------------------------------------------------------- +option(ARB_WITH_GPU "build with GPU support" OFF) option(ARB_WITH_MPI "build with MPI support" OFF) @@ -72,7 +68,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) # Add CUDA as a language if GPU support requested. # (This has to be set early so as to enable CUDA tests in generator # expressions.)
-if(NOT ARB_GPU_MODEL MATCHES "none") +if(ARB_WITH_GPU) enable_language(CUDA) endif() @@ -169,7 +165,7 @@ endif() # CUDA support #-------------- -if(NOT ARB_GPU_MODEL MATCHES "none") +if(ARB_WITH_GPU) set(ARB_WITH_CUDA TRUE) add_compile_options( @@ -177,22 +173,10 @@ if(NOT ARB_GPU_MODEL MATCHES "none") "$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe=--diag_suppress=unsigned_compare_with_zero>") target_compile_definitions(arbor-private-deps INTERFACE ARB_HAVE_GPU) - if(ARB_GPU_MODEL MATCHES "K20") - set(cuda_arch sm_35) - set(arb_cuda_arch 350) - elseif(ARB_GPU_MODEL MATCHES "K80") - set(cuda_arch sm_37) - set(arb_cuda_arch 370) - elseif(ARB_GPU_MODEL MATCHES "P100") - set(cuda_arch sm_60) - set(arb_cuda_arch 600) - else() - message(FATAL_ERROR "-- GPU architecture '${ARB_GPU_MODEL}' not supported. Use one of {none, K20, K80, P100}") - endif() + target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_35,code=sm_35>) + target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_37,code=sm_37>) + target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_60,code=sm_60>) - target_compile_options(arbor-private-deps INTERFACE - $<$<COMPILE_LANGUAGE:CUDA>:-arch=${cuda_arch}>) - target_compile_definitions(arbor-private-deps INTERFACE ARB_CUDA_ARCH=${arb_cuda_arch}) endif() # Use libunwind if available for pretty printing stack traces diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt index b07fddd6..37917fb7 100644 --- a/arbor/CMakeLists.txt +++ b/arbor/CMakeLists.txt @@ -10,6 +10,7 @@ set(arbor_sources builtin_mechanisms.cpp cell_group_factory.cpp common_types_io.cpp + gpu_context.cpp local_alloc.cpp event_binner.cpp fvm_layout.cpp @@ -54,7 +55,6 @@ if(ARB_WITH_CUDA) backends/gpu/stimulus.cpp backends/gpu/stimulus.cu backends/gpu/threshold_watcher.cu - backends/gpu/managed_ptr.cpp backends/gpu/matrix_assemble.cu backends/gpu/matrix_interleave.cu backends/gpu/matrix_solve.cu diff --git a/arbor/backends/gpu/fvm.hpp b/arbor/backends/gpu/fvm.hpp index 262d9e3c..9d02eea3 100644 --- a/arbor/backends/gpu/fvm.hpp +++ b/arbor/backends/gpu/fvm.hpp @@ -50,7 +50,8 @@ struct backend { static threshold_watcher voltage_watcher( const shared_state& state, const std::vector<index_type>& cv, - const std::vector<value_type>& thresholds) + const std::vector<value_type>& thresholds, + const execution_context& context) { return threshold_watcher( state.cv_to_cell.data(), @@ -58,7 +59,8 @@ struct backend { state.time_to.data(), state.voltage.data(), cv, - thresholds); + thresholds, + context); } }; diff --git a/arbor/backends/gpu/managed_ptr.cpp b/arbor/backends/gpu/managed_ptr.cpp deleted file mode 100644 index 90cd37a9..00000000 --- a/arbor/backends/gpu/managed_ptr.cpp +++ /dev/null @@ -1,12 +0,0 @@ -namespace arb { -namespace gpu { - -// TODO: make this a runtime check - -bool device_concurrent_managed_access() { - return (ARB_CUDA_ARCH >= 600); // all GPUs from P100 -} - -} // namespace gpu -} // namespace arb - diff --git a/arbor/backends/gpu/managed_ptr.hpp b/arbor/backends/gpu/managed_ptr.hpp index 9d977662..98202d32 100644 --- a/arbor/backends/gpu/managed_ptr.hpp +++ b/arbor/backends/gpu/managed_ptr.hpp @@ -8,18 +8,6 @@ namespace arb { namespace gpu { -// Pre-pascal NVIDIA GPUs don't support page faulting for GPU reads of managed -// memory, so when a kernel is launched, all managed memory is copied to the -// GPU. 
The upshot of this is that no CPU-side reads can be made of _any_ -// managed memory can be made whe _any_ kernel is running. -// -// The following helper function can be used to determine whether -// synchronization is required before CPU-side reads of managed memory: if the -// device concurrentManagedAccess property is zero, then safe host-side requires -// a synchronization. - -bool device_concurrent_managed_access(); - // used to indicate that the type pointed to by the managed_ptr is to be // constructed in the managed_ptr constructor struct construct_in_place_tag {}; @@ -41,11 +29,7 @@ public: using element_type = T; using pointer = element_type*; using reference = element_type&; - - managed_ptr(): - concurrent_managed_access(device_concurrent_managed_access()) - {} - + managed_ptr(const managed_ptr& other) = delete; // Allocate memory and construct in place using args. @@ -53,18 +37,14 @@ public: // point of the wrapper is to hide the complexity of allocating managed // memory and constructing a type in place. template <typename... Args> - managed_ptr(construct_in_place_tag, Args&&... args): - concurrent_managed_access(device_concurrent_managed_access()) - { + managed_ptr(construct_in_place_tag, Args&&... args) { memory::managed_allocator<element_type> allocator; data_ = allocator.allocate(1u); synchronize(); data_ = new (data_) element_type(std::forward<Args>(args)...); } - managed_ptr(managed_ptr&& other): - concurrent_managed_access(other.concurrent_managed_access) - { + managed_ptr(managed_ptr&& other) { std::swap(other.data_, data_); } @@ -113,16 +93,7 @@ public: cudaDeviceSynchronize(); } - // Synchronize if concurrent host-side access is not supported. - void host_access() const { - if (!concurrent_managed_access) { - cudaDeviceSynchronize(); - } - } - private: - const bool concurrent_managed_access; - __host__ __device__ bool is_allocated() const { return data_!=nullptr; diff --git a/arbor/backends/gpu/stack.hpp b/arbor/backends/gpu/stack.hpp index 53293463..1d177199 100644 --- a/arbor/backends/gpu/stack.hpp +++ b/arbor/backends/gpu/stack.hpp @@ -5,6 +5,7 @@ #include <arbor/assert.hpp> #include "backends/gpu/managed_ptr.hpp" +#include "gpu_context.hpp" #include "memory/allocator.hpp" #include "stack_storage.hpp" @@ -31,7 +32,11 @@ class stack { using allocator = memory::managed_allocator<U>; using storage_type = stack_storage<value_type>; + + using gpu_context_handle = std::shared_ptr<arb::gpu_context>; + managed_ptr<storage_type> storage_; + gpu_context_handle gpu_context_; managed_ptr<storage_type> create_storage(unsigned n) { auto p = make_managed_ptr<storage_type>(); @@ -45,9 +50,10 @@ public: stack& operator=(const stack& other) = delete; stack(const stack& other) = delete; - stack(): storage_(create_storage(0)) {} + stack(const gpu_context_handle& gpu_ctx): + storage_(create_storage(0)), gpu_context_(gpu_ctx) {} - stack(stack&& other): storage_(create_storage(0)) { + stack(stack&& other): storage_(create_storage(0)), gpu_context_(other.gpu_context_) { std::swap(storage_, other.storage_); } @@ -56,7 +62,8 @@ public: return *this; } - explicit stack(unsigned capacity): storage_(create_storage(capacity)) {} + explicit stack(unsigned capacity, const gpu_context_handle& gpu_ctx): + storage_(create_storage(capacity)), gpu_context_(gpu_ctx) {} ~stack() { storage_.synchronize(); @@ -68,7 +75,7 @@ public: // Perform any required synchronization if concurrent host-side access is not supported. // (Correctness still requires that GPU operations on this stack are complete.) 
void host_access() const { - storage_.host_access(); + gpu_context_->synchronize_for_managed_access(); } void clear() { diff --git a/arbor/backends/gpu/threshold_watcher.hpp b/arbor/backends/gpu/threshold_watcher.hpp index 7cc62f8a..68942b71 100644 --- a/arbor/backends/gpu/threshold_watcher.hpp +++ b/arbor/backends/gpu/threshold_watcher.hpp @@ -40,13 +40,16 @@ public: threshold_watcher(threshold_watcher&& other) = default; threshold_watcher& operator=(threshold_watcher&& other) = default; + threshold_watcher(const execution_context& ctx): stack_(ctx.gpu) {} + threshold_watcher( const fvm_index_type* cv_to_cell, const fvm_value_type* t_before, const fvm_value_type* t_after, const fvm_value_type* values, const std::vector<fvm_index_type>& cv_index, - const std::vector<fvm_value_type>& thresholds + const std::vector<fvm_value_type>& thresholds, + const execution_context& ctx ): cv_to_cell_(cv_to_cell), t_before_(t_before), @@ -58,7 +61,7 @@ public: v_prev_(memory::const_host_view<fvm_value_type>(values, cv_index.size())), // TODO: allocates enough space for 10 spikes per watch. // A more robust approach might be needed to avoid overflows. - stack_(10*size()) + stack_(10*size(), ctx.gpu) { crossings_.reserve(stack_.capacity()); reset(); diff --git a/arbor/backends/multicore/fvm.hpp b/arbor/backends/multicore/fvm.hpp index c4318763..99dcd2b6 100644 --- a/arbor/backends/multicore/fvm.hpp +++ b/arbor/backends/multicore/fvm.hpp @@ -2,6 +2,7 @@ #include <string> #include <vector> +#include <arbor/execution_context.hpp> #include "backends/event.hpp" #include "backends/multicore/matrix_state.hpp" @@ -46,7 +47,8 @@ struct backend { static threshold_watcher voltage_watcher( const shared_state& state, const std::vector<index_type>& cv, - const std::vector<value_type>& thresholds) + const std::vector<value_type>& thresholds, + const execution_context& context) { return threshold_watcher( state.cv_to_cell.data(), @@ -54,7 +56,8 @@ struct backend { state.time_to.data(), state.voltage.data(), cv, - thresholds); + thresholds, + context); } }; diff --git a/arbor/backends/multicore/threshold_watcher.hpp b/arbor/backends/multicore/threshold_watcher.hpp index dcb38168..13e55257 100644 --- a/arbor/backends/multicore/threshold_watcher.hpp +++ b/arbor/backends/multicore/threshold_watcher.hpp @@ -1,6 +1,7 @@ #pragma once #include <arbor/assert.hpp> +#include <arbor/execution_context.hpp> #include <arbor/fvm_types.hpp> #include <arbor/math.hpp> @@ -14,13 +15,16 @@ class threshold_watcher { public: threshold_watcher() = default; + threshold_watcher(const execution_context& ctx) {} + threshold_watcher( const fvm_index_type* cv_to_cell, const fvm_value_type* t_before, const fvm_value_type* t_after, const fvm_value_type* values, const std::vector<fvm_index_type>& cv_index, - const std::vector<fvm_value_type>& thresholds + const std::vector<fvm_value_type>& thresholds, + const execution_context& context ): cv_to_cell_(cv_to_cell), t_before_(t_before), diff --git a/arbor/cell_group_factory.cpp b/arbor/cell_group_factory.cpp index d82bada0..0fc0a08b 100644 --- a/arbor/cell_group_factory.cpp +++ b/arbor/cell_group_factory.cpp @@ -18,13 +18,13 @@ cell_group_ptr make_cell_group(Args&&... 
args) { return cell_group_ptr(new Impl(std::forward<Args>(args)...)); } -cell_group_factory cell_kind_implementation(cell_kind ck, backend_kind bk) { +cell_group_factory cell_kind_implementation(cell_kind ck, backend_kind bk, const execution_context& ctx) { using gid_vector = std::vector<cell_gid_type>; switch (ck) { case cell_kind::cable1d_neuron: - return [bk](const gid_vector& gids, const recipe& rec) { - return make_cell_group<mc_cell_group>(gids, rec, make_fvm_lowered_cell(bk)); + return [bk, ctx](const gid_vector& gids, const recipe& rec) { + return make_cell_group<mc_cell_group>(gids, rec, make_fvm_lowered_cell(bk, ctx)); }; case cell_kind::spike_source: diff --git a/arbor/cell_group_factory.hpp b/arbor/cell_group_factory.hpp index 47c476e5..0cd72de5 100644 --- a/arbor/cell_group_factory.hpp +++ b/arbor/cell_group_factory.hpp @@ -11,6 +11,7 @@ #include <arbor/common_types.hpp> #include <arbor/recipe.hpp> +#include <arbor/execution_context.hpp> #include "cell_group.hpp" @@ -18,10 +19,10 @@ namespace arb { using cell_group_factory = std::function<cell_group_ptr (const std::vector<cell_gid_type>&, const recipe&)>; -cell_group_factory cell_kind_implementation(cell_kind, backend_kind); +cell_group_factory cell_kind_implementation(cell_kind, backend_kind, const execution_context&); -inline bool cell_kind_supported(cell_kind c, backend_kind b) { - return static_cast<bool>(cell_kind_implementation(c, b)); +inline bool cell_kind_supported(cell_kind c, backend_kind b, const execution_context& ctx) { + return static_cast<bool>(cell_kind_implementation(c, b, ctx)); } } // namespace arb diff --git a/arbor/fvm_lowered_cell.hpp b/arbor/fvm_lowered_cell.hpp index 81a26887..bb4184fc 100644 --- a/arbor/fvm_lowered_cell.hpp +++ b/arbor/fvm_lowered_cell.hpp @@ -4,6 +4,7 @@ #include <vector> #include <arbor/common_types.hpp> +#include <arbor/execution_context.hpp> #include <arbor/fvm_types.hpp> #include <arbor/recipe.hpp> @@ -44,6 +45,6 @@ struct fvm_lowered_cell { using fvm_lowered_cell_ptr = std::unique_ptr<fvm_lowered_cell>; -fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p); +fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx); } // namespace arb diff --git a/arbor/fvm_lowered_cell_impl.cpp b/arbor/fvm_lowered_cell_impl.cpp index 1136f64e..01b0ee0f 100644 --- a/arbor/fvm_lowered_cell_impl.cpp +++ b/arbor/fvm_lowered_cell_impl.cpp @@ -12,13 +12,13 @@ namespace arb { -fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p) { +fvm_lowered_cell_ptr make_fvm_lowered_cell(backend_kind p, const execution_context& ctx) { switch (p) { case backend_kind::multicore: - return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<multicore::backend>); + return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<multicore::backend>(ctx)); case backend_kind::gpu: #ifdef ARB_HAVE_GPU - return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<gpu::backend>); + return fvm_lowered_cell_ptr(new fvm_lowered_cell_impl<gpu::backend>(ctx)); #endif ; // fall through default: diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp index 82ecbba6..fcc9670d 100644 --- a/arbor/fvm_lowered_cell_impl.hpp +++ b/arbor/fvm_lowered_cell_impl.hpp @@ -43,6 +43,8 @@ public: using index_type = fvm_index_type; using size_type = fvm_size_type; + fvm_lowered_cell_impl(execution_context ctx): context_(ctx), threshold_watcher_(ctx) {}; + void reset() override; void initialize( @@ -71,6 +73,8 @@ private: using sample_event_stream = typename backend::sample_event_stream; using 
threshold_watcher = typename backend::threshold_watcher; + execution_context context_; + std::unique_ptr<shared_state> state_; // Cell state shared across mechanisms. // TODO: Can we move the backend-dependent data structures below into state_? @@ -446,8 +450,7 @@ void fvm_lowered_cell_impl<B>::initialize( } } - threshold_watcher_ = threshold_watcher(state_->cv_to_cell.data(), state_->time.data(), - state_->time_to.data(), state_->voltage.data(), detector_cv, detector_threshold); + threshold_watcher_ = backend::voltage_watcher(*state_, detector_cv, detector_threshold, context_); reset(); } diff --git a/arbor/gpu_context.cpp b/arbor/gpu_context.cpp new file mode 100644 index 00000000..9e2042b3 --- /dev/null +++ b/arbor/gpu_context.cpp @@ -0,0 +1,11 @@ +#include <memory> + +#include "gpu_context.hpp" + +namespace arb { + +std::shared_ptr<gpu_context> make_gpu_context() { + return std::make_shared<gpu_context>(); +} + +} diff --git a/arbor/gpu_context.hpp b/arbor/gpu_context.hpp new file mode 100644 index 00000000..fea7fc25 --- /dev/null +++ b/arbor/gpu_context.hpp @@ -0,0 +1,57 @@ +#include <memory> + +#ifdef ARB_HAVE_GPU +#include <cuda.h> +#include <cuda_runtime.h> +#endif + +namespace arb { + +#ifndef ARB_HAVE_GPU +struct gpu_context { + bool has_gpu_; + size_t attributes_; + + gpu_context(): has_gpu_(false), attributes_(0) {} +}; + +#else + +enum gpu_flags { + has_concurrent_managed_access = 1, + has_atomic_double = 2 +}; + +struct gpu_context { + bool has_gpu_; + size_t attributes_; + + gpu_context() : has_gpu_(true) { + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, 0); + attributes_ = 0; + if (prop.concurrentManagedAccess) { + attributes_ |= gpu_flags::has_concurrent_managed_access; + } + if (prop.major*100 + prop.minor >= 600) { + attributes_ |= gpu_flags::has_atomic_double; + } + }; + + bool has_concurrent_managed_access() { + return attributes_ & gpu_flags::has_concurrent_managed_access; + } + + bool has_atomic_double() { + return attributes_ & gpu_flags::has_atomic_double; + } + + void synchronize_for_managed_access() { + if(!has_concurrent_managed_access()) { + cudaDeviceSynchronize(); + } + } +}; + +#endif +} diff --git a/arbor/partition_load_balance.cpp b/arbor/partition_load_balance.cpp index 60dd3946..0e4ffca3 100644 --- a/arbor/partition_load_balance.cpp +++ b/arbor/partition_load_balance.cpp @@ -65,8 +65,8 @@ domain_decomposition partition_load_balance( // of cell group updates according to rules such as the back end on // which the cell group is running. 
- auto has_gpu_backend = [](cell_kind c) { - return cell_kind_supported(c, backend_kind::gpu); + auto has_gpu_backend = [ctx](cell_kind c) { + return cell_kind_supported(c, backend_kind::gpu, ctx); }; std::vector<cell_kind> kinds; diff --git a/arbor/simulation.cpp b/arbor/simulation.cpp index be20b6da..87d88c28 100644 --- a/arbor/simulation.cpp +++ b/arbor/simulation.cpp @@ -160,7 +160,7 @@ simulation_state::simulation_state( foreach_group_index( [&](cell_group_ptr& group, int i) { const auto& group_info = decomp.groups[i]; - auto factory = cell_kind_implementation(group_info.kind, group_info.backend); + auto factory = cell_kind_implementation(group_info.kind, group_info.backend, ctx); group = factory(group_info.gids, rec); }); diff --git a/include/arbor/execution_context.hpp b/include/arbor/execution_context.hpp index fdc9ea9f..3a457a2a 100644 --- a/include/arbor/execution_context.hpp +++ b/include/arbor/execution_context.hpp @@ -11,20 +11,25 @@ namespace arb { namespace threading { class task_system; } +struct gpu_context; + using task_system_handle = std::shared_ptr<threading::task_system>; using distributed_context_handle = std::shared_ptr<distributed_context>; +using gpu_context_handle = std::shared_ptr<gpu_context>; task_system_handle make_thread_pool(); task_system_handle make_thread_pool(int nthreads); +gpu_context_handle make_gpu_context(); struct execution_context { distributed_context_handle distributed; task_system_handle thread_pool; + gpu_context_handle gpu; execution_context(): distributed(std::make_shared<distributed_context>()), - thread_pool(arb::make_thread_pool()) {}; - + thread_pool(arb::make_thread_pool()), + gpu(arb::make_gpu_context()) {}; }; } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 2945330a..d12ff7bd 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -140,5 +140,6 @@ add_executable(unit ${unit_sources} ${test_mech_sources}) add_dependencies(unit build_test_mods) target_compile_options(unit PRIVATE ${ARB_CXXOPT_ARCH}) target_compile_definitions(unit PRIVATE "-DDATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"") +target_compile_definitions(unit PRIVATE ARB_HAVE_GPU) target_include_directories(unit PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers arbor-aux) diff --git a/test/unit/test_backend.cpp b/test/unit/test_backend.cpp index 74db35c2..48f5836c 100644 --- a/test/unit/test_backend.cpp +++ b/test/unit/test_backend.cpp @@ -8,9 +8,10 @@ using namespace arb; TEST(backends, gpu_test) { + execution_context context; #ifdef ARB_GPU_ENABLED - EXPECT_NO_THROW(make_fvm_lowered_cell(backend_kind::gpu)); + EXPECT_NO_THROW(make_fvm_lowered_cell(backend_kind::gpu, context)); #else - EXPECT_ANY_THROW(make_fvm_lowered_cell(backend_kind::gpu)); + EXPECT_ANY_THROW(make_fvm_lowered_cell(backend_kind::gpu, context)); #endif } diff --git a/test/unit/test_fvm_lowered.cpp b/test/unit/test_fvm_lowered.cpp index d6631dde..9001ef0c 100644 --- a/test/unit/test_fvm_lowered.cpp +++ b/test/unit/test_fvm_lowered.cpp @@ -80,6 +80,8 @@ using namespace arb; TEST(fvm_lowered, matrix_init) { + execution_context context; + auto isnan = [](auto v) { return std::isnan(v); }; auto ispos = [](auto v) { return v>0; }; auto isneg = [](auto v) { return v<0; }; @@ -92,7 +94,7 @@ TEST(fvm_lowered, matrix_init) std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell fvcell; + fvm_cell fvcell(context); fvcell.initialize({0}, cable1d_recipe(cell), targets, probe_map); 
auto& J = fvcell.*private_matrix_ptr; @@ -116,6 +118,8 @@ TEST(fvm_lowered, matrix_init) TEST(fvm_lowered, target_handles) { using namespace arb; + execution_context context; + mc_cell cells[] = { make_cell_ball_and_stick(), make_cell_ball_and_3stick() @@ -135,7 +139,7 @@ TEST(fvm_lowered, target_handles) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell fvcell; + fvm_cell fvcell(context); fvcell.initialize({0, 1}, cable1d_recipe(cells), targets, probe_map); mechanism* expsyn = find_mechanism(fvcell, "expsyn"); @@ -175,6 +179,8 @@ TEST(fvm_lowered, stimulus) { // amplitude | 0.3 | 0.1 // CV | 4 | 0 + execution_context context; + std::vector<mc_cell> cells; cells.push_back(make_cell_ball_and_stick(false)); @@ -197,7 +203,7 @@ TEST(fvm_lowered, stimulus) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell fvcell; + fvm_cell fvcell(context); fvcell.initialize({0}, cable1d_recipe(cells), targets, probe_map); mechanism* stim = find_mechanism(fvcell, "_builtin_stimulus"); @@ -250,6 +256,8 @@ TEST(fvm_lowered, derived_mechs) { // // 3. Cell with both test_kin1 and custom_kin1. + execution_context context; + std::vector<mc_cell> cells(3); for (int i = 0; i<3; ++i) { mc_cell& c = cells[i]; @@ -290,7 +298,7 @@ TEST(fvm_lowered, derived_mechs) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell fvcell; + fvm_cell fvcell(context); fvcell.initialize({0, 1, 2}, rec, targets, probe_map); // Both mechanisms will have the same internal name, "test_kin1". @@ -373,6 +381,8 @@ TEST(fvm_lowered, weighted_write_ion) { // the same as a 100µm dendrite, which makes it easier to describe the // expected weights. + execution_context context; + mc_cell c; c.add_soma(5); @@ -394,7 +404,7 @@ TEST(fvm_lowered, weighted_write_ion) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell fvcell; + fvm_cell fvcell(context); fvcell.initialize({0}, cable1d_recipe(c), targets, probe_map); auto& state = *(fvcell.*private_state_ptr).get(); diff --git a/test/unit/test_gpu_stack.cu b/test/unit/test_gpu_stack.cu index 18d9c180..9e0235dd 100644 --- a/test/unit/test_gpu_stack.cu +++ b/test/unit/test_gpu_stack.cu @@ -3,13 +3,15 @@ #include <backends/gpu/stack.hpp> #include <backends/gpu/stack_cu.hpp> #include <backends/gpu/managed_ptr.hpp> +#include <arbor/execution_context.hpp> using namespace arb; TEST(stack, construction) { using T = int; - gpu::stack<T> s(10); + execution_context context; + gpu::stack<T> s(10, context.gpu); EXPECT_EQ(0u, s.size()); EXPECT_EQ(10u, s.capacity()); @@ -51,9 +53,11 @@ TEST(stack, push_back) { using T = int; using stack = gpu::stack<T>; + execution_context context; + const unsigned n = 10; EXPECT_TRUE(n%2 == 0); // require n is even for tests to work - auto s = stack(n); + auto s = stack(n, context.gpu); auto& sstorage = s.storage(); kernels::push_back<<<1, n>>>(sstorage, kernels::all_ftor()); @@ -84,8 +88,10 @@ TEST(stack, overflow) { using T = int; using stack = gpu::stack<T>; + execution_context context; + const unsigned n = 10; - auto s = stack(n); + auto s = stack(n, context.gpu); auto& sstorage = s.storage(); EXPECT_FALSE(s.overflow()); @@ -101,7 +107,9 @@ TEST(stack, empty) { using T = int; using stack = gpu::stack<T>; - stack s(0u); + execution_context context; + + stack s(0u, context.gpu); EXPECT_EQ(s.size(), 0u); EXPECT_EQ(s.capacity(), 0u); diff --git a/test/unit/test_mc_cell_group.cpp b/test/unit/test_mc_cell_group.cpp 
index a9873cbc..66326c22 100644 --- a/test/unit/test_mc_cell_group.cpp +++ b/test/unit/test_mc_cell_group.cpp @@ -14,8 +14,10 @@ using namespace arb; namespace { + execution_context context; + fvm_lowered_cell_ptr lowered_cell() { - return make_fvm_lowered_cell(backend_kind::multicore); + return make_fvm_lowered_cell(backend_kind::multicore, context); } mc_cell make_cell() { diff --git a/test/unit/test_mc_cell_group_gpu.cpp b/test/unit/test_mc_cell_group_gpu.cpp index b1431454..ad15d437 100644 --- a/test/unit/test_mc_cell_group_gpu.cpp +++ b/test/unit/test_mc_cell_group_gpu.cpp @@ -13,7 +13,8 @@ using namespace arb; namespace { fvm_lowered_cell_ptr lowered_cell() { - return make_fvm_lowered_cell(backend_kind::gpu); + execution_context context; + return make_fvm_lowered_cell(backend_kind::gpu, context); } mc_cell make_cell() { diff --git a/test/unit/test_probe.cpp b/test/unit/test_probe.cpp index b22df63b..42326f35 100644 --- a/test/unit/test_probe.cpp +++ b/test/unit/test_probe.cpp @@ -19,6 +19,8 @@ using shared_state = multicore::backend::shared_state; ACCESS_BIND(std::unique_ptr<shared_state> fvm_cell::*, fvm_state_ptr, &fvm_cell::state_); TEST(probe, fvm_lowered_cell) { + execution_context context; + mc_cell bs = make_cell_ball_and_stick(false); i_clamp stim(0, 100, 0.3); @@ -37,7 +39,7 @@ TEST(probe, fvm_lowered_cell) { std::vector<target_handle> targets; probe_association_map<probe_handle> probe_map; - fvm_cell lcell; + fvm_cell lcell(context); lcell.initialize({0}, rec, targets, probe_map); EXPECT_EQ(3u, rec.num_probes(0)); diff --git a/test/unit/test_spikes.cpp b/test/unit/test_spikes.cpp index 656f0500..7be54ece 100644 --- a/test/unit/test_spikes.cpp +++ b/test/unit/test_spikes.cpp @@ -29,6 +29,7 @@ TEST(SPIKES_TEST_CLASS, threshold_watcher) { // the test creates a watch on 3 values in the array values (which has 10 // elements in total). + execution_context context; const auto n = 10; const std::vector<index_type> index{0, 5, 7}; @@ -53,7 +54,7 @@ TEST(SPIKES_TEST_CLASS, threshold_watcher) { list expected; // create the watch - backend::threshold_watcher watch(cell_index.data(), time_before.data(), time_after.data(), values.data(), index, thresh); + backend::threshold_watcher watch(cell_index.data(), time_before.data(), time_after.data(), values.data(), index, thresh, context); // initially the first and third watch should not be spiking // the second is spiking -- GitLab
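
The first bullet of the commit message concerns run-time GPU attributes: gpu_context queries cudaGetDeviceProperties() once and records whether the device supports concurrent managed access and double-precision atomics, and host-side readers of managed memory ask the context to synchronize when needed, as stack::host_access() now does in this patch. Below is a minimal sketch of that usage inside Arbor, assuming a GPU-enabled build (ARB_HAVE_GPU); the function and variable names are illustrative and are not part of the patch.

// Sketch only: how code inside Arbor consults the run-time gpu_context before
// host-side reads of managed memory (mirrors stack::host_access() above).
// Assumes a build with ARB_HAVE_GPU, where gpu_context defines
// synchronize_for_managed_access(); the names below are illustrative.
#include <arbor/execution_context.hpp>

#include "gpu_context.hpp"

double read_first_result(const arb::execution_context& ctx, const double* managed_results) {
    // Pre-Pascal devices copy all managed memory to the GPU on kernel launch and
    // do not allow concurrent host access, so a device synchronization is needed
    // before reading. On devices reporting concurrentManagedAccess (P100 and
    // later) the call returns immediately.
    ctx.gpu->synchronize_for_managed_access();
    return managed_results[0]; // safe to read on the host from here on
}

This replaces the per-pointer concurrent_managed_access flag that managed_ptr previously carried; the stack now holds a shared_ptr to the gpu_context and defers the decision to it.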
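The note about cuda_atomic.hpp refers to the fact that the atomicAdd(double*, double) intrinsic only exists for compute capability 6.0 and above, so that header must still be specialized at compile time even though other GPU attributes are now detected at run time. The fragment below is the usual emulation pattern from the CUDA C programming guide, shown for illustration; it is not claimed to be the exact contents of Arbor's cuda_atomic.hpp, and the name emulated_atomic_add is hypothetical.

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// Emulate atomicAdd on double via a compare-and-swap loop on the bit pattern.
__device__ inline double emulated_atomic_add(double* address, double val) {
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
        assumed = old;
        old = atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
        // Retry if another thread updated the value between the load and the CAS.
    } while (assumed != old);
    return __longlong_as_double(old);
}
#else
// From sm_60 onwards the hardware provides the double-precision atomicAdd directly.
__device__ inline double emulated_atomic_add(double* address, double val) {
    return atomicAdd(address, val);
}
#endif

Because __CUDA_ARCH__ is a compile-time macro, the CMake change above now compiles device code for sm_35, sm_37, and sm_60, so each architecture picks up the appropriate branch.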