diff --git a/arbor/backends/gpu/stack.hpp b/arbor/backends/gpu/stack.hpp
index 1d1771995afd10586d135c48b555c951bf7d5ec1..b8b417ec15e53892eaf19eabacd0073fbd9e1f6e 100644
--- a/arbor/backends/gpu/stack.hpp
+++ b/arbor/backends/gpu/stack.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <algorithm>
+#include <memory>
 
 #include <arbor/assert.hpp>
 
diff --git a/arbor/gpu_context.cpp b/arbor/gpu_context.cpp
index 9e2042b3e03a108c1476ffcf61ba1efaecd1b87f..1fcbba074bb430c8117b096d131210dcd6cef939 100644
--- a/arbor/gpu_context.cpp
+++ b/arbor/gpu_context.cpp
@@ -1,9 +1,58 @@
 #include <memory>
 
+#ifdef ARB_HAVE_GPU
+#include <cuda.h>
+#include <cuda_runtime.h>
+#endif
+
 #include "gpu_context.hpp"
 
 namespace arb {
 
+// True if the has_concurrent_managed_access bit is set in attributes_.
+bool gpu_context::has_concurrent_managed_access() const {
+    return attributes_ & gpu_flags::has_concurrent_managed_access;
+}
+
+// True if the has_atomic_double bit is set in attributes_.
+bool gpu_context::has_atomic_double() const {
+    return attributes_ & gpu_flags::has_atomic_double;
+}
+
+#ifndef ARB_HAVE_GPU
+
+// Built without ARB_HAVE_GPU: no device, no attribute flags, nothing to synchronize.
+gpu_context::gpu_context(): has_gpu_(false), attributes_(0) {}
+void gpu_context::synchronize_for_managed_access() const {}
+
+#else
+
+// Built with ARB_HAVE_GPU: derive the attribute flags from the properties of device 0.
+gpu_context::gpu_context(): has_gpu_(true), attributes_(0) {
+    cudaDeviceProp prop;
+    if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) {
+        // The query failed, so prop may not have been filled in: leave
+        // attributes_ empty rather than reading indeterminate values.
+        return;
+    }
+    if (prop.concurrentManagedAccess) {
+        attributes_ |= gpu_flags::has_concurrent_managed_access;
+    }
+    // major/minor encode the compute capability; 600 corresponds to 6.0.
+    if (prop.major*100 + prop.minor >= 600) {
+        attributes_ |= gpu_flags::has_atomic_double;
+    }
+}
+
+// Synchronize the device unless it reports concurrent managed access.
+void gpu_context::synchronize_for_managed_access() const {
+    if(!has_concurrent_managed_access()) {
+        cudaDeviceSynchronize();
+    }
+}
+
+#endif
+
 std::shared_ptr<gpu_context> make_gpu_context() {
     return std::make_shared<gpu_context>();
 }
diff --git a/arbor/gpu_context.hpp b/arbor/gpu_context.hpp
index fea7fc255bf8948748beff5fc5456817e3724f1e..edcb9c933ccddbafd68e3a1412c80dc54e7ea512 100644
--- a/arbor/gpu_context.hpp
+++ b/arbor/gpu_context.hpp
@@ -1,22 +1,9 @@
-#include <memory>
-
-#ifdef ARB_HAVE_GPU
-#include <cuda.h>
-#include <cuda_runtime.h>
-#endif
+#pragma once
+
+#include <cstddef>
 
 namespace arb {
 
-#ifndef ARB_HAVE_GPU
-struct gpu_context {
-    bool has_gpu_;
-    size_t attributes_;
-
-    gpu_context(): has_gpu_(false), attributes_(0) {}
-};
-
-#else
-
 enum gpu_flags {
     has_concurrent_managed_access = 1,
     has_atomic_double = 2
@@ -26,32 +13,17 @@ struct gpu_context {
     bool has_gpu_;
     size_t attributes_;
 
-    gpu_context() : has_gpu_(true) {
-        cudaDeviceProp prop;
-        cudaGetDeviceProperties(&prop, 0);
-        attributes_ = 0;
-        if (prop.concurrentManagedAccess) {
-            attributes_ |= gpu_flags::has_concurrent_managed_access;
-        }
-        if (prop.major*100 + prop.minor >= 600) {
-            attributes_ |= gpu_flags::has_atomic_double;
-        }
-    };
-
-    bool has_concurrent_managed_access() {
-        return attributes_ & gpu_flags::has_concurrent_managed_access;
-    }
-
-    bool has_atomic_double() {
-        return attributes_ & gpu_flags::has_atomic_double;
-    }
+    // Defined in gpu_context.cpp, with separate definitions for builds
+    // with and without ARB_HAVE_GPU.
+    gpu_context();
 
-    void synchronize_for_managed_access() {
-        if(!has_concurrent_managed_access()) {
-            cudaDeviceSynchronize();
-        }
-    }
+    // Test the corresponding gpu_flags bit in attributes_.
+    bool has_concurrent_managed_access() const;
+    bool has_atomic_double() const;
+
+    // Calls cudaDeviceSynchronize() if the device lacks concurrent managed
+    // access; does nothing in builds without ARB_HAVE_GPU.
+    void synchronize_for_managed_access() const;
 };
 
-#endif
 }
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index 23e4a59b5bb3b6cf01e03d46569e0e01376ce492..4006ec2adc485538d8667a1b5c9e24e69386ee1c 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -27,15 +27,19 @@ add_custom_command(
 
 set(arb_features)
 if(ARB_WITH_ASSERTIONS)
+    # define ARB_ASSERT_ENABLED in version.hpp
     list(APPEND arb_features ASSERT)
 endif()
 if(ARB_WITH_MPI)
+    # define ARB_MPI_ENABLED in version.hpp
     list(APPEND arb_features MPI)
 endif()
 if(ARB_WITH_CUDA)
+    # define ARB_GPU_ENABLED in version.hpp
     list(APPEND arb_features GPU)
 endif()
 if(ARB_WITH_PROFILING)
+    # define ARB_PROFILE_ENABLED in version.hpp
     list(APPEND arb_features PROFILE)
 endif()
 
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index d12ff7bd8d0609d2403dad63c75cf8134b96d55f..2945330af8a482cdf0ebbb28d995d940fd684803 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -140,6 +140,5 @@ add_executable(unit ${unit_sources} ${test_mech_sources})
 add_dependencies(unit build_test_mods)
 target_compile_options(unit PRIVATE ${ARB_CXXOPT_ARCH})
 target_compile_definitions(unit PRIVATE "-DDATADIR=\"${CMAKE_CURRENT_SOURCE_DIR}/swc\"")
-target_compile_definitions(unit PRIVATE ARB_HAVE_GPU)
 target_include_directories(unit PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
 target_link_libraries(unit PRIVATE gtest arbor arbor-private-headers arbor-aux)