diff --git a/.travis.yml b/.travis.yml index 0d555b54df85c8fe42c1a3de0772d34002860cdc..87a734c558dc9870f552017e560d565a32618bff 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ ######## Testing minimal compiler requirements ######## -# GCC 6.4.0 +# GCC 8.1.0 # Clang 7.0 # Apple Clang 1100.0.33.16 ####################################################### @@ -13,27 +13,27 @@ sudo: false matrix: include: ########################## OS X ######################### -## test gcc6 - single node/rank with threading backend ## +## test gcc8 - single node/rank with threading backend ## - name: "osx, gcc, serial, py" os: osx osx_image: xcode11.3 python: 3.6 env: - - MATRIX_EVAL="brew install gcc@6 && brew link --force --overwrite gcc@6 && brew install cmake && CC=gcc-6 && CXX=g++-6" + - MATRIX_EVAL="brew install gcc@8 && brew link --force --overwrite gcc@8 && brew install cmake && CC=gcc-8 && CXX=g++-8" - BUILD_NAME=cthread-osx-gcc-py - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=native - compiler: gcc-6 + compiler: gcc-8 -## test gcc6 - mpi with threading backend ## +## test gcc8 - mpi with threading backend ## - name: "osx, gcc, mpi, py" os: osx osx_image: xcode11.3 python: 3.6 env: - - MATRIX_EVAL="brew install gcc@6 && brew link --force --overwrite gcc@6 && brew install cmake && CC=gcc-6 && CXX=g++-6" + - MATRIX_EVAL="brew install gcc@8 && brew link --force --overwrite gcc@8 && brew install cmake && CC=gcc-8 && CXX=g++-8" - BUILD_NAME=mpi-osx-gcc-py - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=native - compiler: gcc-6 + compiler: gcc-8 ## test clang9 - single node/rank with threading backend ## - name: "osx, apple clang, serial, py" @@ -58,7 +58,7 @@ matrix: compiler: clang ######################### LINUX ######################### -## test gcc7 - single node/rank with threading backend ## +## test gcc8 - single node/rank with threading backend ## - name: "linux, gcc, serial, py" os: linux dist: bionic @@ -66,16 +66,16 @@ matrix: apt: sources: packages: - - g++-7 + - g++-8 - openmpi-bin - libopenmpi-dev env: - - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" + - MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" - BUILD_NAME=cthread-linux-gcc-py - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=haswell - compiler: gcc-7 + compiler: gcc-8 -## test gcc7 - mpi with threading backend ## +## test gcc8 - mpi with threading backend ## - name: "linux, gcc, mpi, py" os: linux dist: bionic @@ -83,16 +83,16 @@ matrix: apt: sources: packages: - - g++-6 + - g++-8 - openmpi-bin - libopenmpi-dev env: - - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" + - MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" - BUILD_NAME=mpi-linux-gcc-py - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=haswell - compiler: gcc-7 + compiler: gcc-8 -## test clang4 - single node/rank with threading backend ## +## test clang7 - single node/rank with threading backend ## - name: "linux, clang, serial, py" os: linux dist: bionic @@ -106,9 +106,9 @@ matrix: - MATRIX_EVAL="CC=clang && CXX=clang++" - BUILD_NAME=cthread-linux-clang-py - WITH_DISTRIBUTED=serial WITH_PYTHON=true PY=3 ARCH=native - compiler: clang-4.0 + compiler: clang-7.0 -## test clang4 - mpi with threading backend ## +## test clang7 - mpi with threading backend ## - name: "linux, clang, mpi, py" os: linux dist: bionic @@ -122,7 +122,7 @@ matrix: - MATRIX_EVAL="CC=clang && CXX=clang++" - BUILD_NAME=mpi-linux-clang-py - WITH_DISTRIBUTED=mpi WITH_PYTHON=true PY=3 ARCH=native - compiler: clang-4.0 + compiler: clang-7.0 before_install: - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then export HOMEBREW_NO_AUTO_UPDATE=1; brew cask uninstall --force oclint; fi diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b3079412e41415b3bece9ffb991144b28727be1..bed72ed2c6e51a33d2579e2ab23e667eda06a651 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,20 +92,24 @@ set(THREADS_PREFER_PTHREAD_FLAG OFF) if(ARB_GPU STREQUAL "cuda") set(ARB_WITH_NVCC TRUE) + # CMake 18 and later set the default CUDA architecture for + # each target according to CMAKE_CUDA_ARCHITECTURES. + if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 60 70 80) + endif() + enable_language(CUDA) - # Despite native CUDA support, the CUDA package is still required to find - # the NVML library and to export the cuda library dependencies from the - # installed target. - find_package(CUDA REQUIRED) + # Despite native CUDA support, the CUDA package is still required to export + # the cuda library dependencies from the installed target. + find_package(CUDA 10 REQUIRED) elseif(ARB_GPU STREQUAL "cuda-clang") set(ARB_WITH_CUDA_CLANG TRUE) - # The CUDA package is needed for clang compilation for the same reasons as - # above. + # The CUDA package is needed for clang compilation for the same reasons as above. # enable_langaue(CUDA) has a bug with clang - find_package(CUDA REQUIRED) + find_package(CUDA 10 REQUIRED) elseif(ARB_GPU STREQUAL "hip") set(ARB_WITH_HIP_CLANG TRUE) endif() @@ -132,7 +136,8 @@ include("CheckCompilerXLC") include("CompilerOptions") add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${CXXOPT_WALL}>") -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CUDA_STANDARD 14) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -256,28 +261,6 @@ if(ARB_WITH_GPU) if(ARB_WITH_NVCC OR ARB_WITH_CUDA_CLANG) target_include_directories(arborenv-private-deps INTERFACE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - # The arborenv lib needs to use the CUDA NVML library for CUDA 9. - - if (${CUDA_VERSION_MAJOR} LESS 10) - set(arborenv_requires_nvml TRUE) - endif() - - if(arborenv_requires_nvml) - set(nvml_names nvidia-ml) # Note: platform dependent? e.g. nvml.lib on Windows. - find_library(CUDA_NVML - NAMES ${nvml_names} - NO_DEFAULT_PATH - PATHS ${CMAKE_CUDA_IMPLICIT_DIRECTORIES} ${CUDA_TOOLKIT_ROOT_DIR} - PATH_SUFFIXES lib64/stubs lib/stubs) - - if (NOT CUDA_NVML) - message(FATAL_ERROR "Unable to find CUDA NVML library by: ${nvml_names}") - endif() - - target_link_libraries(arborenv-private-deps INTERFACE ${CUDA_NVML}) - target_compile_definitions(arborenv-private-deps INTERFACE ARBENV_USE_NVML) - endif() - add_compile_options( "$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe=--diag_suppress=integer_sign_change>" "$<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe=--diag_suppress=unsigned_compare_with_zero>") @@ -285,20 +268,20 @@ if(ARB_WITH_GPU) if(ARB_WITH_NVCC) - target_compile_options(arbor-private-deps INTERFACE - $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_35,code=sm_35>) - target_compile_options(arbor-private-deps INTERFACE - $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_37,code=sm_37>) target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_60,code=sm_60>) target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_70,code=sm_70>) + if (${CUDA_VERSION_MAJOR} GREATER 10) + target_compile_options(arbor-private-deps INTERFACE + $<$<COMPILE_LANGUAGE:CUDA>:-gencode=arch=compute_80,code=sm_80>) + endif() target_compile_definitions(arbor-private-deps INTERFACE ARB_CUDA) target_compile_definitions(arborenv-private-deps INTERFACE ARB_CUDA) elseif(ARB_WITH_CUDA_CLANG) - set(clang_options_ -DARB_CUDA -xcuda --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_37 --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_70 --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) + set(clang_options_ -DARB_CUDA -xcuda --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_80 --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) target_compile_options(arbor-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CXX>:${clang_options_}>) target_compile_options(arborenv-private-deps INTERFACE $<$<COMPILE_LANGUAGE:CXX>:${clang_options_}>) @@ -456,10 +439,6 @@ if(ARB_WITH_GPU) set(arbor_override_import_lang CXX) set(arbor_add_import_libs ${CUDA_LIBRARIES}) set(arborenv_add_import_libs ${CUDA_LIBRARIES}) - - if(arborenv_requires_nvml) - list(APPEND arborenv_add_import_libs ${CUDA_NVML}) - endif() endif() configure_file( diff --git a/arbor/algorithms.hpp b/arbor/algorithms.hpp index 722edd0724c6878c1ecf4c6c7afc97b833259d72..bcc3b0f1ca049ce7ff463d1c5d0d7fa73e539166 100644 --- a/arbor/algorithms.hpp +++ b/arbor/algorithms.hpp @@ -30,7 +30,7 @@ template <typename C> typename util::sequence_traits<C>::value_type mean(C const& c) { - return util::sum(c)/util::size(c); + return util::sum(c)/std::size(c); } // returns the prefix sum of c in the form `[0, c[0], c[0]+c[1], ..., sum(c)]`. diff --git a/arbor/backends/gpu/multi_event_stream.cpp b/arbor/backends/gpu/multi_event_stream.cpp index 858c99789610102a6648b25e8ad04e3bebd4e0f7..c96c47d59e1135b06a10005b1d25f006696a9e34 100644 --- a/arbor/backends/gpu/multi_event_stream.cpp +++ b/arbor/backends/gpu/multi_event_stream.cpp @@ -6,6 +6,29 @@ namespace arb { namespace gpu { +// These wrappers are implemented in the multi_event_stream.cu file, which +// is spearately compiled by nvcc, to protect nvcc from having to parse C++17. +void mark_until_after_w(unsigned n, + fvm_index_type* mark, + fvm_index_type* span_end, + fvm_value_type* ev_time, + const fvm_value_type* t_until); +void mark_until_w(unsigned n, + fvm_index_type* mark, + fvm_index_type* span_end, + fvm_value_type* ev_time, + const fvm_value_type* t_until); +void drop_marked_events_w(unsigned n, + fvm_index_type* n_nonempty_stream, + fvm_index_type* span_begin, + fvm_index_type* span_end, + fvm_index_type* mark); +void event_time_if_before_w(unsigned n, + fvm_index_type* span_begin, + fvm_index_type* span_end, + fvm_value_type* ev_time, + fvm_value_type* t_until); + void multi_event_stream_base::clear() { memory::fill(span_begin_, 0u); memory::fill(span_end_, 0u); @@ -13,5 +36,29 @@ void multi_event_stream_base::clear() { n_nonempty_stream_[0] = 0; } +// Designate for processing events `ev` at head of each event stream `i` +// until `event_time(ev)` > `t_until[i]`. +void multi_event_stream_base::mark_until_after(const_view t_until) { + arb_assert(n_streams()==t_until.size()); + mark_until_after_w(n_stream_, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +} + +// Designate for processing events `ev` at head of each event stream `i` +// while `t_until[i]` > `event_time(ev)`. +void multi_event_stream_base::mark_until(const_view t_until) { + mark_until_w(n_stream_, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +} + +// Remove marked events from front of each event stream. +void multi_event_stream_base::drop_marked_events() { + drop_marked_events_w(n_stream_, n_nonempty_stream_.data(), span_begin_.data(), span_end_.data(), mark_.data()); +} + +// If the head of `i`th event stream exists and has time less than `t_until[i]`, set +// `t_until[i]` to the event time. +void multi_event_stream_base::event_time_if_before(view t_until) { + event_time_if_before_w(n_stream_, span_begin_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +} + } // namespace gpu } // namespace arb diff --git a/arbor/backends/gpu/multi_event_stream.cu b/arbor/backends/gpu/multi_event_stream.cu index b136f8dcd63d6fb87e4542508b83a3184d9e1ad2..7e3e90c72c4defb85a027ae1c9695a8e64c72307 100644 --- a/arbor/backends/gpu/multi_event_stream.cu +++ b/arbor/backends/gpu/multi_event_stream.cu @@ -1,7 +1,6 @@ #include <arbor/common_types.hpp> #include "backends/event.hpp" -#include "backends/gpu/multi_event_stream.hpp" #include "gpu_common.hpp" namespace arb { @@ -86,50 +85,54 @@ namespace kernels { } } // namespace kernels -// Designate for processing events `ev` at head of each event stream `i` -// until `event_time(ev)` > `t_until[i]`. -void multi_event_stream_base::mark_until_after(const_view t_until) { - arb_assert(n_streams()==t_until.size()); - - constexpr int block_dim = 128; - - unsigned n = n_stream_; - int nblock = impl::block_count(n, block_dim); - kernels::mark_until_after<<<nblock, block_dim>>>( - n, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +void mark_until_after_w(unsigned n, + fvm_index_type* mark, + fvm_index_type* span_end, + fvm_value_type* ev_time, + const fvm_value_type* t_until) +{ + const int nblock = impl::block_count(n, 128); + kernels::mark_until_after + <<<nblock, 128>>> + (n, mark, span_end, ev_time, t_until); } -// Designate for processing events `ev` at head of each event stream `i` -// while `t_until[i]` > `event_time(ev)`. -void multi_event_stream_base::mark_until(const_view t_until) { - arb_assert(n_streams()==t_until.size()); - constexpr int block_dim = 128; - - unsigned n = n_stream_; - int nblock = impl::block_count(n, block_dim); - kernels::mark_until<<<nblock, block_dim>>>( - n, mark_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +void mark_until_w(unsigned n, + fvm_index_type* mark, + fvm_index_type* span_end, + fvm_value_type* ev_time, + const fvm_value_type* t_until) +{ + const int nblock = impl::block_count(n, 128); + kernels::mark_until + <<<nblock, 128>>> + (n, mark, span_end, ev_time, t_until); } -// Remove marked events from front of each event stream. -void multi_event_stream_base::drop_marked_events() { - constexpr int block_dim = 128; +void drop_marked_events_w(unsigned n, + fvm_index_type* n_nonempty_stream, + fvm_index_type* span_begin, + fvm_index_type* span_end, + fvm_index_type* mark) +{ + const int nblock = impl::block_count(n, 128); + kernels::drop_marked_events + <<<nblock, 128>>> + (n, n_nonempty_stream, span_begin, span_end, mark); - unsigned n = n_stream_; - int nblock = impl::block_count(n, block_dim); - kernels::drop_marked_events<<<nblock, block_dim>>>( - n, n_nonempty_stream_.data(), span_begin_.data(), span_end_.data(), mark_.data()); } -// If the head of `i`th event stream exists and has time less than `t_until[i]`, set -// `t_until[i]` to the event time. -void multi_event_stream_base::event_time_if_before(view t_until) { - constexpr int block_dim = 128; - int nblock = impl::block_count(n_stream_, block_dim); - kernels::event_time_if_before<<<nblock, block_dim>>>( - n_stream_, span_begin_.data(), span_end_.data(), ev_time_.data(), t_until.data()); +void event_time_if_before_w(unsigned n, + fvm_index_type* span_begin, + fvm_index_type* span_end, + fvm_value_type* ev_time, + fvm_value_type* t_until) +{ + const int nblock = impl::block_count(n, 128); + kernels::event_time_if_before + <<<nblock, 128>>> + (n, span_begin, span_end, ev_time, t_until); } - } // namespace gpu } // namespace arb diff --git a/arbor/backends/multicore/mechanism.cpp b/arbor/backends/multicore/mechanism.cpp index a38be492b0775317ca497a9f3cbc016d09273bf1..63f50a362949aee2ed5f34433842ce0109cc2fbd 100644 --- a/arbor/backends/multicore/mechanism.cpp +++ b/arbor/backends/multicore/mechanism.cpp @@ -40,8 +40,8 @@ void copy_extend(const Source& source, Dest&& dest, const Fill& fill) { using std::begin; using std::end; - auto dest_n = util::size(dest); - auto source_n = util::size(source); + auto dest_n = std::size(dest); + auto source_n = std::size(source); auto n = source_n<dest_n? source_n: dest_n; auto tail = std::copy_n(begin(source), n, begin(dest)); diff --git a/arbor/backends/multicore/multi_event_stream.hpp b/arbor/backends/multicore/multi_event_stream.hpp index 18b42aa5380d98fcd9f209c2b7108a2265560b42..8b4c283e5a8f4b33ff5a857e608f52ed90e0cbba 100644 --- a/arbor/backends/multicore/multi_event_stream.hpp +++ b/arbor/backends/multicore/multi_event_stream.hpp @@ -97,7 +97,7 @@ public: void mark_until_after(const TimeSeq& t_until) { using ::arb::event_time; - arb_assert(n_streams()==util::size(t_until)); + arb_assert(n_streams()==std::size(t_until)); // note: operation on each `i` is independent. for (size_type i = 0; i<n_streams(); ++i) { @@ -118,7 +118,7 @@ public: void mark_until(const TimeSeq& t_until) { using ::arb::event_time; - arb_assert(n_streams()==util::size(t_until)); + arb_assert(n_streams()==std::size(t_until)); // note: operation on each `i` is independent. for (size_type i = 0; i<n_streams(); ++i) { diff --git a/arbor/fvm_lowered_cell.hpp b/arbor/fvm_lowered_cell.hpp index 9a4f397219f172928d05e01f53410e694cde8e00..a30e19d2fb08b6b313ca53147161c605d8b3b9c3 100644 --- a/arbor/fvm_lowered_cell.hpp +++ b/arbor/fvm_lowered_cell.hpp @@ -2,6 +2,7 @@ #include <cstddef> #include <memory> +#include <type_traits> #include <unordered_map> #include <vector> @@ -134,8 +135,8 @@ struct fvm_probe_data { return util::make_range( util::visit( [](auto& i) -> std::pair<const probe_handle*, const probe_handle*> { - using util::data; - using util::size; + using std::data; + using std::size; return {data(i.raw_handles), data(i.raw_handles)+size(i.raw_handles)}; }, info)); diff --git a/arbor/fvm_lowered_cell_impl.hpp b/arbor/fvm_lowered_cell_impl.hpp index 81ba0068c410a2d6d025502914f84f1412ebbac3..ebb430a5b79e3b0bababa031ba1e198c9d727f4d 100644 --- a/arbor/fvm_lowered_cell_impl.hpp +++ b/arbor/fvm_lowered_cell_impl.hpp @@ -202,8 +202,6 @@ fvm_integration_result fvm_lowered_cell_impl<Backend>::integrate( std::vector<deliverable_event> staged_events, std::vector<sample_event> staged_samples) { - using util::as_const; - set_gpu(); // Integration setup diff --git a/arbor/include/arbor/util/any_visitor.hpp b/arbor/include/arbor/util/any_visitor.hpp index 348bdaa8a42087a25b23029a9dce725d32da2427..48e5dfc02c762436362c7a23c0afc48ff07698b1 100644 --- a/arbor/include/arbor/util/any_visitor.hpp +++ b/arbor/include/arbor/util/any_visitor.hpp @@ -17,8 +17,6 @@ namespace util { namespace impl { -template <typename> using void_t = void; // TODO: C++17 use std::void_t. - template <typename X, typename Y> struct propagate_qualifier { using type = Y; }; @@ -64,7 +62,7 @@ struct any_visitor<T> { }; template <typename F> - struct invoke_or_throw<F, impl::void_t<decltype(std::declval<F>()())>> { + struct invoke_or_throw<F, std::void_t<decltype(std::declval<F>()())>> { template <typename A> static auto visit(F&& f, A&& a) { using Q = impl::propagate_qualifier_t<A, T>; @@ -103,7 +101,7 @@ struct invocable_impl { struct test: std::false_type {}; template <typename G> - struct test<G, void_t<decltype(std::declval<G>()(std::declval<A>()...))>>: std::true_type {}; + struct test<G, std::void_t<decltype(std::declval<G>()(std::declval<A>()...))>>: std::true_type {}; using type = typename test<F>::type; }; diff --git a/arbor/memory/array_view.hpp b/arbor/memory/array_view.hpp index cbdf1f5c9d31b2c312f498d1b0a6248128d4e7f5..53a69dfdca25c112e026b5535281189707b30532 100644 --- a/arbor/memory/array_view.hpp +++ b/arbor/memory/array_view.hpp @@ -228,7 +228,7 @@ public: return size_; } - bool is_empty() const { + bool empty() const { return size_==0; } diff --git a/arbor/profile/profiler.cpp b/arbor/profile/profiler.cpp index 4ecd1efc5cc04cd7302ce29554df04ac7646edaf..9320a784cbc64a316c0544d4926a19149c1c8efc 100644 --- a/arbor/profile/profiler.cpp +++ b/arbor/profile/profiler.cpp @@ -311,11 +311,11 @@ void print(std::ostream& o, if (proportion<thresh) return; if (n.count==profile_node::npos) { - snprintf(buf, util::size(buf), "_p_ %-20s%12s%12.3f%12.3f%8.1f", + snprintf(buf, std::size(buf), "_p_ %-20s%12s%12.3f%12.3f%8.1f", name.c_str(), "-", float(n.time), per_thread_time, proportion); } else { - snprintf(buf, util::size(buf), "_p_ %-20s%12lu%12.3f%12.3f%8.1f", + snprintf(buf, std::size(buf), "_p_ %-20s%12lu%12.3f%12.3f%8.1f", name.c_str(), n.count, float(n.time), per_thread_time, proportion); } o << "\n" << buf; @@ -353,7 +353,7 @@ std::ostream& operator<<(std::ostream& o, const profile& prof) { auto tree = make_profile_tree(prof); - snprintf(buf, util::size(buf), "_p_ %-20s%12s%12s%12s%8s", "REGION", "CALLS", "THREAD", "WALL", "\%"); + snprintf(buf, std::size(buf), "_p_ %-20s%12s%12s%12s%8s", "REGION", "CALLS", "THREAD", "WALL", "\%"); o << buf; print(o, tree, tree.time, prof.num_threads, 0, ""); return o; diff --git a/arbor/util/maputil.hpp b/arbor/util/maputil.hpp index 44956499488ef3d4e5828a7583fa958472638f08..bcecd481c8e28f64d1d41a510ae14245b389ad3e 100644 --- a/arbor/util/maputil.hpp +++ b/arbor/util/maputil.hpp @@ -39,7 +39,7 @@ template <typename Seq, typename = void> struct is_associative_container: std::false_type {}; template <typename Seq> -struct is_associative_container<Seq, void_t<maputil_impl::assoc_test<Seq>>>: maputil_impl::assoc_test<Seq> {}; +struct is_associative_container<Seq, std::void_t<maputil_impl::assoc_test<Seq>>>: maputil_impl::assoc_test<Seq> {}; // Find value in a sequence of key-value pairs or in a key-value assocation map, with // optional explicit comparator. diff --git a/arbor/util/mergeview.hpp b/arbor/util/mergeview.hpp index 86943b54371e237927cf59395c2c287e0969ef5f..4af3ab21b1fff4ee62b5cc9f04d8c1add0bad8ad 100644 --- a/arbor/util/mergeview.hpp +++ b/arbor/util/mergeview.hpp @@ -16,13 +16,13 @@ template <typename A, typename B, typename = void> struct has_common_type: std::false_type {}; template <typename A, typename B> -struct has_common_type<A, B, util::void_t<std::common_type_t<A, B>>>: std::true_type {}; +struct has_common_type<A, B, std::void_t<std::common_type_t<A, B>>>: std::true_type {}; template <typename A, typename B, typename X, typename = void> struct common_type_or_else { using type = X; }; template <typename A, typename B, typename X> -struct common_type_or_else<A, B, X, util::void_t<std::common_type_t<A, B>>> { +struct common_type_or_else<A, B, X, std::void_t<std::common_type_t<A, B>>> { using type = std::common_type_t<A, B>; }; diff --git a/arbor/util/meta.hpp b/arbor/util/meta.hpp index 4ab20318d488d5ae36ed3620adbfa4b1c5e64e03..e86a0a04b90745a53d803239c98eb60537e0d194 100644 --- a/arbor/util/meta.hpp +++ b/arbor/util/meta.hpp @@ -12,76 +12,6 @@ namespace arb { namespace util { -// The following classes and functions can be replaced -// with std functions when we migrate to later versions of C++. -// -// C++17: -// void_t, empty, data, as_const - -template <class...> -using void_t = void; - -template <typename X> -constexpr std::size_t size(const X& x) { return x.size(); } - -template <typename X, std::size_t N> -constexpr std::size_t size(X (&)[N]) noexcept { return N; } - -template <typename C> -constexpr auto data(C& c) { return c.data(); } - -template <typename C> -constexpr auto data(const C& c) { return c.data(); } - -template <typename T, std::size_t N> -constexpr T* data(T (&a)[N]) noexcept { return a; } - -template <typename T> -void as_const(T&& t) = delete; - -template <typename T> -constexpr std::add_const_t<T>& as_const(T& t) { - return t; -} - -// Use sequence `empty() const` method if exists, otherwise -// compare begin and end. - -namespace impl_empty { - template <typename C> - struct has_const_empty_method { - template <typename T> - static decltype(std::declval<const T>().empty(), std::true_type{}) test(int); - template <typename T> - static std::false_type test(...); - - using type = decltype(test<C>(0)); - }; - - using std::begin; - using std::end; - - template <typename Seq> - constexpr bool empty(const Seq& seq, std::false_type) { - return begin(seq)==end(seq); - } - - template <typename Seq> - constexpr bool empty(const Seq& seq, std::true_type) { - return seq.empty(); - } -} - -template <typename Seq> -constexpr bool empty(const Seq& seq) { - return impl_empty::empty(seq, typename impl_empty::has_const_empty_method<Seq>::type{}); -} - -template <typename T, std::size_t N> -constexpr bool empty(const T (& c)[N]) noexcept { - return false; // N cannot be zero -} - // Types associated with a container or sequence namespace impl_seqtrait { @@ -95,9 +25,19 @@ namespace impl_seqtrait { struct data_returns_pointer<T (&)[N], void>: public std::true_type {}; template <typename T> - struct data_returns_pointer<T, void_t<decltype(std::declval<T>().data())>>: + struct data_returns_pointer<T, std::void_t<decltype(std::declval<T>().data())>>: public std::is_pointer<decltype(std::declval<T>().data())>::type {}; + template <typename Seq, typename=void> + struct size_type_ { + using type = void; + }; + + template <typename Seq> + struct size_type_<Seq, std::void_t<decltype(std::size(std::declval<Seq&>()))>> { + using type = decltype(std::size(std::declval<Seq&>())); + }; + template <typename Seq> struct sequence_traits { using iterator = decltype(begin(std::declval<Seq&>())); @@ -105,7 +45,7 @@ namespace impl_seqtrait { using value_type = typename std::iterator_traits<iterator>::value_type; using reference = typename std::iterator_traits<iterator>::reference; using difference_type = typename std::iterator_traits<iterator>::difference_type; - using size_type = decltype(size(std::declval<Seq&>())); + using size_type = typename size_type_<Seq>::type; // For use with heterogeneous ranges: using sentinel = decltype(end(std::declval<Seq&>())); using const_sentinel = decltype(end(std::declval<const Seq&>())); @@ -119,7 +59,7 @@ namespace impl_seqtrait { std::false_type {}; template<typename T> - struct is_sequence<T, void_t<decltype(begin(std::declval<T>()))>>: + struct is_sequence<T, std::void_t<decltype(begin(std::declval<T>()))>>: std::true_type {}; } @@ -178,7 +118,7 @@ template <typename T, typename = void> struct is_iterator: public std::false_type {}; template <typename T> -struct is_iterator<T, void_t<typename std::iterator_traits<T>::iterator_category>>: +struct is_iterator<T, std::void_t<typename std::iterator_traits<T>::iterator_category>>: public std::true_type {}; template <typename T> @@ -249,7 +189,7 @@ template <typename I, typename E> struct common_random_access_iterator< I, E, - void_t<decltype(false? std::declval<I>(): std::declval<E>())>, + std::void_t<decltype(false? std::declval<I>(): std::declval<E>())>, std::enable_if_t< is_random_access_iterator< std::decay_t<decltype(false? std::declval<I>(): std::declval<E>())> @@ -269,7 +209,7 @@ struct has_common_random_access_iterator: std::false_type {}; template <typename I, typename E> -struct has_common_random_access_iterator<I, E, void_t<util::common_random_access_iterator_t<I, E>>>: +struct has_common_random_access_iterator<I, E, std::void_t<util::common_random_access_iterator_t<I, E>>>: std::true_type {}; // Generic accessors: diff --git a/arbor/util/ordered_forest.hpp b/arbor/util/ordered_forest.hpp index 567226c4c58702c2fa104883d8d47b90a3289d06..38a8bc09f4d87301b3ff4f26ae8361aa242c29ad 100644 --- a/arbor/util/ordered_forest.hpp +++ b/arbor/util/ordered_forest.hpp @@ -73,8 +73,7 @@ template <typename V, typename Allocator> struct ordered_forest_builder; template <typename V, typename Allocator = std::allocator<V>> -struct ordered_forest { -private: +class ordered_forest { struct node { V* item_ = nullptr; node* parent_ = nullptr; diff --git a/arbor/util/partition.hpp b/arbor/util/partition.hpp index e4190b262ebb677336071878d048d4ba91b79b06..f6d4f67cae6acee933855d309bce760efe9b7cb3 100644 --- a/arbor/util/partition.hpp +++ b/arbor/util/partition.hpp @@ -112,11 +112,11 @@ partition_range<SeqIter> partition_view(const Seq& r) { * * If the first parameter is `partition_in_place`, the provided * container `divisions` will not be resized, and the partition will - * be of length `util::size(divisions)-1` or zero if `divisions` is + * be of length `std::size(divisions)-1` or zero if `divisions` is * empty. * - * Otherwise, `divisions` will be be resized to `util::size(sizes)+1` - * and represent a partition of length `util::size(sizes)`. + * Otherwise, `divisions` will be be resized to `std::size(sizes)+1` + * and represent a partition of length `std::size(sizes)`. * * Returns a partition view over `divisions`. */ @@ -159,7 +159,7 @@ template < > partition_range<typename sequence_traits<Part>::const_iterator> make_partition(Part& divisions, const Sizes& sizes, T from=T{}) { - divisions.resize(size(sizes)+1); + divisions.resize(std::size(sizes)+1); // (would use std::inclusive_scan in C++17) auto pi = std::begin(divisions); diff --git a/arbor/util/piecewise.hpp b/arbor/util/piecewise.hpp index ef067d62cdaa0e8fc64ef2b364bbdccf618f48d2..9f15798972c942f4d679fb64c3cac5f9126efcc1 100644 --- a/arbor/util/piecewise.hpp +++ b/arbor/util/piecewise.hpp @@ -24,7 +24,7 @@ constexpr pw_size_type pw_npos = -1; template <typename T> struct indexed_const_iterator { - using size_type = decltype(util::size(std::declval<T>())); + using size_type = decltype(std::size(std::declval<T>())); using difference_type = std::make_signed_t<size_type>; using value_type = decltype(std::declval<T>()[0]); diff --git a/arbor/util/rangeutil.hpp b/arbor/util/rangeutil.hpp index 7e6a5fda30db450992cc4ed81f695fb4dd5dc9fa..c8db7115b94bdb7ec70dd61677b92cbe3903b5e6 100644 --- a/arbor/util/rangeutil.hpp +++ b/arbor/util/rangeutil.hpp @@ -39,7 +39,7 @@ range_view(Seq&& seq) { template <typename Seq, typename = std::enable_if_t<sequence_traits<Seq&&>::is_contiguous>> auto range_pointer_view(Seq&& seq) { - return make_range(util::data(seq), util::data(seq)+util::size(seq)); + return make_range(std::data(seq), std::data(seq)+std::size(seq)); } template < @@ -250,7 +250,7 @@ Value max_value(const Seq& seq, Compare cmp = Compare{}) { using std::begin; using std::end; - if (util::empty(seq)) { + if (std::empty(seq)) { return Value{}; } @@ -277,7 +277,7 @@ std::pair<Value, Value> minmax_value(const Seq& seq, Compare cmp = Compare{}) { using std::begin; using std::end; - if (util::empty(seq)) { + if (std::empty(seq)) { return {Value{}, Value{}}; } diff --git a/arbor/util/span.hpp b/arbor/util/span.hpp index 817e1acf2c434e63c42aaaaf3162fba9b6dd6a4b..c6599e163f15698b64c384567efd5bb5fbd28224 100644 --- a/arbor/util/span.hpp +++ b/arbor/util/span.hpp @@ -39,7 +39,7 @@ span<I> make_span(I right) { template <typename Seq> auto count_along(const Seq& s) { - return util::make_span(util::size(s)); + return util::make_span(std::size(s)); } } // namespace util diff --git a/arbor/util/strprintf.hpp b/arbor/util/strprintf.hpp index f548e79b1136bfdff828e42e790cf2da7af9ee48..eacd5c49c39fd28ac25e6ddf4aff4fa0b9d46aa7 100644 --- a/arbor/util/strprintf.hpp +++ b/arbor/util/strprintf.hpp @@ -10,6 +10,7 @@ #include <string> #include <sstream> #include <system_error> +#include <type_traits> #include <utility> #include <vector> @@ -33,7 +34,7 @@ namespace impl_to_string { }; template <typename T> - struct select<T, util::void_t<decltype(to_string(std::declval<T>()))>> { + struct select<T, std::void_t<decltype(to_string(std::declval<T>()))>> { static std::string str(const T& v) { return to_string(v); } diff --git a/arborenv/gpu_uuid.cpp b/arborenv/gpu_uuid.cpp index ec64249bc4d6bb8b1edfee24be68d1cf9668e2c1..3d2c55d074510fe36b4eae339f102892b8751ae5 100644 --- a/arborenv/gpu_uuid.cpp +++ b/arborenv/gpu_uuid.cpp @@ -15,16 +15,6 @@ #include "gpu_api.hpp" -// CUDA 10 allows GPU uuid to be queried via cudaGetDeviceProperties. -// Previous versions require the CUDA NVML library to get uuid. -// -// ARBENV_USE_NVML will be defined at configuration time if using -// CUDA version 9. - -#ifdef ARBENV_USE_NVML - #include <nvml.h> -#endif - #ifdef __linux__ extern "C" { #include <unistd.h> @@ -92,8 +82,6 @@ std::runtime_error make_runtime_error(api_error_type error_code) { + error_code.name() + ": " + error_code.description()); } -#ifndef ARBENV_USE_NVML - // For CUDA 10 and later the uuid of all available GPUs is straightforward // to obtain by querying cudaGetDeviceProperties for each visible device. std::vector<uuid> get_gpu_uuids() { @@ -119,15 +107,16 @@ std::vector<uuid> get_gpu_uuids() { throw make_runtime_error(status); } - // Copy the bytes from props.uuid to uuids[i]. - #ifdef ARB_HIP + // Build a unique string for the device and hash it, then + // copy the bytes of the has to uuids[i]. auto host = get_hostname(); if (!host) throw std::runtime_error("Can't uniquely identify GPUs on the system"); auto uid = std::hash<std::string>{} (*host + '-' + std::to_string(props.pciBusID) + '-' + std::to_string(props.pciDeviceID)); auto b = reinterpret_cast<const unsigned char*>(&uid); std::copy(b, b+sizeof(std::size_t), uuids[i].bytes.begin()); #else + // Copy the bytes from props.uuid to uuids[i]. auto b = reinterpret_cast<const unsigned char*>(&props.uuid); std::copy(b, b+sizeof(uuid), uuids[i].bytes.begin()); #endif @@ -136,145 +125,6 @@ std::vector<uuid> get_gpu_uuids() { return uuids; } -#else - -std::runtime_error make_runtime_error(nvmlReturn_t error_code) { - return std::runtime_error( - std::string("cuda nvml runtime error: ") + nvmlErrorString(error_code)); -} - -// Split CUDA_VISIBLE_DEVICES variable string into a list of integers. -// The environment variable can have spaces, and the order is important: -// i.e. "0,1" is not the same as "1,0". -// CUDA_VISIBLE_DEVICES="1,0" -// CUDA_VISIBLE_DEVICES="0, 1" -// The CUDA run time parses the list until it finds an error, then returns -// the partial list. -// i.e. -// CUDA_VISIBLE_DEVICES="1, 0, hello" -> {1,0} -// CUDA_VISIBLE_DEVICES="hello, 1" -> {} -// All non-numeric characters at end of a value appear to be ignored: -// CUDA_VISIBLE_DEVICES="0a,1" -> {0,1} -// CUDA_VISIBLE_DEVICES="a0,1" -> {} -// This doesn't try too hard to check for all possible errors. -std::vector<int> parse_visible_devices(std::string str, int ngpu) { - std::vector<int> values; - std::istringstream ss(str); - while (ss) { - int v; - if (ss >> v) { - if (v<0 || v>=ngpu) break; - values.push_back(v); - while (ss && ss.get()!=','); - } - } - return values; -} - -// Take a uuid string with the format: -// GPU-f1fd7811-e4d3-4d54-abb7-efc579fb1e28 -// And convert to a 16 byte sequence -// -// Assume that the intput string is correctly formatted. -uuid string_to_uuid(char* str) { - uuid result; - unsigned n = std::strlen(str); - - // Remove the "GPU" from front of string, and the '-' hyphens, e.g.: - // GPU-f1fd7811-e4d3-4d54-abb7-efc579fb1e28 - // becomes - // f1fd7811e4d34d54abb7efc579fb1e28 - std::remove_if(str, str+n, [](char c){return !std::isxdigit(c);}); - - // Converts a single hex character, i.e. 0123456789abcdef, to int - // Assumes that input is a valid hex character. - auto hex_c2i = [](unsigned char c) -> unsigned char { - c = std::tolower(c); - return std::isalpha(c)? c-'a'+10: c-'0'; - }; - - // Convert pairs of characters into single bytes. - for (int i=0; i<16; ++i) { - const char* s = str+2*i; - result.bytes[i] = (hex_c2i(s[0])<<4) + hex_c2i(s[1]); - } - - return result; -} - -// For CUDA 9 the only way to get gpu uuid is via NVML. -// NVML can be used to query all GPU devices, not just the -// devices that have been made visible to the calling process. -// Hence, there are two steps to finding the uuid of visible devices: -// 1. Query the environment variable CUDA_VISIBLE_DEVICES to -// determine which devices are locally visible, and to enumerate -// them correctly. -// 2. Query NVML for the uuid of each visible device. -std::vector<uuid> get_gpu_uuids() { - // Get number of devices. - int ngpus = 0; - auto status = get_device_count(&ngpus); - if (status.no_device_found()) return {}; - else if (!status) throw make_runtime_error(status); - - // Attempt to initialize nvml - auto nvml_status = nvmlInit(); - const bool nvml_init = (nvml_status==NVML_ERROR_ALREADY_INITIALIZED); - if (!nvml_init && nvml_status!=NVML_SUCCESS) { - throw make_runtime_error(nvml_status); - } - auto nvml_guard = on_scope_exit([nvml_init](){if (!nvml_init) nvmlShutdown();}); - - // store the uuids - std::vector<uuid> uuids; - - // find the number of available GPUs - unsigned count = -1; - nvml_status = nvmlDeviceGetCount(&count); - if (nvml_status!=NVML_SUCCESS) throw make_runtime_error(nvml_status); - - // Indexes of GPUs available on this rank - std::vector<int> device_ids; - - // Test if the environment variable CUDA_VISIBLE_DEVICES has been set. - const char* visible_device_env = std::getenv("CUDA_VISIBLE_DEVICES"); - // If set, attempt to parse the device ids from it. - if (visible_device_env) { - // Parse the gpu ids from the environment variable - device_ids = parse_visible_devices(visible_device_env, count); - if ((unsigned)ngpus != device_ids.size()) { - // Mismatch between device count detected by cuda runtime - // and that set in environment variable. - throw std::runtime_error( - "Mismatch between the number of devices in CUDA_VISIBLE_DEVICES" - " and the number of devices detected by cudaGetDeviceCount."); - } - } - // Not set, so all devices must be available. - else { - device_ids.resize(count); - std::iota(device_ids.begin(), device_ids.end(), 0); - } - - // For each device id, query NVML for the device's uuid. - for (int i: device_ids) { - char buffer[NVML_DEVICE_UUID_BUFFER_SIZE]; - // get handle of gpu with index i - nvmlDevice_t handle; - nvml_status = nvmlDeviceGetHandleByIndex(i, &handle); - if (nvml_status!=NVML_SUCCESS) throw make_runtime_error(nvml_status); - - // get uuid as a string with format GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - nvml_status = nvmlDeviceGetUUID(handle, buffer, sizeof(buffer)); - if (nvml_status!=NVML_SUCCESS) throw make_runtime_error(nvml_status); - - uuids.push_back(string_to_uuid(buffer)); - } - - return uuids; -} -#endif // ndef ARBENV_USE_NVML - // Compare two sets of uuids // 1: both sets are identical // -1: some common elements diff --git a/arborenv/include/arborenv/with_mpi.hpp b/arborenv/include/arborenv/with_mpi.hpp index ee9e5f15dfc514ed001ff6496598b0b1c65df174..53ef6a9201c9bad9f92094b31568887cbd2d4341 100644 --- a/arborenv/include/arborenv/with_mpi.hpp +++ b/arborenv/include/arborenv/with_mpi.hpp @@ -26,7 +26,7 @@ struct with_mpi { // force exit the application before the exception that is unwinding // the stack has been caught, which would deny the opportunity to print // an error message explaining the cause of the exception. - if (!std::uncaught_exception()) { + if (std::uncaught_exceptions()==0) { MPI_Finalize(); } } diff --git a/doc/install.rst b/doc/install.rst index 87ba36c7daa1057c7400f7cda333212dc092b132..97f22e04394e29676ac97fd597e633abeeac4eed 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -31,7 +31,7 @@ with very few tools. =========== ============================================ Git To check out the code, minimum version 2.0. CMake To set up the build, minimum version 3.12. - compiler A C++14 compiler. See `compilers <install-compilers_>`_. + compiler A C++17 compiler. See `compilers <install-compilers_>`_. =========== ============================================ .. _install-compilers: @@ -39,7 +39,7 @@ with very few tools. Compilers ~~~~~~~~~ -Arbor requires a C++ compiler that fully supports C++14. +Arbor requires a C++ compiler that fully supports C++17. We recommend using GCC or Clang, for which Arbor has been tested and optimised. .. table:: Supported Compilers @@ -47,10 +47,10 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised. =========== ============ ============================================ Compiler Min version Notes =========== ============ ============================================ - GCC 6.1.0 - Clang 4.0 Needs GCC 6 or later for standard library. + GCC 8.4.0 + Clang 7.0 Needs GCC 8 or later for standard library. Apple Clang 9 Apple LLVM version 9.0.0 (clang-900.0.39.2) - Hip Clang Unofficial Release + Hip Clang Rocm 3.6 HIP support is currently experimental. =========== ============ ============================================ .. _note_CC: @@ -60,7 +60,7 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised. CMake should use. If these are not set, CMake will attempt to automatically choose a compiler, which may be too old to compile Arbor. For example, the default compiler chosen below by CMake was GCC 4.8.5 at ``/usr/bin/c++``, - so the ``CC`` and ``CXX`` variables were used to specify GCC 6.1.0 before calling ``cmake``. + so the ``CC`` and ``CXX`` variables were used to specify GCC 10.2.0 before calling ``cmake``. .. code-block:: bash @@ -70,8 +70,8 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised. # check which version of GCC is available $ g++ --version - g++ (GCC) 6.1.0 - Copyright (C) 2015 Free Software Foundation, Inc. + g++ (GCC) 10.2.0 + Copyright (C) 2020 Free Software Foundation, Inc. # set environment variables for compilers $ export CC=`which gcc`; export CXX=`which g++`; @@ -79,10 +79,10 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised. # launch CMake # the compiler version and path is given in the CMake output $ cmake .. - -- The C compiler identification is GNU 6.1.0 - -- The CXX compiler identification is GNU 6.1.0 - -- Check for working C compiler: /cm/local/apps/gcc/6.1.0/bin/gcc - -- Check for working C compiler: /cm/local/apps/gcc/6.1.0/bin/gcc -- works + -- The C compiler identification is GNU 10.2.0 + -- The CXX compiler identification is GNU 10.2.0 + -- Check for working C compiler: /cm/local/apps/gcc/10.2.0/bin/gcc + -- Check for working C compiler: /cm/local/apps/gcc/10.2.0/bin/gcc -- works ... .. Note:: @@ -108,7 +108,7 @@ Optional Requirements GPU Support ~~~~~~~~~~~ -Arbor has full support for NVIDIA GPUs, for which the NVIDIA CUDA toolkit version 9 is required. +Arbor has full support for NVIDIA GPUs, for which the NVIDIA CUDA toolkit version 10 is required. And experimental support for AMD GPUs when compiled with hip-clang (non-release compiler). Distributed @@ -220,13 +220,6 @@ CMake parameters and flags, follow links to the more detailed descriptions below cmake -DARB_WITH_ASSERTIONS=ON -DCMAKE_BUILD_TYPE=debug -.. topic:: `Release <buildtarget_>`_ mode (compiler optimizations enabled) with the default - compiler, optimized for the local `system architecture <install-architecture_>`_. - - .. code-block:: bash - - cmake -DARB_ARCH=native - .. topic:: `Release <buildtarget_>`_ mode with `Clang <install-compilers_>`_. .. code-block:: bash @@ -241,12 +234,12 @@ CMake parameters and flags, follow links to the more detailed descriptions below cmake -DARB_VECTORIZE=ON -DARB_ARCH=haswell -.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <install-vectorize_>`_, targeting the `Broadwell architecture <install-vectorize_>`_, with support for `Nvidia GPUs <install-gpu_>`_, and building with `GCC 6 <install-compilers_>`_. +.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <install-vectorize_>`_, targeting the `Broadwell architecture <install-vectorize_>`_, with support for `Nvidia GPUs <install-gpu_>`_, and building with `GCC 9 <install-compilers_>`_. .. code-block:: bash - export CC=gcc-6 - export CXX=g++-6 + export CC=gcc-9 + export CXX=g++-9 cmake -DARB_VECTORIZE=ON -DARB_ARCH=broadwell -DARB_GPU=cuda .. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <install-vectorize_>`_, targeting the `Broadwell architecture <install-vectorize_>`_, with support for `AMD GPUs <install-gpu_>`_, and building with `hipcc <install-compilers_>`_. @@ -258,11 +251,11 @@ CMake parameters and flags, follow links to the more detailed descriptions below cmake -DARB_VECTORIZE=ON -DARB_ARCH=broadwell -DARB_GPU=hip -.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <install-vectorize_>`_, optimized for the `local system architecture <install-architecture_>`_ and `install <install_>`_ in ``/opt/arbor`` +.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <install-vectorize_>`_, optimized for the local system architecture and `install <install_>`_ in ``/opt/arbor`` .. code-block:: bash - cmake -DARB_VECTORIZE=ON -DARB_ARCH=native -DCMAKE_INSTALL_PREFIX=/opt/arbor + cmake -DARB_VECTORIZE=ON -DCMAKE_INSTALL_PREFIX=/opt/arbor .. _buildtarget: @@ -392,8 +385,8 @@ Arbor has experimental support for AMD GPUs using HIP. The only compiler current Arbor is built for all supported AMD GPUs and the available GPU will be used at runtime. .. Note:: - Arbor supports and has been tested on the Kepler (K20 & K80), Pascal (P100) and Volta (V100) GPUs - as well as Vega10 and Vega20 GPUs + Arbor supports and has been tested on Pascal (P100) and Volta (V100) NVIDIA GPUs, + as well as Mi50 and Mi60 AMD GPUs. .. _install-python: @@ -411,12 +404,12 @@ CMake ``ARB_WITH_PYTHON`` option: By default ``ARB_WITH_PYTHON=OFF``. When this option is turned on, a Python module called :py:mod:`arbor` is built. A specific version of Python can be set when configuring with CMake using the -``PYTHON_EXECUTABLE`` variable. For example, to use Python 3.7 installed on a Linux -system with the executable in ``/usr/bin/python3.7``: +``PYTHON_EXECUTABLE`` variable. For example, to use Python 3.8 installed on a Linux +system with the executable in ``/usr/bin/python3.8``: .. code-block:: bash - cmake .. -DARB_WITH_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.7 + cmake .. -DARB_WITH_PYTHON=ON -DPYTHON_EXECUTABLE=/usr/bin/python3.8 By default the Python module will be installed in the standard ``CMAKE_INSTALL_PREFIX`` location. To install the module in a different location, for example as a @@ -437,7 +430,7 @@ On the target LINUX system, the Arbor package was installed in By default CMake sets ``CMAKE_INSTALL_PREFIX`` to ``/usr/local`` on Linux and OS X. The compiled libraries are installed in ``/usr/local/lib``, headers are installed in ``/usr/local/include``, and the Python module will be installed in a path like - ``/usr/local/lib/python3.7/site-packages``. + ``/usr/local/lib/python3.8/site-packages``. Because ``/usr/local`` is a system path, the installation phase needs to be run as root, i.e. ``sudo make install``, even if ``ARB_PYTHON_PREFIX`` is set to a user path that does not require root to install. diff --git a/modcc/module.cpp b/modcc/module.cpp index 16597ab01a225f4f757b4beb134560d414d7a9a8..aaf9fcc113142701791d38796286ac56a9ad47a3 100644 --- a/modcc/module.cpp +++ b/modcc/module.cpp @@ -460,13 +460,8 @@ bool Module::semantic() { } } - // handle the case where there is no SOLVE in BREAKPOINT - if(!found_solve) { - warning(" there is no SOLVE statement, required to update the" - " state variables, in the BREAKPOINT block", - breakpoint->location()); - } - else { + // handle the case where there is a SOLVE in BREAKPOINT (which is the typical case) + if (found_solve) { // Redo semantic pass in order to elimate any removed local symbols. api_state->semantic(symbols_); } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 21fe5be9123db9ec8f9c784a99a4007401770933..64f7cb6d9979945220c84bc465e5ebc685e6bd57 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -11,7 +11,7 @@ endif() # Set up pybind11, which is used to generate Python bindings. # Pybind11 has good cmake support, so just add the pybind11 directory, # instead of using find_package. -set(PYBIND11_CPP_STANDARD -std=c++14) +set(PYBIND11_CPP_STANDARD -std=c++17) add_subdirectory(pybind11) set(pyarb_source diff --git a/python/pybind11 b/python/pybind11 index e2b884c33bcde70b2ea562ffa52dd7ebee276d50..3b1dbebabc801c9cf6f0953a4c20b904d444f879 160000 --- a/python/pybind11 +++ b/python/pybind11 @@ -1 +1 @@ -Subproject commit e2b884c33bcde70b2ea562ffa52dd7ebee276d50 +Subproject commit 3b1dbebabc801c9cf6f0953a4c20b904d444f879 diff --git a/test/unit-distributed/test_mpi.cpp b/test/unit-distributed/test_mpi.cpp index 935242c27b43ca269202dbc563395e0a1cc1a7da..9dfa81d1fb45b1af9c5cc0015d60a1abb320a217 100644 --- a/test/unit-distributed/test_mpi.cpp +++ b/test/unit-distributed/test_mpi.cpp @@ -79,12 +79,12 @@ TEST(mpi, gather_all_with_partition) { if (i%2) { int rank_data[] = { i, i+7, i+8 }; util::append(expected_values, rank_data); - expected_divisions.push_back(expected_divisions.back()+util::size(rank_data)); + expected_divisions.push_back(expected_divisions.back()+std::size(rank_data)); } else { int rank_data[] = { i }; util::append(expected_values, rank_data); - expected_divisions.push_back(expected_divisions.back()+util::size(rank_data)); + expected_divisions.push_back(expected_divisions.back()+std::size(rank_data)); } } diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index c7dfc382250943a4a9ff760dd51495338ba7d6f4..f988f32e94676e029542608f5eaef398ec854fff 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -174,18 +174,17 @@ set(unit_sources if(ARB_WITH_GPU) list(APPEND unit_sources - test_intrin.cu test_gpu_stack.cu - test_matrix.cu - test_matrix_cpuvsgpu.cpp + test_multi_event_stream_gpu.cu test_reduce_by_key.cu - test_vector.cu + test_matrix_cpuvsgpu.cpp + test_matrix_gpu.cpp test_mc_cell_group_gpu.cpp test_multi_event_stream_gpu.cpp - test_multi_event_stream_gpu.cu test_spikes_gpu.cpp + test_vector_gpu.cpp ) endif() diff --git a/test/unit/gpu_vector.hpp b/test/unit/gpu_vector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..02f08181bb9454e1bb6e761a6fec87857a55c50f --- /dev/null +++ b/test/unit/gpu_vector.hpp @@ -0,0 +1,108 @@ +#pragma once + +#include <vector> + +#include "backends/gpu/gpu_api.hpp" + +/* + * Helpers for using GPU memory in unit tests. + * + * The memory helpers can't be used in .cu files, because we don't let nvcc + * compile most of our headers to avoid compiler bugs and c++ version issues. + */ + +template <typename T> +struct gpu_ref_proxy { + T* ptr; + + gpu_ref_proxy(T* p): ptr(p) {} + + gpu_ref_proxy& operator=(const T& value) { + arb::gpu::device_memcpy(ptr, &value, sizeof(T), arb::gpu::gpuMemcpyHostToDevice); + return *this; + } + + operator T() const { + T tmp; + arb::gpu::device_memcpy(&tmp, ptr, sizeof(T), arb::gpu::gpuMemcpyDeviceToHost); + return tmp; + } +}; + +template <typename T> +class gpu_vector { + using value_type = T; + using size_type = std::size_t; + +public: + gpu_vector() = default; + + gpu_vector(size_type n) { + allocate(n); + } + + gpu_vector(const std::vector<T>& other) { + allocate(other.size()); + to_device(other.data()); + } + + ~gpu_vector() { + if (data_) arb::gpu::device_free(data_); + } + + std::vector<T> host_vector() const { + std::vector<T> v(size()); + to_host(v.data()); + return v; + } + + value_type* data() { + return data_; + } + + const value_type* data() const { + return data_; + } + + size_type size() const { + return size_; + } + + value_type operator[](size_type i) const { + value_type tmp; + arb::gpu::device_memcpy(&tmp, data_+i, sizeof(value_type), arb::gpu::gpuMemcpyDeviceToHost); + return tmp; + } + + gpu_ref_proxy<value_type> operator[](size_type i) { + return gpu_ref_proxy<value_type>(data_+i); + } + +private: + + void allocate(size_type n) { + size_ = n; + arb::gpu::device_malloc(&data_, n*sizeof(T)); + } + + void to_device(const value_type* other) { + arb::gpu::device_memcpy(data_, other, size_in_bytes(), arb::gpu::gpuMemcpyHostToDevice); + } + + void to_host(value_type* other) const { + arb::gpu::device_memcpy(other, data_, size_in_bytes(), arb::gpu::gpuMemcpyDeviceToHost); + } + + size_type size_in_bytes() const { + return size_*sizeof(value_type); + } + + void free() { + arb::gpu::device_free(data_); + size_ = 0; + data_ = nullptr; + } + + size_type size_; + value_type* data_; +}; diff --git a/test/unit/stats.hpp b/test/unit/stats.hpp index 1e26caf796a0232fe4238cf07cb557a270b06f44..c453aebf7ad3bb7ddb25bd2bf87d273e1151cb7d 100644 --- a/test/unit/stats.hpp +++ b/test/unit/stats.hpp @@ -57,7 +57,7 @@ namespace ks { template <typename Seq> double dn_statistic(const Seq& qs) { - double n = static_cast<double>(arb::util::size(qs)); + double n = static_cast<double>(std::size(qs)); double d = 0; int j = 0; for (auto q: qs) { diff --git a/test/unit/test_algorithms.cpp b/test/unit/test_algorithms.cpp index ab975e0cef8ab41a292856e02875dc6bd332837b..303996dfb1f8e3f291d70061b1211e64cbdc714f 100644 --- a/test/unit/test_algorithms.cpp +++ b/test/unit/test_algorithms.cpp @@ -483,9 +483,9 @@ template <typename Sub, typename Sup> using namespace arb; auto indices = util::index_into(sub, sup); - auto n_indices = util::size(indices); - auto n_sub = util::size(sub); - if (util::size(indices)!=util::size(sub)) { + auto n_indices = std::size(indices); + auto n_sub = std::size(sub); + if (std::size(indices)!=std::size(sub)) { return ::testing::AssertionFailure() << "index_into size " << n_indices << " does not equal sub-sequence size " << n_sub; } @@ -530,7 +530,7 @@ arb::util::range<std::reverse_iterator<I>> reverse_range(arb::util::range<I> r) TEST(algorithms, index_into) { using ivector = std::vector<std::ptrdiff_t>; - using arb::util::size; + using std::size; using arb::util::index_into; using arb::util::assign_from; using arb::util::make_range; diff --git a/test/unit/test_cycle.cpp b/test/unit/test_cycle.cpp index ec935ae1d8265cdac20cf01d22e481cb335aa841..a9d8b5e00e201fe8306d5aa1f7816bd07cd513b8 100644 --- a/test/unit/test_cycle.cpp +++ b/test/unit/test_cycle.cpp @@ -113,7 +113,7 @@ TEST(cycle_iterator, carray) { int values[] = { 4, 2, 3 }; auto cycle_iter = util::make_cyclic_iterator(std::cbegin(values), std::cend(values)); - auto values_size = util::size(values); + auto values_size = std::size(values); for (auto i = 0u; i < 2*values_size; ++i) { EXPECT_EQ(values[i % values_size], *cycle_iter++); } @@ -201,7 +201,7 @@ TEST(cycle_iterator, order) { EXPECT_TRUE(c1 <= c2); EXPECT_TRUE(c1 >= c2); - c2 += util::size(values); + c2 += std::size(values); EXPECT_TRUE(c1 < c2); EXPECT_FALSE(c2 < c1); diff --git a/test/unit/test_fvm_layout.cpp b/test/unit/test_fvm_layout.cpp index dc5f9527e8cd3764f632a0dd4abae9672e9929e0..0f55d42991f9c80279df36fb895fbce4e5cd4a4c 100644 --- a/test/unit/test_fvm_layout.cpp +++ b/test/unit/test_fvm_layout.cpp @@ -176,9 +176,9 @@ struct exp_instance { template <typename Seq> exp_instance(int cv, const Seq& tgts, double e, double tau): - cv(cv), multiplicity(util::size(tgts)), e(e), tau(tau) + cv(cv), multiplicity(std::size(tgts)), e(e), tau(tau) { - targets.reserve(util::size(tgts)); + targets.reserve(std::size(tgts)); for (auto t: tgts) targets.push_back(t); util::sort(targets); } diff --git a/test/unit/test_intrin.cpp b/test/unit/test_intrin.cpp index e0b3d3855912fac0fe96eec128044354ddda5a1e..4910cdb904d397e68d3f17217f370771ed5424b5 100644 --- a/test/unit/test_intrin.cpp +++ b/test/unit/test_intrin.cpp @@ -10,7 +10,7 @@ using namespace arb::multicore; using arb::util::make_span; -using arb::util::size; +using std::size; constexpr double dqnan = std::numeric_limits<double>::quiet_NaN(); constexpr double dmax = std::numeric_limits<double>::max(); diff --git a/test/unit/test_intrin.cu b/test/unit/test_intrin.cu index 50ae74a7f254ae1a0f81f6e369cf2e12a0c4a924..b8d1da9c9544755407713a80f5a4a01746c0a86f 100644 --- a/test/unit/test_intrin.cu +++ b/test/unit/test_intrin.cu @@ -4,9 +4,8 @@ #include "backends/gpu/gpu_api.hpp" #include "backends/gpu/math_cu.hpp" -#include "memory/memory.hpp" -#include "util/rangeutil.hpp" -#include "util/span.hpp" + +#include "gpu_vector.hpp" namespace kernels { template <typename T> @@ -45,15 +44,13 @@ namespace kernels { TEST(gpu_intrinsics, gpu_atomic_add) { int expected = (128*129)/2; - arb::memory::device_vector<float> f(1); - f[0] = 0.f; + gpu_vector<float> f(std::vector<float>{0.f}); kernels::test_atomic_add<<<1, 128>>>(f.data()); EXPECT_EQ(float(expected), f[0]); - arb::memory::device_vector<double> d(1); - d[0] = 0.f; + gpu_vector<double> d(std::vector<double>{0.}); kernels::test_atomic_add<<<1, 128>>>(d.data()); @@ -64,15 +61,13 @@ TEST(gpu_intrinsics, gpu_atomic_add) { TEST(gpu_intrinsics, gpu_atomic_sub) { int expected = -(128*129)/2; - arb::memory::device_vector<float> f(1); - f[0] = 0.f; + gpu_vector<float> f(std::vector<float>{0.f}); kernels::test_atomic_sub<<<1, 128>>>(f.data()); EXPECT_EQ(float(expected), f[0]); - arb::memory::device_vector<double> d(1); - d[0] = 0.f; + gpu_vector<double> d(std::vector<double>{0.}); kernels::test_atomic_sub<<<1, 128>>>(d.data()); @@ -97,38 +92,36 @@ TEST(gpu_intrinsics, minmax) { { 0, -inf, -inf, 0}, }; - const auto n = arb::util::size(inputs); - - arb::memory::device_vector<double> lhs(n); - arb::memory::device_vector<double> rhs(n); - arb::memory::device_vector<double> result(n); + const int n = inputs.size(); - using arb::util::make_span; + gpu_vector<double> lhs(n); + gpu_vector<double> rhs(n); + gpu_vector<double> result(n); - for (auto i: make_span(0, n)) { + for (int i=0; i<n; ++i) { lhs[i] = inputs[i].lhs; rhs[i] = inputs[i].rhs; } // test min kernels::test_min<<<1, n>>>(lhs.data(), rhs.data(), result.data()); - for (auto i: make_span(0, n)) { + for (int i=0; i<n; ++i) { EXPECT_EQ(double(result[i]), inputs[i].expected_min); } kernels::test_min<<<1, n>>>(rhs.data(), lhs.data(), result.data()); - for (auto i: make_span(0, n)) { + for (int i=0; i<n; ++i) { EXPECT_EQ(double(result[i]), inputs[i].expected_min); } // test max kernels::test_max<<<1, n>>>(lhs.data(), rhs.data(), result.data()); - for (auto i: make_span(0, n)) { + for (int i=0; i<n; ++i) { EXPECT_EQ(double(result[i]), inputs[i].expected_max); } kernels::test_max<<<1, n>>>(rhs.data(), lhs.data(), result.data()); - for (auto i: make_span(0, n)) { + for (int i=0; i<n; ++i) { EXPECT_EQ(double(result[i]), inputs[i].expected_max); } } @@ -137,16 +130,15 @@ TEST(gpu_intrinsics, exprelr) { constexpr double dmin = std::numeric_limits<double>::min(); constexpr double dmax = std::numeric_limits<double>::max(); constexpr double deps = std::numeric_limits<double>::epsilon(); - double inputs[] = {-1., -0., 0., 1., -dmax, -dmin, dmin, dmax, -deps, deps, 10*deps, 100*deps, 1000*deps}; + std::vector<double> inputs{-1., -0., 0., 1., -dmax, -dmin, dmin, dmax, -deps, deps, 10*deps, 100*deps, 1000*deps}; - auto n = arb::util::size(inputs); - arb::memory::device_vector<double> x(arb::memory::host_view<double>(inputs, n)); - arb::memory::device_vector<double> result(n); + auto n = inputs.size(); + gpu_vector<double> x(inputs); + gpu_vector<double> result(n); kernels::test_exprelr<<<1,n>>>(x.data(), result.data()); - auto index = arb::util::make_span(0, n); - for (auto i: index) { + for (unsigned i=0; i<n; ++i) { auto x = inputs[i]; double expected = std::fabs(x)<deps? 1.0: x/std::expm1(x); double error = std::fabs(expected-double(result[i])); diff --git a/test/unit/test_matrix_cpuvsgpu.cpp b/test/unit/test_matrix_cpuvsgpu.cpp index 07601e1a58cdd238cbebcea7a223d652a6556237..ed751b21a2d957cc4b189b6e5788dba005cab147 100644 --- a/test/unit/test_matrix_cpuvsgpu.cpp +++ b/test/unit/test_matrix_cpuvsgpu.cpp @@ -131,7 +131,7 @@ TEST(matrix, assemble) m_gpu.assemble(on_gpu(dt), gpu_array(group_size, -64), gpu_array(group_size, 10), gpu_array(group_size, 3)); m_gpu.solve(x_d); auto result_g = on_host(x_d); - + // Compare the GPU and CPU results. // Cast result to float, because we are happy to ignore small differencs EXPECT_TRUE(seq_almost_eq<float>(result_h, result_g)); diff --git a/test/unit/test_matrix.cu b/test/unit/test_matrix_gpu.cpp similarity index 100% rename from test/unit/test_matrix.cu rename to test/unit/test_matrix_gpu.cpp diff --git a/test/unit/test_partition.cpp b/test/unit/test_partition.cpp index d8c160c498eecad5197b2a10f002c583afe33a3e..cd67bc2f4efc0d72f81392130465ff8e2e98055b 100644 --- a/test/unit/test_partition.cpp +++ b/test/unit/test_partition.cpp @@ -83,7 +83,7 @@ TEST(partition, partition_view_non_numeric) { TEST(partition, make_partition_in_place) { unsigned sizes[] = { 7, 3, 0, 2 }; - unsigned part_store[util::size(sizes)+1]; + unsigned part_store[std::size(sizes)+1]; auto p = util::make_partition(util::partition_in_place, part_store, sizes, 10u); ASSERT_EQ(4u, p.size()); diff --git a/test/unit/test_piecewise.cpp b/test/unit/test_piecewise.cpp index 65b74361eb74186dffab9ce942ba6310a04560ff..cac245819ff05cb46f951d81d1e14408d8738333 100644 --- a/test/unit/test_piecewise.cpp +++ b/test/unit/test_piecewise.cpp @@ -239,21 +239,21 @@ TEST(piecewise, equal_range) { ASSERT_EQ(er0.first, er0.second); auto er1 = p.equal_range(1.0); - ASSERT_EQ(1u, er1.second-er1.first); + ASSERT_EQ(1, er1.second-er1.first); EXPECT_EQ(10, er1.first->second); auto er2 = p.equal_range(2.0); - ASSERT_EQ(2u, er2.second-er2.first); + ASSERT_EQ(2, er2.second-er2.first); auto iter = er2.first; EXPECT_EQ(10, iter++->second); EXPECT_EQ(9, iter->second); auto er3_5 = p.equal_range(3.5); - ASSERT_EQ(1u, er3_5.second-er3_5.first); + ASSERT_EQ(1, er3_5.second-er3_5.first); EXPECT_EQ(8, er3_5.first->second); auto er4 = p.equal_range(4.0); - ASSERT_EQ(1u, er4.second-er4.first); + ASSERT_EQ(1, er4.second-er4.first); EXPECT_EQ(8, er4.first->second); auto er5 = p.equal_range(5.0); @@ -267,13 +267,13 @@ TEST(piecewise, equal_range) { ASSERT_EQ(er0.first, er0.second); auto er1 = p.equal_range(1.0); - ASSERT_EQ(2u, er1.second-er1.first); + ASSERT_EQ(2, er1.second-er1.first); auto iter = er1.first; EXPECT_EQ(10, iter++->second); EXPECT_EQ(11, iter++->second); auto er2 = p.equal_range(2.0); - ASSERT_EQ(4u, er2.second-er2.first); + ASSERT_EQ(4, er2.second-er2.first); iter = er2.first; EXPECT_EQ(11, iter++->second); EXPECT_EQ(12, iter++->second); @@ -281,7 +281,7 @@ TEST(piecewise, equal_range) { EXPECT_EQ(14, iter++->second); auto er3 = p.equal_range(3.0); - ASSERT_EQ(2u, er3.second-er3.first); + ASSERT_EQ(2, er3.second-er3.first); iter = er3.first; EXPECT_EQ(14, iter++->second); EXPECT_EQ(15, iter++->second); diff --git a/test/unit/test_range.cpp b/test/unit/test_range.cpp index ff1e22765734a64b0b5aaca867cd2ff0dd0259f9..9b6a14f1c123bf78a17b4316d2b5067d3ace8159 100644 --- a/test/unit/test_range.cpp +++ b/test/unit/test_range.cpp @@ -421,7 +421,7 @@ TEST(range, assign_from) { { std::vector<int> copy = util::assign_from(in); - for (auto i=0u; i<util::size(in); ++i) { + for (auto i=0u; i<std::size(in); ++i) { EXPECT_EQ(in[i], copy[i]); } } @@ -429,7 +429,7 @@ TEST(range, assign_from) { { std::vector<int> copy = util::assign_from( util::transform_view(in, [](int i) {return 2*i;})); - for (auto i=0u; i<util::size(in); ++i) { + for (auto i=0u; i<std::size(in); ++i) { EXPECT_EQ(2*in[i], copy[i]); } } diff --git a/test/unit/test_reduce_by_key.cu b/test/unit/test_reduce_by_key.cu index 5d164a25252aaf98a2aff6aabf452929b1ae2ed4..30998cd1e8327da63f6a9a51b5fd5a74f3d6797d 100644 --- a/test/unit/test_reduce_by_key.cu +++ b/test/unit/test_reduce_by_key.cu @@ -1,11 +1,11 @@ #include "../gtest.h" +#include <algorithm> #include <vector> #include <backends/gpu/reduce_by_key.hpp> -#include <memory/memory.hpp> -#include <util/span.hpp> -#include <util/rangeutil.hpp> + +#include "gpu_vector.hpp" using namespace arb; @@ -25,37 +25,35 @@ std::vector<T> reduce(const std::vector<T>& in, size_t n_out, const std::vector< EXPECT_EQ(in.size(), index.size()); EXPECT_TRUE(std::is_sorted(index.begin(), index.end())); - using array = memory::device_vector<T>; - using iarray = memory::device_vector<int>; + using array = gpu_vector<T>; + using iarray = gpu_vector<int>; int n = in.size(); - array src = memory::make_const_view(in); - iarray idx = memory::make_const_view(index); - array dst(n_out, 0); + array src(in); + iarray idx(index); + array dst(std::vector<T>(n_out, 0)); unsigned grid_dim = (n-1)/block_dim + 1; reduce_kernel<<<grid_dim, block_dim>>>(src.data(), dst.data(), idx.data(), n); - std::vector<T> out(n_out); - memory::copy(dst, memory::make_view(out)); - - return out; + return dst.host_vector(); } TEST(reduce_by_key, no_repetitions) { int n = 64; + std::vector<int> index(n); + for (int i=0; i<n; ++i) index[i] = i; + { std::vector<float> in(n, 1); - std::vector<int> index = util::assign_from(util::make_span(0, n)); auto out = reduce(in, n, index); for (auto o: out) EXPECT_EQ(o, 1.0f); } { std::vector<double> in(n, 1); - std::vector<int> index = util::assign_from(util::make_span(0, n)); auto out = reduce(in, n, index); for (auto o: out) EXPECT_EQ(o, 1.0); @@ -76,7 +74,8 @@ TEST(reduce_by_key, single_repeated_index) // Perform reduction of an ascending sequence of {1,2,3,...,n} // The expected result is n*(n+1)/2 for (auto n: {1, 2, 7, 31, 32, 33, 63, 64, 65, 128}) { - std::vector<double> in = util::assign_from(util::make_span(1, n+1)); + std::vector<double> in(n); + for (int i=0; i<n; ++i) in[i] = i+1; std::vector<int> index(n, 0); auto out = reduce(in, 1, index); @@ -86,8 +85,10 @@ TEST(reduce_by_key, single_repeated_index) TEST(reduce_by_key, scatter) { + // A monotonic sequence of keys with repetitions and gaps, for a reduction + // onto an array of length 12. + std::size_t n = 12; std::vector<int> index = {0,0,0,1,2,2,2,2,3,3,7,7,7,7,7,11}; - unsigned n = util::max_value(index)+1; std::vector<double> in(index.size(), 1); std::vector<double> expected = {3., 1., 4., 2., 0., 0., 0., 5., 0., 0., 0., 1.}; @@ -125,28 +126,27 @@ std::vector<T> reduce_twice(const std::vector<T>& in, size_t n_out, const std::v EXPECT_EQ(in.size(), index.size()); EXPECT_TRUE(std::is_sorted(index.begin(), index.end())); - using array = memory::device_vector<T>; - using iarray = memory::device_vector<int>; + using array = gpu_vector<T>; + using iarray = gpu_vector<int>; int n = in.size(); - array src = memory::make_const_view(in); - iarray idx = memory::make_const_view(index); - array dst(n_out, 0); + array src(in); + iarray idx(index); + array dst(std::vector<T>(n_out, 0)); unsigned grid_dim = (n-1)/block_dim + 1; reduce_twice_kernel<<<grid_dim, block_dim>>>(src.data(), dst.data(), idx.data(), n); - std::vector<T> out(n_out); - memory::copy(dst, memory::make_view(out)); - - return out; + return dst.host_vector(); } TEST(reduce_by_key, scatter_twice) { + // A monotonic sequence of keys with repetitions and gaps, for a reduction + // onto an array of length 12. + std::size_t n = 12; std::vector<int> index = {0,0,0,1,2,2,3,7,7,7,11}; - unsigned n = util::max_value(index)+1; std::vector<double> in(index.size(), 1); std::vector<double> expected = {6., 2., 4., 2., 0., 0., 0., 6., 0., 0., 0., 2.}; diff --git a/test/unit/test_scope_exit.cpp b/test/unit/test_scope_exit.cpp index 7f1890f09b31a4a11c7a8aa16069fce2ba07dd78..ec65fad3abf72b5a88834328179e5ae282318324 100644 --- a/test/unit/test_scope_exit.cpp +++ b/test/unit/test_scope_exit.cpp @@ -16,7 +16,8 @@ TEST(scope_exit, basic) { } TEST(scope_exit, noexceptcall) { - auto guard1 = on_scope_exit([] {}); + bool chill = true; + auto guard1 = on_scope_exit([&] { if (!chill) throw 0; }); using G1 = decltype(guard1); EXPECT_FALSE(noexcept(guard1.~G1())); diff --git a/test/unit/test_transform.cpp b/test/unit/test_transform.cpp index 105c07a58bc7e3367be7d09e7123b770c1ad3db7..eaf008be87ab95abfa2ee0a4c449860785da9d55 100644 --- a/test/unit/test_transform.cpp +++ b/test/unit/test_transform.cpp @@ -19,7 +19,7 @@ TEST(transform, transform_view) { auto r = util::transform_view(fl, [](int i) { return i*i+0.5; }); - EXPECT_EQ(5u, util::size(r)); + EXPECT_EQ(5u, std::size(r)); EXPECT_EQ(16.5, *(std::next(std::begin(r), 1))); std::copy(r.begin(), r.end(), std::back_inserter(result)); @@ -122,7 +122,7 @@ TEST(indirect, nocopy) { TEST(indirect, nomove) { testing::nomove<double> data[6]; - for (unsigned i=0; i<util::size(data); ++i) data[i].value = 10.+i; + for (unsigned i=0; i<std::size(data); ++i) data[i].value = 10.+i; unsigned map_reverse[6] = {5, 4, 3, 2, 1, 0}; auto reversed = util::indirect_view(data, map_reverse); @@ -156,7 +156,7 @@ TEST(indirect, modifying) { // permuted[4] = data[1] // permuted[5] = data[0] - for (unsigned i = 0; i<util::size(permuted); ++i) { + for (unsigned i = 0; i<std::size(permuted); ++i) { permuted[i] = 10.+i; } std::vector<double> expected = {15., 14., 12.}; diff --git a/test/unit/test_vector.cu b/test/unit/test_vector_gpu.cpp similarity index 76% rename from test/unit/test_vector.cu rename to test/unit/test_vector_gpu.cpp index 5a195e48a9160d684376eae2f17d16e05d7c26d8..54a4fd8a114cfbf1e5e7370e6e0723b66cc11a52 100644 --- a/test/unit/test_vector.cu +++ b/test/unit/test_vector_gpu.cpp @@ -38,21 +38,6 @@ TEST(vector, make_host_devicevector) { EXPECT_TRUE((std::is_same<int, target_type::value_type>::value)); } -// test that memory::on_gpu correctly makes a view of a device vector -/* -TEST(vector, make_gpu_devicevector) { - memory::device_vector<int> dvec(10); - auto view = memory::on_gpu(dvec); - using target_type = std::decay_t<decltype(view)>; - EXPECT_EQ(view.size(), dvec.size()); - EXPECT_EQ(view.data(), dvec.data()); - EXPECT_TRUE(memory::util::is_on_gpu<target_type>()); - EXPECT_FALSE(memory::util::is_on_host<target_type>()); - - EXPECT_TRUE((std::is_same<int, target_type::value_type>::value)); -} -*/ - // // fill // @@ -72,21 +57,6 @@ TEST(vector, fill_gpu) { EXPECT_EQ(value, double(v[i])); } } - - // fill an array view - /* - memory::device_vector<float> ubervec(N); - for (auto n : make_span(0u, N)) { - float value = float((n+1)/2.f); - // make a view of a sub-range of the std::vector ubervec - auto v = ubervec(0, n); - memory::fill(v, value); - - for (auto i: make_span(0u, n)) { - EXPECT_EQ(float(v[i]), value); - } - } - */ } //