diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4220519f3dbcc51ee27ad5e73ef732b287506277..57d4bdfe78bda2d7f60d85d0d62f76c3b4177ccf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,9 +7,9 @@ enable_language(CXX)
 # Hide warnings about mixing old and new signatures for target_link_libraries.
 # These can't be avoided, because the FindCUDA packed provided by CMake before
 # version 3.9 uses the old signature, while other packages use the new signature.
-if ("${CMAKE_VERSION}" MATCHES "^3.[0-8].")
+#if ("${CMAKE_VERSION}" MATCHES "^3.[0-8].")
 cmake_policy(SET CMP0023 OLD)
-endif()
+#endif()
 
 # save incoming CXX flags for forwarding to modcc external project
 set(SAVED_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4a0c5ac1d3d19c577b7e1f6b9571234e7604a176..db49ff64af85d978b2b650668020c6eda73b847f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ set(CUDA_SOURCES
     backends/gpu/fvm.cu
     backends/gpu/multi_event_stream.cu
     backends/gpu/fill.cu
+    backends/gpu/kernels/test_thresholds.cu
     backends/gpu/kernels/take_samples.cu
 )
diff --git a/src/backends/gpu/fvm.hpp b/src/backends/gpu/fvm.hpp
index 7002c7aefafb03370911f98e93920cb655c705c2..35be89afe78810f81604d04675698751b33edc3c 100644
--- a/src/backends/gpu/fvm.hpp
+++ b/src/backends/gpu/fvm.hpp
@@ -92,8 +92,7 @@ struct backend {
         return mech_map_.count(name)>0;
     }
 
-    using threshold_watcher =
-        nest::mc::gpu::threshold_watcher<value_type, size_type>;
+    using threshold_watcher = nest::mc::gpu::threshold_watcher;
 
     // perform min/max reductions on 'array' type
     template <typename V>
diff --git a/src/backends/gpu/kernels/detail.hpp b/src/backends/gpu/kernels/detail.hpp
index e33e933bc36d47b98cc3716eca3349b08f0f8b81..d2c1be1fd63562510c5610e8d187056eed780b5e 100644
--- a/src/backends/gpu/kernels/detail.hpp
+++ b/src/backends/gpu/kernels/detail.hpp
@@ -48,8 +48,8 @@ constexpr inline unsigned block_count(unsigned n, unsigned block_size) {
 
 // The smallest size of a buffer required to store n items in such that the
 // buffer has size that is a multiple of block_dim.
-constexpr inline unsigned padded_size (unsigned n, unsigned block_dim) {
-    return n%block_dim ? n+block_dim-(n%block_dim): n;
+constexpr inline unsigned padded_size(unsigned n, unsigned block_dim) {
+    return block_dim*block_count(n, block_dim);
 }
 
 // Placeholders to use for mark padded locations in data structures that use
diff --git a/src/backends/gpu/kernels/stack.hpp b/src/backends/gpu/kernels/stack.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..d303144b5093ef793beb9748a62ab4e3c5513d13
--- /dev/null
+++ b/src/backends/gpu/kernels/stack.hpp
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "../stack_common.hpp"
+
+namespace nest {
+namespace mc {
+namespace gpu {
+
+template <typename T>
+__device__
+void push_back(stack_storage<T>& s, const T& value) {
+    // Atomically increment the stores counter. The atomicAdd returns
+    // the value of stores before the increment, which is the location
+    // at which this thread can store value.
+    unsigned position = atomicAdd(&(s.stores), 1u);
+
+    // It is possible that stores>capacity. In this case, only capacity
+    // entries are stored, and additional values are lost. The stores
+    // counter records the total number of attempted pushes.
+    if (position<s.capacity) {
+        s.data[position] = value;
+    }
+
+    // Note: there are no guards against s.stores overflowing, in which
+    // case the stores counter would start again from 0, and values would
+    // be overwritten from the front of the stack.
+}
+
+} // namespace gpu
+} // namespace mc
+} // namespace nest
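Note: the device-side push_back above is the only way values enter the stack; kernels receive a reference to the managed stack_storage and call it directly. A minimal sketch of a caller (collect_positive is a hypothetical kernel, not part of this patch):

    __global__
    void collect_positive(nest::mc::gpu::stack_storage<int>& s, const int* v, int n) {
        int i = threadIdx.x + blockIdx.x*blockDim.x;
        if (i<n && v[i]>0) {
            push_back(s, v[i]);  // found by ADL; slot is claimed atomically, so concurrent threads are safe
        }
    }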
diff --git a/src/backends/gpu/kernels/test_thresholds.cu b/src/backends/gpu/kernels/test_thresholds.cu
new file mode 100644
index 0000000000000000000000000000000000000000..946a0e2974f23c62cea9126f29f5872ddfacbd0f
--- /dev/null
+++ b/src/backends/gpu/kernels/test_thresholds.cu
@@ -0,0 +1,79 @@
+#include <backends/fvm_types.hpp>
+
+#include "detail.hpp"
+#include "stack.hpp"
+
+namespace nest {
+namespace mc {
+namespace gpu {
+
+/// Kernel used to test for threshold crossings.
+/// params:
+///     t_after     : per-cell time at the current test (ms)
+///     t_before    : per-cell time of the previous test (ms)
+///     size        : number of values to test
+///     is_crossed  : crossing state at time t_before (true or false)
+///     prev_values : values at the watched locations at time t_before
+///     cv_index    : locations in values to test for crossing
+///     values      : current values, indexed by CV
+///     thresholds  : threshold values to watch for crossings
+__global__
+void test_thresholds_kernel(
+    const fvm_size_type* cv_to_cell, const fvm_value_type* t_after, const fvm_value_type* t_before,
+    int size,
+    stack_storage<threshold_crossing>& stack,
+    fvm_size_type* is_crossed, fvm_value_type* prev_values,
+    const fvm_size_type* cv_index, const fvm_value_type* values, const fvm_value_type* thresholds)
+{
+    int i = threadIdx.x + blockIdx.x*blockDim.x;
+
+    bool crossed = false;
+    float crossing_time;
+
+    if (i<size) {
+        // Test for threshold crossing
+        const auto cv = cv_index[i];
+        const auto cell = cv_to_cell[cv];
+        const auto v_prev = prev_values[i];
+        const auto v = values[cv];
+        const auto thresh = thresholds[i];
+
+        if (!is_crossed[i]) {
+            if (v>=thresh) {
+                // The threshold has been passed, so estimate the time using
+                // linear interpolation
+                auto pos = (thresh - v_prev)/(v - v_prev);
+                crossing_time = impl::lerp(t_before[cell], t_after[cell], pos);
+
+                is_crossed[i] = 1;
+                crossed = true;
+            }
+        }
+        else if (v<thresh) {
+            is_crossed[i]=0;
+        }
+
+        prev_values[i] = v;
+    }
+
+    if (crossed) {
+        push_back(stack, {fvm_size_type(i), crossing_time});
+    }
+}
+
+void test_thresholds(
+    const fvm_size_type* cv_to_cell, const fvm_value_type* t_after, const fvm_value_type* t_before,
+    int size,
+    stack_storage<threshold_crossing>& stack,
+    fvm_size_type* is_crossed, fvm_value_type* prev_values,
+    const fvm_size_type* cv_index, const fvm_value_type* values, const fvm_value_type* thresholds)
+{
+    constexpr int block_dim = 128;
+    const int grid_dim = impl::block_count(size, block_dim);
+    test_thresholds_kernel<<<grid_dim, block_dim>>>(
+        cv_to_cell, t_after, t_before, size, stack, is_crossed, prev_values, cv_index, values, thresholds);
+}
+
+} // namespace gpu
+} // namespace mc
+} // namespace nest
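The crossing-time estimate assumes impl::lerp(a, b, u) computes a + u*(b - a) (it is defined in detail.hpp, outside this patch). A worked example: with v_prev = -5 mV at t_before = 1 ms and v = 15 mV at t_after = 2 ms, a threshold of 10 mV gives

    pos           = (10 - (-5))/(15 - (-5)) = 0.75
    crossing_time = lerp(1.0, 2.0, 0.75) = 1.0 + 0.75*(2.0 - 1.0) = 1.75 ms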
diff --git a/src/backends/gpu/kernels/test_thresholds.hpp b/src/backends/gpu/kernels/test_thresholds.hpp
index 4c23d13d67f6a0df63bf326e75c645c763f0f807..c7b8526c5324f427c0a5e28bb388704c929fd540 100644
--- a/src/backends/gpu/kernels/test_thresholds.hpp
+++ b/src/backends/gpu/kernels/test_thresholds.hpp
@@ -1,63 +1,19 @@
 #pragma once
 
+#include <backends/fvm_types.hpp>
+
+#include "stack.hpp"
+
 namespace nest {
 namespace mc {
 namespace gpu {
 
-/// kernel used to test for threshold crossing test code.
-/// params:
-///     t           : current time (ms)
-///     t_prev      : time of last test (ms)
-///     size        : number of values to test
-///     is_crossed  : crossing state at time t_prev (true or false)
-///     prev_values : values at sample points (see index) sampled at t_prev
-///     index       : index with locations in values to test for crossing
-///     values      : values at t_prev
-///     thresholds  : threshold values to watch for crossings
-template <typename T, typename I, typename Stack>
-__global__
-void test_thresholds(
-    const I* cv_to_cell, const T* t_after, const T* t_before,
+extern void test_thresholds(
+    const fvm_size_type* cv_to_cell, const fvm_value_type* t_after, const fvm_value_type* t_before,
     int size,
-    Stack& stack,
-    I* is_crossed, T* prev_values,
-    const I* cv_index, const T* values, const T* thresholds)
-{
-    int i = threadIdx.x + blockIdx.x*blockDim.x;
-
-    bool crossed = false;
-    float crossing_time;
-
-    if (i<size) {
-        // Test for threshold crossing
-        const auto cv = cv_index[i];
-        const auto cell = cv_to_cell[cv];
-        const auto v_prev = prev_values[i];
-        const auto v = values[cv];
-        const auto thresh = thresholds[i];
-
-        if (!is_crossed[i]) {
-            if (v>=thresh) {
-                // The threshold has been passed, so estimate the time using
-                // linear interpolation
-                auto pos = (thresh - v_prev)/(v - v_prev);
-                crossing_time = impl::lerp(t_before[cell], t_after[cell], pos);
-
-                is_crossed[i] = 1;
-                crossed = true;
-            }
-        }
-        else if (v<thresh) {
-            is_crossed[i]=0;
-        }
-
-        prev_values[i] = v;
-    }
-
-    if (crossed) {
-        stack.push_back({I(i), crossing_time});
-    }
-}
+    stack_storage<threshold_crossing>& stack,
+    fvm_size_type* is_crossed, fvm_value_type* prev_values,
+    const fvm_size_type* cv_index, const fvm_value_type* values, const fvm_value_type* thresholds);
 
 } // namespace gpu
 } // namespace mc
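The header change above follows the usual pattern for keeping CUDA out of C++ translation units: the template kernel moves into a .cu file behind a plain extern function, so the header compiles with any host compiler. Sketched with hypothetical names:

    // header: plain declaration, usable from any translation unit
    void do_work(const float* x, int n);

    // .cu file: kernel plus wrapper, compiled by nvcc only
    __global__ void do_work_kernel(const float* x, int n) { /* ... */ }

    void do_work(const float* x, int n) {
        constexpr int block_dim = 128;
        do_work_kernel<<<(n+block_dim-1)/block_dim, block_dim>>>(x, n);
    }

The cost is that the function is no longer generic over T and I; here that is recovered by fixing the types to fvm_value_type and fvm_size_type.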
diff --git a/src/backends/gpu/stack.hpp b/src/backends/gpu/stack.hpp
index 2cad1d10ba55b4189c7a438e3bed4401d88da2c4..ce00d9bc7a5ad2f7cbcd7542782f58a04c19919b 100644
--- a/src/backends/gpu/stack.hpp
+++ b/src/backends/gpu/stack.hpp
@@ -1,6 +1,9 @@
 #pragma once
 
+#include <algorithm>
+
 #include <memory/allocator.hpp>
+#include "stack_common.hpp"
 
 namespace nest {
 namespace mc {
@@ -22,91 +25,95 @@ namespace gpu {
 template <typename T>
 class stack {
     using value_type = T;
-    using allocator = memory::managed_allocator<value_type>;
-
-    // The number of items of type value_type that can be stored in the stack
-    unsigned capacity_;
-
-    // The number of items that have been stored
-    unsigned size_;
-
-    // Memory containing the value buffer
-    // Stored in managed memory to facilitate host-side access of values
-    // pushed from kernels on the device.
-    value_type* data_;
+    template <typename U>
+    using allocator = memory::managed_allocator<U>;
+
+    using storage_type = stack_storage<value_type>;
+    storage_type* storage_;
+
+    storage_type* create_storage(unsigned n) {
+        auto p = allocator<storage_type>().allocate(1);
+        p->capacity = n;
+        p->stores = 0;
+        p->data = allocator<value_type>().allocate(n);
+        return p;
+    }
 
 public:
+    stack& operator=(const stack& other) = delete;
+    stack(const stack& other) = delete;
+
+    stack(): storage_(create_storage(0)) {}
 
-    stack(unsigned capacity):
-        capacity_(capacity), size_(0u)
-    {
-        data_ = allocator().allocate(capacity_);
+    stack(stack&& other): storage_(create_storage(0)) {
+        std::swap(storage_, other.storage_);
     }
 
-    ~stack() {
-        allocator().deallocate(data_, capacity_);
+    stack& operator=(stack&& other) {
+        std::swap(storage_, other.storage_);
+        return *this;
     }
 
-    // Append a new value to the stack.
-    // The value will only be appended if do_push is true.
-    __device__
-    void push_back(const value_type& value) {
-        // Atomically increment the size_ counter. The atomicAdd returns
-        // the value of size_ before the increment, which is the location
-        // at which this thread can store value.
-        unsigned position = atomicAdd(&size_, 1u);
+    explicit stack(unsigned capacity): storage_(create_storage(capacity)) {}
 
-        // It is possible that size_>capacity_. In this case, only capacity_
-        // entries are stored, and additional values are lost. The size_
-        // will contain the total number of attempts to push,
-        if (position<capacity_) {
-            data_[position] = value;
-        }
+    ~stack() {
+        allocator<value_type>().deallocate(storage_->data, storage_->capacity);
+        allocator<storage_type>().deallocate(storage_, 1);
     }
 
-    __host__
     void clear() {
-        size_ = 0;
+        storage_->stores = 0u;
     }
 
     // The number of items that have been pushed back on the stack.
-    // size may exceed capacity, which indicates that the caller attempted
+    // This may exceed capacity, which indicates that the caller attempted
     // to push back more values than there was space to store.
-    __host__ __device__
+    unsigned pushes() const {
+        return storage_->stores;
+    }
+
+    bool overflow() const {
+        return storage_->stores>capacity();
+    }
+
+    // The number of values stored in the stack.
     unsigned size() const {
-        return size_;
+        return std::min(storage_->stores, storage_->capacity);
    }
 
     // The maximum number of items that can be stored in the stack.
-    __host__ __device__
     unsigned capacity() const {
-        return capacity_;
+        return storage_->capacity;
+    }
+
+    storage_type& storage() {
+        return *storage_;
     }
 
     value_type& operator[](unsigned i) {
-        EXPECTS(i<size_ && i<capacity_);
-        return data_[i];
+        EXPECTS(i<size());
+        return storage_->data[i];
     }
 
     value_type& operator[](unsigned i) const {
-        EXPECTS(i<size_ && i<capacity_);
-        return data_[i];
+        EXPECTS(i<size());
+        return storage_->data[i];
     }
 
     value_type* begin() {
-        return data_;
+        return storage_->data;
     }
     const value_type* begin() const {
-        return data_;
+        return storage_->data;
     }
 
     value_type* end() {
-        // Take care of the case where size_>capacity_.
-        return data_ + (size_>capacity_? capacity_: size_);
+        // Take care of the case where size>capacity.
+        return storage_->data + size();
     }
     const value_type* end() const {
-        // Take care of the case where size_>capacity_.
-        return data_ + (size_>capacity_? capacity_: size_);
+        // Take care of the case where size>capacity.
+        return storage_->data + size();
     }
 };
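With this refactoring the host-side stack is a move-only value type, and kernels receive its storage() rather than the stack object itself. A usage sketch (my_kernel is hypothetical):

    gpu::stack<int> s(16);              // buffer and counters live in managed memory
    my_kernel<<<1, 16>>>(s.storage());  // device code pushes via gpu::push_back
    cudaDeviceSynchronize();            // required before the host reads managed memory
    if (!s.overflow()) {
        for (int x: s) { /* consume values pushed by the kernel */ }
    }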
diff --git a/src/backends/gpu/stack_common.hpp b/src/backends/gpu/stack_common.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7473007976a7a44a66f6bb95330f672b5ccba535
--- /dev/null
+++ b/src/backends/gpu/stack_common.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <backends/fvm_types.hpp>
+
+namespace nest {
+namespace mc {
+namespace gpu {
+
+// Stores a single crossing event.
+struct threshold_crossing {
+    fvm_size_type index;    // index of variable
+    fvm_value_type time;    // time of crossing
+
+    friend bool operator==(threshold_crossing l, threshold_crossing r) {
+        return l.index==r.index && l.time==r.time;
+    }
+};
+
+// Concrete storage of the gpu stack datatype.
+// The stack datatype itself resides in host memory, and holds a pointer to
+// the stack_storage in managed memory, which can be accessed by both host
+// and gpu code.
+template <typename T>
+struct stack_storage {
+    using value_type = T;
+
+    // The number of items of type value_type that can be stored in the stack.
+    unsigned capacity;
+
+    // The number of items that have been stored.
+    // This may exceed capacity if more stores were attempted than it is
+    // possible to store, in which case only the first capacity values are valid.
+    unsigned stores;
+
+    // Memory containing the value buffer.
+    value_type* data;
+};
+
+
+} // namespace gpu
+} // namespace mc
+} // namespace nest
diff --git a/src/backends/gpu/threshold_watcher.hpp b/src/backends/gpu/threshold_watcher.hpp
index 39a2a9e8dd5df308cf6db13799fdcb1579a92f85..b63ec72ee73e11935e700882b788dbfb94619340 100644
--- a/src/backends/gpu/threshold_watcher.hpp
+++ b/src/backends/gpu/threshold_watcher.hpp
@@ -6,6 +6,7 @@
 
 #include "managed_ptr.hpp"
 #include "stack.hpp"
+#include "backends/fvm_types.hpp"
 #include "kernels/test_thresholds.hpp"
 
 namespace nest {
@@ -14,27 +15,23 @@ namespace gpu {
 
 /// threshold crossing logic
 /// used as part of spike detection back end
-template <typename T, typename I>
 class threshold_watcher {
 public:
-    using value_type = T;
-    using size_type = I;
+    using value_type = fvm_value_type;
+    using size_type = fvm_size_type;
 
-    using array = memory::device_vector<T>;
-    using iarray = memory::device_vector<I>;
+    using array = memory::device_vector<value_type>;
+    using iarray = memory::device_vector<size_type>;
     using const_view = typename array::const_view_type;
     using const_iview = typename iarray::const_view_type;
 
-    /// stores a single crossing event
-    struct threshold_crossing {
-        size_type index;    // index of variable
-        value_type time;    // time of crossing
-    };
-
     using stack_type = stack<threshold_crossing>;
 
     threshold_watcher() = default;
+    threshold_watcher(threshold_watcher&& other) = default;
+    threshold_watcher& operator=(threshold_watcher&& other) = default;
+
     threshold_watcher(
         const_iview vec_ci,
         const_view vec_t_before,
@@ -51,7 +48,9 @@
         thresholds_(memory::make_const_view(thresh)),
         prev_values_(values),
         is_crossed_(size()),
+        // TODO: allocates enough space for 10 spikes per watch.
+        // A more robust approach might be needed to avoid overflows.
-        stack_(make_managed_ptr<stack_type>(10*size()))
+        stack_(10*size())
     {
         reset();
     }
@@ -59,7 +58,7 @@
     /// Remove all stored crossings that were detected in previous calls
     /// to test()
     void clear_crossings() {
-        stack_->clear();
+        stack_.clear();
     }
 
     /// Reset state machine for each detector.
@@ -89,7 +88,10 @@
     }
 
     const std::vector<threshold_crossing> crossings() const {
-        return std::vector<threshold_crossing>(stack_->begin(), stack_->end());
+        if (stack_.overflow()) {
+            throw std::runtime_error("GPU spike buffer overflow.");
+        }
+        return std::vector<threshold_crossing>(stack_.begin(), stack_.end());
     }
 
     /// Tests each target for changed threshold state.
@@ -97,18 +99,17 @@
     /// crossed since current time t, and the last time the test was
     /// performed.
     void test() {
-        constexpr int block_dim = 128;
-        const int grid_dim = (size()+block_dim-1)/block_dim;
-        test_thresholds<<<grid_dim, block_dim>>>(
+        test_thresholds(
             cv_to_cell_.data(), t_after_.data(), t_before_.data(),
             size(),
-            *stack_,
+            stack_.storage(),
             is_crossed_.data(), prev_values_.data(),
             cv_index_.data(), values_.data(), thresholds_.data());
 
-        // Check that the number of spikes has not exceeded
-        // the capacity of the stack.
-        EXPECTS(stack_->size() <= stack_->capacity());
+        // Check that the number of spikes has not exceeded capacity.
+        // ATTENTION: requires cudaDeviceSynchronize to avoid simultaneous
+        // host-device managed memory access.
+        EXPECTS((cudaDeviceSynchronize(), !stack_.overflow()));
    }
 
     /// the number of threashold values that are being monitored
@@ -131,7 +132,7 @@
     array prev_values_;   // values at previous sample time: on gpu
     iarray is_crossed_;   // bool flag for state of each watch: on gpu
 
-    managed_ptr<stack_type> stack_;
+    stack_type stack_;
 };
 
 } // namespace gpu
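The comma expression in the EXPECTS above is easy to misread: it synchronizes first, then evaluates the overflow check, so the host never reads the managed stack_storage while the kernel may still be writing it. Spelled out, it is equivalent to:

    cudaDeviceSynchronize();      // wait for the test_thresholds kernel to finish
    EXPECTS(!stack_.overflow());  // now safe to read stack state from the host

A side effect of this form is that when EXPECTS compiles to a no-op the synchronization is elided as well, which is presumably intended, since the check is the only host-side access at this point.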
diff --git a/src/communication/communicator.hpp b/src/communication/communicator.hpp
index e6a5073969e075a785f5cfcadbddf7c292ab7918..8a974d856a94367fed2b8f2862652d04f19ea98d 100644
--- a/src/communication/communicator.hpp
+++ b/src/communication/communicator.hpp
@@ -1,15 +1,16 @@
 #pragma once
 
 #include <algorithm>
+#include <functional>
 #include <iostream>
-#include <vector>
 #include <random>
-#include <functional>
+#include <utility>
+#include <vector>
 
 #include <algorithms.hpp>
 #include <common_types.hpp>
-#include <connection.hpp>
 #include <communication/gathered_vector.hpp>
+#include <connection.hpp>
 #include <domain_decomposition.hpp>
 #include <event_queue.hpp>
 #include <recipe.hpp>
@@ -55,7 +56,7 @@ public:
 
     // For caching information about each cell
     struct gid_info {
-        using connection_list = decltype(rec.connections_on(0));
+        using connection_list = decltype(std::declval<recipe>().connections_on(0));
         cell_gid_type gid;
         cell_gid_type local_group;
         connection_list conns;
diff --git a/src/memory/allocator.hpp b/src/memory/allocator.hpp
index 9add1a67bdd715baa0b0dc95592ea9dfaa02253b..3f77c08fa89d290ca0cd3c7d4971516b77057be1 100644
--- a/src/memory/allocator.hpp
+++ b/src/memory/allocator.hpp
@@ -148,7 +148,7 @@ namespace impl {
             void* ptr = reinterpret_cast<void *>
                 (aligned_malloc<char, Alignment>(size));
 
-            if(ptr == nullptr) {
+            if (!ptr) {
                 return nullptr;
             }
 
@@ -166,7 +166,7 @@ namespace impl {
         }
 
         void free_policy(void *ptr) {
-            if(ptr == nullptr) {
+            if (!ptr) {
                 return;
             }
             cudaHostUnregister(ptr);
@@ -189,9 +189,12 @@ namespace impl {
             "CUDA managed memory is always aligned on 1024 byte boundaries");
 
         void* allocate_policy(std::size_t n) {
+            if (!n) {
+                return nullptr;
+            }
             void* ptr;
             auto status = cudaMallocManaged(&ptr, n);
-            if(status != cudaSuccess) {
+            if (status != cudaSuccess) {
                 LOG_ERROR("memory:: unable to allocate managed memory");
                 ptr = nullptr;
             }
@@ -208,7 +211,9 @@ namespace impl {
         }
 
         void free_policy(void* p) {
-            cudaFree(p);
+            if (p) {
+                cudaFree(p);
+            }
         }
     };
 
@@ -278,11 +283,14 @@
     }
 
     pointer allocate(size_type cnt, typename std::allocator<void>::const_pointer = 0) {
-        return reinterpret_cast<T*>(allocate_policy(cnt*sizeof(T)));
+        if (cnt) {
+            return reinterpret_cast<T*>(allocate_policy(cnt*sizeof(T)));
+        }
+        return nullptr;
     }
 
-    void deallocate(pointer p, size_type) {
-        if( p!=nullptr ) {
+    void deallocate(pointer p, size_type cnt) {
+        if (p) {
             free_policy(p);
         }
     }
@@ -334,6 +342,13 @@ namespace util {
             return std::string("device_policy");
         }
     };
+
+    template <>
+    struct type_printer<impl::cuda::managed_policy<>>{
+        static std::string print() {
+            return std::string("managed_policy");
+        }
+    };
 #endif
 
     template <typename T, typename Policy>
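The zero-size handling added to the allocator is what makes the empty-stack case (tested below in test_gpu_stack.cu) well-defined: allocate(0) returns nullptr without calling into CUDA, and deallocate ignores null pointers. For example:

    memory::managed_allocator<int> alloc;
    int* p = alloc.allocate(0);  // nullptr; no cudaMallocManaged call is made
    alloc.deallocate(p, 0);      // safe no-op on a null pointer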
diff --git a/src/memory/definitions.hpp b/src/memory/definitions.hpp
index 6254315d71515e423fc87a0446ee7a8f69312f01..3b0b32778a7e5b463d49b3b8358debfb6442d1ac 100644
--- a/src/memory/definitions.hpp
+++ b/src/memory/definitions.hpp
@@ -2,6 +2,7 @@
 
 #include <cstddef>
 #include <sstream>
+#include <typeinfo>
 
 namespace nest {
 namespace mc {
@@ -67,7 +68,7 @@ namespace util {
 
     template <typename T>
     struct type_printer{
         static std::string print() {
-            return std::string("T");
+            return typeid(T).name();
         }
     };
diff --git a/src/util/compat.hpp b/src/util/compat.hpp
index 85d54ad254e5286bc49bb06bfbebda66a4b8ef10..977bce1205489d5ecd258119e106b98b326121c3 100644
--- a/src/util/compat.hpp
+++ b/src/util/compat.hpp
@@ -7,8 +7,7 @@
 
 namespace compat {
 
-template<int major=0, int minor=0, int patchlevel=0>
-constexpr bool using_intel_compiler() {
+constexpr bool using_intel_compiler(int major=0, int minor=0, int patchlevel=0) {
 #if defined(__INTEL_COMPILER)
     return __INTEL_COMPILER >= major*100 + minor &&
         __INTEL_COMPILER_UPDATE >= patchlevel;
@@ -17,12 +16,10 @@ constexpr bool using_intel_compiler() {
 #endif
 }
 
-template<int major=0, int minor=0, int patchlevel=0>
-constexpr bool using_gnu_compiler() {
+constexpr bool using_gnu_compiler(int major=0, int minor=0, int patchlevel=0) {
 #if defined(__GNUC__)
-    constexpr int available = __GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__;
-    constexpr int required = major*10000 + minor*100 + patchlevel;
-    return available >= required;
+    return (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
+        >= (major*10000 + minor*100 + patchlevel);
 #else
     return false;
 #endif
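Replacing the template parameters with defaulted function arguments keeps the compat checks constexpr while making call sites read naturally; e.g. a hypothetical guard for a compiler-specific workaround:

    if (compat::using_gnu_compiler(4, 9)) {
        // apply a workaround needed for GCC 4.9 and later
    }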
diff --git a/tests/unit/test_gpu_stack.cu b/tests/unit/test_gpu_stack.cu
index ad20a4d5a1e2732f333041fac793e175c67505bc..343d4142abba91b98bdfb1382ad9438cc99a0fc7 100644
--- a/tests/unit/test_gpu_stack.cu
+++ b/tests/unit/test_gpu_stack.cu
@@ -1,5 +1,6 @@
 #include "../gtest.h"
 
+#include <backends/gpu/kernels/stack.hpp>
 #include <backends/gpu/stack.hpp>
 #include <backends/gpu/managed_ptr.hpp>
 
@@ -18,9 +19,9 @@ TEST(stack, construction) {
 namespace kernels {
     template <typename F>
     __global__
-    void push_back(gpu::stack<int>& s, F f) {
+    void push_back(gpu::stack_storage<int>& s, F f) {
         if (f(threadIdx.x)) {
-            s.push_back(threadIdx.x);
+            nest::mc::gpu::push_back(s, int(threadIdx.x));
         }
     }
 
@@ -52,28 +53,58 @@ TEST(stack, push_back) {
     const unsigned n = 10;
     EXPECT_TRUE(n%2 == 0); // require n is even for tests to work
 
-    auto s = gpu::make_managed_ptr<stack>(n);
+    auto s = stack(n);
+    auto& sstorage = s.storage();
 
-    kernels::push_back<<<1, n>>>(*s, kernels::all_ftor());
+    kernels::push_back<<<1, n>>>(sstorage, kernels::all_ftor());
     cudaDeviceSynchronize();
-    EXPECT_EQ(n, s->size());
-    for (auto i=0; i<int(s->size()); ++i) {
-        EXPECT_EQ(i, (*s)[i]);
+    EXPECT_EQ(n, s.size());
+    for (auto i=0; i<int(s.size()); ++i) {
+        EXPECT_EQ(i, s[i]);
     }
 
-    s->clear();
-    kernels::push_back<<<1, n>>>(*s, kernels::even_ftor());
+    s.clear();
+    kernels::push_back<<<1, n>>>(sstorage, kernels::even_ftor());
     cudaDeviceSynchronize();
-    EXPECT_EQ(n/2, s->size());
-    for (auto i=0; i<int(s->size())/2; ++i) {
-        EXPECT_EQ(2*i, (*s)[i]);
+    EXPECT_EQ(n/2, s.size());
+    for (auto i=0; i<int(s.size())/2; ++i) {
+        EXPECT_EQ(2*i, s[i]);
     }
 
-    s->clear();
-    kernels::push_back<<<1, n>>>(*s, kernels::odd_ftor());
+    s.clear();
+    kernels::push_back<<<1, n>>>(sstorage, kernels::odd_ftor());
     cudaDeviceSynchronize();
-    EXPECT_EQ(n/2, s->size());
-    for (auto i=0; i<int(s->size())/2; ++i) {
-        EXPECT_EQ(2*i+1, (*s)[i]);
+    EXPECT_EQ(n/2, s.size());
+    for (auto i=0; i<int(s.size())/2; ++i) {
+        EXPECT_EQ(2*i+1, s[i]);
     }
 }
+
+TEST(stack, overflow) {
+    using T = int;
+    using stack = gpu::stack<T>;
+
+    const unsigned n = 10;
+    auto s = stack(n);
+    auto& sstorage = s.storage();
+    EXPECT_FALSE(s.overflow());
+
+    // push 2n items into a stack of size n
+    kernels::push_back<<<1, 2*n>>>(sstorage, kernels::all_ftor());
+    cudaDeviceSynchronize();
+    EXPECT_EQ(n, s.size());
+    EXPECT_EQ(2*n, s.pushes());
+    EXPECT_TRUE(s.overflow());
+}
+
+TEST(stack, empty) {
+    using T = int;
+    using stack = gpu::stack<T>;
+
+    stack s(0u);
+
+    EXPECT_EQ(s.size(), 0u);
+    EXPECT_EQ(s.capacity(), 0u);
+
+    EXPECT_EQ(s.storage().data, nullptr);
+}
diff --git a/tests/unit/test_spikes.cpp b/tests/unit/test_spikes.cpp
index 5e759025138a493ebdc449b3ce7ac9c69c87e248..cd9929841ba64ea9df19187e176c00a1f234bd0f 100644
--- a/tests/unit/test_spikes.cpp
+++ b/tests/unit/test_spikes.cpp
@@ -18,7 +18,6 @@ using backend = USE_BACKEND;
 #endif
 
 TEST(spikes, threshold_watcher) {
-    using backend = multicore::backend;
     using size_type = backend::size_type;
     using value_type = backend::value_type;
     using array = backend::array;
@@ -61,7 +60,7 @@
 
     // test again at t=1, with unchanged values
     //  - nothing should change
-    util::fill(time_after, 1.);
+    memory::fill(time_after, 1.);
     watch.test();
     EXPECT_FALSE(watch.is_crossed(0));
     EXPECT_TRUE(watch.is_crossed(1));
@@ -72,7 +71,7 @@
     //  - 2nd watch should now stop spiking
     memory::fill(values, 0.);
     memory::copy(time_after, time_before);
-    util::fill(time_after, 2.);
+    memory::fill(time_after, 2.);
     watch.test();
     EXPECT_FALSE(watch.is_crossed(0));
     EXPECT_FALSE(watch.is_crossed(1));
@@ -100,7 +99,7 @@
     //  - all watches should stop spiking
     memory::fill(values, 0.);
     memory::copy(time_after, time_before);
-    util::fill(time_after, 4.);
+    memory::fill(time_after, 4.);
     watch.test();
     EXPECT_FALSE(watch.is_crossed(0));
     EXPECT_FALSE(watch.is_crossed(1));
@@ -111,7 +110,7 @@
     //  - watch 3 should be spiking
     values[index[2]] = 6.;
     memory::copy(time_after, time_before);
-    util::fill(time_after, 5.);
+    memory::fill(time_after, 5.);
     watch.test();
     EXPECT_FALSE(watch.is_crossed(0));
     EXPECT_FALSE(watch.is_crossed(1));
@@ -144,7 +143,7 @@
     memory::fill(values, 0);
     values[index[0]] = 10.; // first watch should be intialized to spiking state
-    util::fill(time_before, 0.);
+    memory::fill(time_before, 0.);
     watch.reset();
     EXPECT_EQ(watch.crossings().size(), 0u);
     EXPECT_TRUE(watch.is_crossed(0));