From ddbece13ba9df2cd1a30b4c5fdfdffce4de31e08 Mon Sep 17 00:00:00 2001
From: Ben Cumming <louncharf@gmail.com>
Date: Mon, 25 Sep 2017 13:15:44 +0200
Subject: [PATCH] Finish Separable CUDA compilation (#356)

Separate compilation for all CUDA code.

* Move all CUDA kernels to their own .cu files, together with C++ function wrappers.
* Compile all CUDA .cu files to a single static library.
* Merge gpu and multicore backend validation tests.
* Simplify and clean up cruft from CMakeLists.txt files.
---
 mechanisms/CMakeLists.txt | 5 +-
 miniapp/CMakeLists.txt | 18 +-
 miniapp/miniapp.cu | 1 -
 modcc/cudaprinter.cpp | 7 +-
 modcc/modcc.cpp | 8 +-
 src/CMakeLists.txt | 27 +--
 src/backends/fvm.hpp | 9 +-
 src/backends/gpu/{fvm.cu => fvm.cpp} | 0
 src/backends/gpu/fvm.hpp | 20 +-
 ...assemble_matrix.hpp => assemble_matrix.cu} | 54 ++++-
 src/backends/gpu/kernels/detail.hpp | 49 ++--
 src/backends/gpu/kernels/interleave.cu | 45 ++++
 src/backends/gpu/kernels/interleave.hpp | 31 ++-
 .../{solve_matrix.hpp => solve_matrix.cu} | 38 ++-
 src/backends/gpu/kernels/stim_current.cu | 48 ++++
 src/backends/gpu/kernels/time_ops.cu | 80 +++++++
 src/backends/gpu/kernels/time_ops.hpp | 113 ---------
 src/backends/gpu/matrix_state_flat.hpp | 40 ++--
 src/backends/gpu/matrix_state_interleaved.hpp | 66 ++++--
 src/backends/gpu/stim_current.hpp | 16 ++
 src/backends/gpu/stimulus.hpp | 39 +---
 src/backends/gpu/time_ops.hpp | 25 ++
 tests/unit/CMakeLists.txt | 10 +-
 tests/unit/test_matrix.cu | 4 +
 tests/unit/test_multi_event_stream.cu | 45 +---
 tests/validation/CMakeLists.txt | 56 ++---
 tests/validation/validate_ball_and_stick.cpp | 218 +++++++++++++++++-
 tests/validation/validate_ball_and_stick.cu | 26 ---
 tests/validation/validate_ball_and_stick.hpp | 191 ---------------
 tests/validation/validate_kinetic.cpp | 115 ++++++++-
 tests/validation/validate_kinetic.cu | 13 --
 tests/validation/validate_kinetic.hpp | 99 --------
 tests/validation/validate_soma.cpp | 70 +++++-
 tests/validation/validate_soma.cu | 9 -
 tests/validation/validate_soma.hpp | 64 -----
 tests/validation/validate_synapses.cpp | 98 +++++++-
 tests/validation/validate_synapses.cu | 16 --
 tests/validation/validate_synapses.hpp | 84 ------
 38 files changed, 978 insertions(+), 879 deletions(-)
 delete mode 100644 miniapp/miniapp.cu
 rename src/backends/gpu/{fvm.cu => fvm.cpp} (100%)
 rename src/backends/gpu/kernels/{assemble_matrix.hpp => assemble_matrix.cu} (71%)
 create mode 100644 src/backends/gpu/kernels/interleave.cu
 rename src/backends/gpu/kernels/{solve_matrix.hpp => solve_matrix.cu} (70%)
 create mode 100644 src/backends/gpu/kernels/stim_current.cu
 create mode 100644 src/backends/gpu/kernels/time_ops.cu
 delete mode 100644 src/backends/gpu/kernels/time_ops.hpp
 create mode 100644 src/backends/gpu/stim_current.hpp
 create mode 100644 src/backends/gpu/time_ops.hpp
 delete mode 100644 tests/validation/validate_ball_and_stick.cu
 delete mode 100644 tests/validation/validate_ball_and_stick.hpp
 delete mode 100644 tests/validation/validate_kinetic.cu
 delete mode 100644 tests/validation/validate_kinetic.hpp
 delete mode 100644 tests/validation/validate_soma.cu
 delete mode 100644 tests/validation/validate_soma.hpp
 delete mode 100644 tests/validation/validate_synapses.cu
 delete mode 100644 tests/validation/validate_synapses.hpp

diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt
index bed251ad..dbd1e1a9 100644
--- a/mechanisms/CMakeLists.txt
+++ b/mechanisms/CMakeLists.txt
@@ -47,10 +47,11 @@ build_modules(
 # Make a library with the implementations of the mechanism kernels
if(NMC_WITH_CUDA) - # make list of the .cu files that + # make list of the .cu files that implement the mechanism kernels foreach(mech ${mechanisms}) - set(cuda_mech_sources ${cuda_mech_sources} ${mech_dir}/${mech}_impl.cu) + list(APPEND cuda_mech_sources ${mech_dir}/${mech}_impl.cu) endforeach() + # compile the .cu files into a library cuda_add_library(gpu_mechanisms ${cuda_mech_sources}) diff --git a/miniapp/CMakeLists.txt b/miniapp/CMakeLists.txt index 95c42b1f..c309edd9 100644 --- a/miniapp/CMakeLists.txt +++ b/miniapp/CMakeLists.txt @@ -1,5 +1,3 @@ -set(HEADERS -) set(MINIAPP_SOURCES miniapp.cpp io.cpp @@ -7,15 +5,8 @@ set(MINIAPP_SOURCES morphology_pool.cpp trace.cpp ) -set(MINIAPP_SOURCES_CUDA - miniapp.cu - io.cpp - miniapp_recipes.cpp - morphology_pool.cpp - trace.cpp -) -add_executable(miniapp.exe ${MINIAPP_SOURCES} ${HEADERS}) +add_executable(miniapp.exe ${MINIAPP_SOURCES}) target_link_libraries(miniapp.exe LINK_PUBLIC ${NESTMC_LIBRARIES}) target_link_libraries(miniapp.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES}) @@ -25,7 +16,8 @@ if(NMC_WITH_MPI) set_property(TARGET miniapp.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") endif() -set_target_properties(miniapp.exe - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/miniapp" +set_target_properties( + miniapp.exe + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/miniapp" ) diff --git a/miniapp/miniapp.cu b/miniapp/miniapp.cu deleted file mode 100644 index 53bd563e..00000000 --- a/miniapp/miniapp.cu +++ /dev/null @@ -1 +0,0 @@ -#include "miniapp.cpp" diff --git a/modcc/cudaprinter.cpp b/modcc/cudaprinter.cpp index 16decd8c..08e5f090 100644 --- a/modcc/cudaprinter.cpp +++ b/modcc/cudaprinter.cpp @@ -44,9 +44,8 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) buffer().add_line("#pragma once"); buffer().add_line("#include <backends/event.hpp>"); buffer().add_line("#include <backends/fvm_types.hpp>"); - buffer().add_line("#include <backends/gpu/kernels/detail.hpp>"); - buffer().add_line("#include <backends/gpu/kernels/reduce_by_key.hpp>"); buffer().add_line("#include <backends/multi_event_stream_state.hpp>"); + buffer().add_line("#include <backends/gpu/kernels/detail.hpp>"); buffer().add_line(); buffer().add_line("namespace nest{ namespace mc{ namespace gpu{"); @@ -132,6 +131,9 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) // kernels buffer().add_line("#include \"" + module_name_ + "_impl.hpp\""); buffer().add_line(); + buffer().add_line("#include <backends/gpu/intrinsics.hpp>"); + buffer().add_line("#include <backends/gpu/kernels/reduce_by_key.hpp>"); + buffer().add_line(); buffer().add_line("namespace nest{ namespace mc{ namespace gpu{"); buffer().add_line("namespace kernels {"); buffer().increase_indentation(); @@ -209,7 +211,6 @@ CUDAPrinter::CUDAPrinter(Module &m, bool o) buffer().add_line("#include <algorithms.hpp>"); buffer().add_line("#include <backends/event.hpp>"); buffer().add_line("#include <backends/fvm_types.hpp>"); - buffer().add_line("#include <backends/gpu/intrinsics.hpp>"); buffer().add_line("#include <backends/gpu/multi_event_stream.hpp>"); buffer().add_line("#include <util/pprintf.hpp>"); buffer().add_line(); diff --git a/modcc/modcc.cpp b/modcc/modcc.cpp index 904c09ce..a588638b 100644 --- a/modcc/modcc.cpp +++ b/modcc/modcc.cpp @@ -208,18 +208,14 @@ int main(int argc, char **argv) { } } - catch(compiler_exception e) { + catch(compiler_exception& e) { std::cerr << red("internal compiler error: ") << white("this means a bug in the compiler," " please report to modcc developers\n") << 
e.what() << " @ " << e.location() << "\n"; exit(1); } - catch(std::runtime_error e) { - std::cerr << red("error: ") << e.what() << "\n"; - exit(1); - } - catch(std::exception e) { + catch(std::exception& e) { std::cerr << red("internal compiler error: ") << white("this means a bug in the compiler," " please report to modcc developers\n") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 69c0192e..2e06e3ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ set(BASE_SOURCES backends/multicore/fvm.cpp + cell_group_factory.cpp common_types_io.cpp cell.cpp event_binner.cpp @@ -25,21 +26,18 @@ set(BASE_SOURCES util/unwind.cpp ) set(CUDA_SOURCES - backends/gpu/fvm.cu - backends/gpu/multi_event_stream.cu + backends/gpu/fvm.cpp backends/gpu/fill.cu - backends/gpu/kernels/test_thresholds.cu + backends/gpu/multi_event_stream.cu + backends/gpu/kernels/assemble_matrix.cu + backends/gpu/kernels/interleave.cu + backends/gpu/kernels/solve_matrix.cu + backends/gpu/kernels/stim_current.cu backends/gpu/kernels/take_samples.cu + backends/gpu/kernels/test_thresholds.cu + backends/gpu/kernels/time_ops.cu ) -# The cell_group_factory acts like an interface between the -# front end and back end. -if(NMC_WITH_CUDA) - set(CUDA_SOURCES ${CUDA_SOURCES} cell_group_factory.cu) -else() - set(BASE_SOURCES ${BASE_SOURCES} cell_group_factory.cpp) -endif() - if(NMC_WITH_MPI) set(BASE_SOURCES ${BASE_SOURCES} communication/mpi.cpp) elseif(NMC_WITH_DRYRUN) @@ -55,12 +53,7 @@ list(APPEND NESTMC_LIBRARIES nestmc) if(NMC_WITH_CUDA) cuda_add_library(gpu ${CUDA_SOURCES}) - # FIXME - # The gpu library uses symbols fron nestmc, so we have to - # add nestmc to the end. This is a temporary hack that will - # go away when we have properly seperable front and back end - # compilation. - list(APPEND NESTMC_LIBRARIES gpu nestmc) + list(APPEND NESTMC_LIBRARIES gpu) endif() if (NMC_AUTO_RUN_MODCC_ON_CHANGES) diff --git a/src/backends/fvm.hpp b/src/backends/fvm.hpp index a12ce30e..7b94edb5 100644 --- a/src/backends/fvm.hpp +++ b/src/backends/fvm.hpp @@ -36,13 +36,8 @@ struct null_backend: public multicore::backend { } // namespace mc } // namespace nest -// FIXME: This include is where cuda-specific code leaks into the main application. -// e.g.: CUDA kernels, functions marked __host__ __device__, etc. -// Hence why it is guarded with NMC_HAVE_CUDA, and not, NMC_HAVE_GPU, like elsewhere in -// the code. When we implement separate compilation of CUDA, this should be guarded with -// NMC_HAVE_GPU, and the NMC_HAVE_CUDA flag depricated. 
-#ifdef NMC_HAVE_CUDA - #include <backends/gpu/fvm.hpp> +#ifdef NMC_HAVE_GPU +#include <backends/gpu/fvm.hpp> #else namespace nest { namespace mc { namespace gpu { using backend = null_backend; diff --git a/src/backends/gpu/fvm.cu b/src/backends/gpu/fvm.cpp similarity index 100% rename from src/backends/gpu/fvm.cu rename to src/backends/gpu/fvm.cpp diff --git a/src/backends/gpu/fvm.hpp b/src/backends/gpu/fvm.hpp index b9ca2254..ac439785 100644 --- a/src/backends/gpu/fvm.hpp +++ b/src/backends/gpu/fvm.hpp @@ -10,13 +10,13 @@ #include <memory/memory.hpp> #include <util/rangeutil.hpp> -#include "kernels/time_ops.hpp" #include "kernels/take_samples.hpp" #include "matrix_state_interleaved.hpp" -#include "matrix_state_flat.hpp" +//#include "matrix_state_flat.hpp" #include "multi_event_stream.hpp" #include "stimulus.hpp" #include "threshold_watcher.hpp" +#include "time_ops.hpp" namespace nest { namespace mc { @@ -105,20 +105,17 @@ struct backend { // perform element-wise comparison on 'array' type against `t_test`. template <typename V> static bool any_time_before(const memory::device_vector<V>& t, V t_test) { - // Note: benchmarking (on a P100) indicates that using the gpu::any_time_before - // function is slower than the copy, unless we're running over ten thousands of - // cells per cell group. - // - // Commenting out for now, but consider a size-dependent test or adaptive choice. - - // return gpu::any_time_before(t.size(), t.data(), t_test); + // Note: ubbench benchmarking (on a P100) indicates that copying the + // time vectors to the host is faster than a device side + // implementation unless we're running over ten thousands of cells per + // cell group. auto v_copy = memory::on_host(t); return util::minmax_value(v_copy).first<t_test; } static void update_time_to(array& time_to, const_view time, value_type dt, value_type tmax) { - nest::mc::gpu::update_time_to<value_type, size_type>(time_to.size(), time_to.data(), time.data(), dt, tmax); + nest::mc::gpu::update_time_to(time_to.size(), time_to.data(), time.data(), dt, tmax); } // set the per-cell and per-compartment dt_ from time_to_ - time_. 
@@ -126,7 +123,8 @@ struct backend { size_type ncell = util::size(dt_cell); size_type ncomp = util::size(dt_comp); - nest::mc::gpu::set_dt<value_type, size_type>(ncell, ncomp, dt_cell.data(), dt_comp.data(), time_to.data(), time.data(), cv_to_cell.data()); + nest::mc::gpu::set_dt( + ncell, ncomp, dt_cell.data(), dt_comp.data(), time_to.data(), time.data(), cv_to_cell.data()); } // perform sampling as described by marked events in a sample_event_stream diff --git a/src/backends/gpu/kernels/assemble_matrix.hpp b/src/backends/gpu/kernels/assemble_matrix.cu similarity index 71% rename from src/backends/gpu/kernels/assemble_matrix.hpp rename to src/backends/gpu/kernels/assemble_matrix.cu index 1a4d6a45..c6bd144d 100644 --- a/src/backends/gpu/kernels/assemble_matrix.hpp +++ b/src/backends/gpu/kernels/assemble_matrix.cu @@ -1,4 +1,4 @@ -#pragma once +#include <backends/fvm_types.hpp> #include "detail.hpp" @@ -6,6 +6,7 @@ namespace nest { namespace mc { namespace gpu { +namespace kernels { /// GPU implementatin of Hines matrix assembly /// Flat layout /// For a given time step size dt @@ -141,6 +142,57 @@ void assemble_matrix_interleaved( } } +} // namespace kernels + +void assemble_matrix_flat( + fvm_value_type* d, + fvm_value_type* rhs, + const fvm_value_type* invariant_d, + const fvm_value_type* voltage, + const fvm_value_type* current, + const fvm_value_type* cv_capacitance, + const fvm_size_type* cv_to_cell, + const fvm_value_type* dt_cell, + unsigned n) +{ + constexpr unsigned block_dim = 128; + const unsigned grid_dim = impl::block_count(n, block_dim); + + kernels::assemble_matrix_flat + <fvm_value_type, fvm_size_type> + <<<grid_dim, block_dim>>> + (d, rhs, invariant_d, voltage, current, cv_capacitance, cv_to_cell, dt_cell, n); +} + +//template <typename T, typename I, unsigned BlockWidth, unsigned LoadWidth, unsigned Threads> +void assemble_matrix_interleaved( + fvm_value_type* d, + fvm_value_type* rhs, + const fvm_value_type* invariant_d, + const fvm_value_type* voltage, + const fvm_value_type* current, + const fvm_value_type* cv_capacitance, + const fvm_size_type* sizes, + const fvm_size_type* starts, + const fvm_size_type* matrix_to_cell, + const fvm_value_type* dt_cell, + unsigned padded_size, unsigned num_mtx) +{ + constexpr unsigned bd = impl::block_dim(); + constexpr unsigned lw = impl::load_width(); + constexpr unsigned block_dim = bd*lw; + + // The number of threads is threads_per_matrix*num_mtx + const unsigned grid_dim = impl::block_count(num_mtx*lw, block_dim); + + kernels::assemble_matrix_interleaved + <fvm_value_type, fvm_size_type, bd, lw, block_dim> + <<<grid_dim, block_dim>>> + (d, rhs, invariant_d, voltage, current, cv_capacitance, + sizes, starts, matrix_to_cell, + dt_cell, padded_size, num_mtx); +} + } // namespace gpu } // namespace mc } // namespace nest diff --git a/src/backends/gpu/kernels/detail.hpp b/src/backends/gpu/kernels/detail.hpp index 35b40727..15d08a6f 100644 --- a/src/backends/gpu/kernels/detail.hpp +++ b/src/backends/gpu/kernels/detail.hpp @@ -1,7 +1,15 @@ #pragma once -#include <cstdint> #include <cfloat> +#include <cmath> +#include <cstdint> +#include <climits> + +#ifdef __CUDACC__ +# define HOST_DEVICE_IF_CUDA __host__ __device__ +#else +# define HOST_DEVICE_IF_CUDA +#endif namespace nest { namespace mc { @@ -9,34 +17,34 @@ namespace gpu { namespace impl { // Number of matrices per block in block-interleaved storage -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr inline unsigned block_dim() { return 32u; } // The number of threads per matrix in 
the interleave and reverse-interleave // operations. -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr inline unsigned load_width() { return 32u; } // The alignment of matrices inside the block-interleaved storage. -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr inline unsigned matrix_padding() { return load_width(); } // Number of threads per warp // This has always been 32, however it may change in future NVIDIA gpus -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr inline unsigned threads_per_warp() { return 32u; } // The minimum number of bins required to store n values where the bins have // dimension of block_size. -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr inline unsigned block_count(unsigned n, unsigned block_size) { return (n+block_size-1)/block_size; } @@ -50,29 +58,30 @@ constexpr inline unsigned padded_size(unsigned n, unsigned block_dim) { // Placeholders to use for mark padded locations in data structures that use // padding. Using such markers makes it easier to test that padding is // performed correctly. -template <typename T> __host__ __device__ constexpr T npos(); -template <> __host__ __device__ constexpr char npos<char>() { return CHAR_MAX; } -template <> __host__ __device__ constexpr unsigned char npos<unsigned char>() { return UCHAR_MAX; } -template <> __host__ __device__ constexpr short npos<short>() { return SHRT_MAX; } -template <> __host__ __device__ constexpr int npos<int>() { return INT_MAX; } -template <> __host__ __device__ constexpr long npos<long>() { return LONG_MAX; } -template <> __host__ __device__ constexpr float npos<float>() { return FLT_MAX; } -template <> __host__ __device__ constexpr double npos<double>() { return DBL_MAX; } -template <> __host__ __device__ constexpr unsigned short npos<unsigned short>() { return USHRT_MAX; } -template <> __host__ __device__ constexpr unsigned int npos<unsigned int>() { return UINT_MAX; } -template <> __host__ __device__ constexpr unsigned long npos<unsigned long>() { return ULONG_MAX; } -template <> __host__ __device__ constexpr long long npos<long long>() { return LLONG_MAX; } +#define NPOS_SPEC(type, cint) template <> HOST_DEVICE_IF_CUDA constexpr type npos<type>() { return cint; } +template <typename T> HOST_DEVICE_IF_CUDA constexpr T npos(); +NPOS_SPEC(char, CHAR_MAX) +NPOS_SPEC(unsigned char, UCHAR_MAX) +NPOS_SPEC(short, SHRT_MAX) +NPOS_SPEC(int, INT_MAX) +NPOS_SPEC(long, LONG_MAX) +NPOS_SPEC(float, FLT_MAX) +NPOS_SPEC(double, DBL_MAX) +NPOS_SPEC(unsigned short, USHRT_MAX) +NPOS_SPEC(unsigned int, UINT_MAX) +NPOS_SPEC(unsigned long, ULONG_MAX) +NPOS_SPEC(long long, LLONG_MAX) // test if value v is npos template <typename T> -__host__ __device__ +HOST_DEVICE_IF_CUDA constexpr bool is_npos(T v) { return v == npos<T>(); } /// Cuda lerp by u on [a,b]: (1-u)*a + u*b. 
template <typename T> -__host__ __device__ +HOST_DEVICE_IF_CUDA inline T lerp(T a, T b, T u) { return std::fma(u, b, std::fma(-u, a, a)); } diff --git a/src/backends/gpu/kernels/interleave.cu b/src/backends/gpu/kernels/interleave.cu new file mode 100644 index 00000000..cea3c723 --- /dev/null +++ b/src/backends/gpu/kernels/interleave.cu @@ -0,0 +1,45 @@ +#include <backends/fvm_types.hpp> +#include "detail.hpp" +#include "interleave.hpp" + +namespace nest { +namespace mc { +namespace gpu { + +// host side wrapper for the flat to interleaved operation +void flat_to_interleaved( + const fvm_value_type* in, + fvm_value_type* out, + const fvm_size_type* sizes, + const fvm_size_type* starts, + unsigned padded_size, + unsigned num_vec) +{ + constexpr unsigned BlockWidth = impl::block_dim(); + constexpr unsigned LoadWidth = impl::load_width(); + + flat_to_interleaved + <fvm_value_type, fvm_size_type, BlockWidth, LoadWidth> + (in, out, sizes, starts, padded_size, num_vec); +} + +// host side wrapper for the interleave to flat operation +void interleaved_to_flat( + const fvm_value_type* in, + fvm_value_type* out, + const fvm_size_type* sizes, + const fvm_size_type* starts, + unsigned padded_size, + unsigned num_vec) +{ + constexpr unsigned BlockWidth = impl::block_dim(); + constexpr unsigned LoadWidth = impl::load_width(); + + interleaved_to_flat + <fvm_value_type, fvm_size_type, BlockWidth, LoadWidth> + (in, out, sizes, starts, padded_size, num_vec); +} + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/src/backends/gpu/kernels/interleave.hpp b/src/backends/gpu/kernels/interleave.hpp index 7488f38b..e6c85593 100644 --- a/src/backends/gpu/kernels/interleave.hpp +++ b/src/backends/gpu/kernels/interleave.hpp @@ -11,7 +11,7 @@ namespace gpu { // Hines matrices, see src/backends/matrix_storage.md /////////////////////////////////////////////////////////////////////////////// - +namespace kernels { // Data in a vector is to be interleaved into blocks of width BlockWidth. // The kernel assigns LoadWidth threads to each lane in the block. 
// @@ -107,30 +107,47 @@ void interleaved_to_flat( } } +} // namespace kernels + // host side wrapper for the flat to interleaved operation template <typename T, typename I, unsigned BlockWidth, unsigned LoadWidth> void flat_to_interleaved( - const T* in, T* out, const I* sizes, const I* starts, unsigned padded_size, unsigned num_vec) + const T* in, + T* out, + const I* sizes, + const I* starts, + unsigned padded_size, + unsigned num_vec) { constexpr unsigned Threads = BlockWidth*LoadWidth; const unsigned blocks = impl::block_count(num_vec, BlockWidth); - flat_to_interleaved<T, I, BlockWidth, LoadWidth, Threads> - <<<blocks, Threads>>> (in, out, sizes, starts, padded_size, num_vec); + kernels::flat_to_interleaved + <T, I, BlockWidth, LoadWidth, Threads> + <<<blocks, Threads>>> + (in, out, sizes, starts, padded_size, num_vec); } // host side wrapper for the interleave to flat operation template <typename T, typename I, unsigned BlockWidth, unsigned LoadWidth> void interleaved_to_flat( - const T* in, T* out, const I* sizes, const I* starts, unsigned padded_size, unsigned num_vec) + const T* in, + T* out, + const I* sizes, + const I* starts, + unsigned padded_size, + unsigned num_vec) { constexpr unsigned Threads = BlockWidth*LoadWidth; const unsigned blocks = impl::block_count(num_vec, BlockWidth); - interleaved_to_flat<T, I, BlockWidth, LoadWidth, Threads> - <<<blocks, Threads>>> (in, out, sizes, starts, padded_size, num_vec); + kernels::interleaved_to_flat + <T, I, BlockWidth, LoadWidth, Threads> + <<<blocks, Threads>>> + (in, out, sizes, starts, padded_size, num_vec); } } // namespace gpu } // namespace mc } // namespace nest + diff --git a/src/backends/gpu/kernels/solve_matrix.hpp b/src/backends/gpu/kernels/solve_matrix.cu similarity index 70% rename from src/backends/gpu/kernels/solve_matrix.hpp rename to src/backends/gpu/kernels/solve_matrix.cu index 48e4a8b9..45558fa0 100644 --- a/src/backends/gpu/kernels/solve_matrix.hpp +++ b/src/backends/gpu/kernels/solve_matrix.cu @@ -1,11 +1,14 @@ -#pragma once +#include <cassert> #include "detail.hpp" +#include <backends/fvm_types.hpp> namespace nest { namespace mc { namespace gpu { +namespace kernels { + /// GPU implementation of Hines Matrix solver. 
/// Flat format template <typename T, typename I> @@ -81,6 +84,39 @@ void solve_matrix_interleaved( } } +} // namespace kernels + +void solve_matrix_flat( + fvm_value_type* rhs, + fvm_value_type* d, + const fvm_value_type* u, + const fvm_size_type* p, + const fvm_size_type* cell_cv_divs, + int num_mtx) +{ + constexpr unsigned block_dim = 128; + const unsigned grid_dim = impl::block_count(num_mtx, block_dim); + kernels::solve_matrix_flat + <fvm_value_type, fvm_size_type> + <<<grid_dim, block_dim>>> + (rhs, d, u, p, cell_cv_divs, num_mtx); +} + +void solve_matrix_interleaved( + fvm_value_type* rhs, + fvm_value_type* d, + const fvm_value_type* u, + const fvm_size_type* p, + const fvm_size_type* sizes, + int padded_size, + int num_mtx) +{ + const unsigned grid_dim = impl::block_count(num_mtx, impl::block_dim()); + kernels::solve_matrix_interleaved<fvm_value_type, fvm_size_type, impl::block_dim()> + <<<grid_dim, impl::block_dim()>>> + (rhs, d, u, p, sizes, padded_size, num_mtx); +} + } // namespace gpu } // namespace mc } // namespace nest diff --git a/src/backends/gpu/kernels/stim_current.cu b/src/backends/gpu/kernels/stim_current.cu new file mode 100644 index 00000000..7e5340bc --- /dev/null +++ b/src/backends/gpu/kernels/stim_current.cu @@ -0,0 +1,48 @@ +#include <backends/fvm_types.hpp> +#include <backends/gpu/intrinsics.hpp> + +namespace nest{ +namespace mc{ +namespace gpu { + +namespace kernels { + template <typename T, typename I> + __global__ + void stim_current( + const T* delay, const T* duration, const T* amplitude, + const I* node_index, int n, const I* cell_index, const T* time, T* current) + { + using value_type = T; + using iarray = I; + + auto i = threadIdx.x + blockDim.x*blockIdx.x; + + if (i<n) { + auto t = time[cell_index[i]]; + if (t>=delay[i] && t<delay[i]+duration[i]) { + // use subtraction because the electrode currents are specified + // in terms of current into the compartment + cuda_atomic_add(current+node_index[i], -amplitude[i]); + } + } + } +} // namespace kernels + + +void stim_current( + const fvm_value_type* delay, const fvm_value_type* duration, const fvm_value_type* amplitude, + const fvm_size_type* node_index, int n, + const fvm_size_type* cell_index, const fvm_value_type* time, fvm_value_type* current) +{ + constexpr unsigned thread_dim = 192; + dim3 dim_block(thread_dim); + dim3 dim_grid((n+thread_dim-1)/thread_dim); + + kernels::stim_current<fvm_value_type, fvm_size_type><<<dim_grid, dim_block>>> + (delay, duration, amplitude, node_index, n, cell_index, time, current); + +} + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/src/backends/gpu/kernels/time_ops.cu b/src/backends/gpu/kernels/time_ops.cu new file mode 100644 index 00000000..c2a108a8 --- /dev/null +++ b/src/backends/gpu/kernels/time_ops.cu @@ -0,0 +1,80 @@ +#include <backends/fvm_types.hpp> + +namespace nest { +namespace mc { +namespace gpu { + +namespace kernels { + template <typename T, typename I> + __global__ void update_time_to(I n, T* time_to, const T* time, T dt, T tmax) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + auto t = time[i]+dt; + time_to[i] = t<tmax? 
t: tmax; + } + } + + template <typename T> + struct less { + __device__ __host__ + bool operator()(const T& a, const T& b) const { return a<b; } + }; + + // vector minus: x = y - z + template <typename T, typename I> + __global__ void vec_minus(I n, T* x, const T* y, const T* z) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + x[i] = y[i]-z[i]; + } + } + + // vector gather: x[i] = y[index[i]] + template <typename T, typename I> + __global__ void gather(I n, T* x, const T* y, const I* index) { + int i = threadIdx.x+blockIdx.x*blockDim.x; + if (i<n) { + x[i] = y[index[i]]; + } + } +} + +void update_time_to(fvm_size_type n, + fvm_value_type* time_to, + const fvm_value_type* time, + fvm_value_type dt, + fvm_value_type tmax) +{ + if (!n) { + return; + } + + constexpr int blockwidth = 128; + int nblock = 1+(n-1)/blockwidth; + kernels::update_time_to<<<nblock, blockwidth>>> + (n, time_to, time, dt, tmax); +} + +void set_dt(fvm_size_type ncell, + fvm_size_type ncomp, + fvm_value_type* dt_cell, + fvm_value_type* dt_comp, + const fvm_value_type* time_to, + const fvm_value_type* time, + const fvm_size_type* cv_to_cell) +{ + if (!ncell || !ncomp) { + return; + } + + constexpr int blockwidth = 128; + int nblock = 1+(ncell-1)/blockwidth; + kernels::vec_minus<<<nblock, blockwidth>>>(ncell, dt_cell, time_to, time); + + nblock = 1+(ncomp-1)/blockwidth; + kernels::gather<<<nblock, blockwidth>>>(ncomp, dt_comp, dt_cell, cv_to_cell); +} + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/src/backends/gpu/kernels/time_ops.hpp b/src/backends/gpu/kernels/time_ops.hpp deleted file mode 100644 index 94271034..00000000 --- a/src/backends/gpu/kernels/time_ops.hpp +++ /dev/null @@ -1,113 +0,0 @@ -#pragma once - -#include <type_traits> - -#include <cuda.h> - -#include <memory/wrappers.hpp> - -namespace nest { -namespace mc { -namespace gpu { - -namespace kernels { - template <typename T, typename I> - __global__ void update_time_to(I n, T* time_to, const T* time, T dt, T tmax) { - int i = threadIdx.x+blockIdx.x*blockDim.x; - if (i<n) { - auto t = time[i]+dt; - time_to[i] = t<tmax? t: tmax; - } - } - - // array-array comparison - template <typename T, typename I, typename Pred> - __global__ void array_reduce_any(I n, const T* x, const T* y, Pred p, int* rptr) { - int i = threadIdx.x+blockIdx.x*blockDim.x; - int cmp = i<n? p(x[i], y[i]): 0; - if (__syncthreads_or(cmp) && !threadIdx.x) { - *rptr=1; - } - } - - // array-scalar comparison - template <typename T, typename I, typename Pred> - __global__ void array_reduce_any(I n, const T* x, T y, Pred p, int* rptr) { - int i = threadIdx.x+blockIdx.x*blockDim.x; - int cmp = i<n? 
p(x[i], y): 0; - if (__syncthreads_or(cmp) && !threadIdx.x) { - *rptr=1; - } - } - - template <typename T> - struct less { - __device__ __host__ - bool operator()(const T& a, const T& b) const { return a<b; } - }; - - // vector minus: x = y - z - template <typename T, typename I> - __global__ void vec_minus(I n, T* x, const T* y, const T* z) { - int i = threadIdx.x+blockIdx.x*blockDim.x; - if (i<n) { - x[i] = y[i]-z[i]; - } - } - - // vector gather: x[i] = y[index[i]] - template <typename T, typename I> - __global__ void gather(I n, T* x, const T* y, const I* index) { - int i = threadIdx.x+blockIdx.x*blockDim.x; - if (i<n) { - x[i] = y[index[i]]; - } - } -} - -template <typename T, typename I> -void update_time_to(I n, T* time_to, const T* time, T dt, T tmax) { - if (!n) { - return; - } - - constexpr int blockwidth = 128; - int nblock = 1+(n-1)/blockwidth; - kernels::update_time_to<<<nblock, blockwidth>>>(n, time_to, time, dt, tmax); -} - -template <typename T, typename U, typename I> -bool any_time_before(I n, T* t0, U t1) { - static_assert(std::is_convertible<T*, U>::value || std::is_convertible<T, U>::value, - "third-argument must be a compatible scalar or pointer type"); - - static thread_local auto r = memory::device_vector<int>(1); - if (!n) { - return false; - } - - constexpr int blockwidth = 128; - int nblock = 1+(n-1)/blockwidth; - - r[0] = 0; - kernels::array_reduce_any<<<nblock, blockwidth>>>(n, t0, t1, kernels::less<T>(), r.data()); - return r[0]; -} - -template <typename T, typename I> -void set_dt(I ncell, I ncomp, T* dt_cell, T* dt_comp, const T* time_to, const T* time, const I* cv_to_cell) { - if (!ncell || !ncomp) { - return; - } - - constexpr int blockwidth = 128; - int nblock = 1+(ncell-1)/blockwidth; - kernels::vec_minus<<<nblock, blockwidth>>>(ncell, dt_cell, time_to, time); - - nblock = 1+(ncomp-1)/blockwidth; - kernels::gather<<<nblock, blockwidth>>>(ncomp, dt_comp, dt_cell, cv_to_cell); -} - -} // namespace gpu -} // namespace mc -} // namespace nest diff --git a/src/backends/gpu/matrix_state_flat.hpp b/src/backends/gpu/matrix_state_flat.hpp index dc7f9a40..a68ecad8 100644 --- a/src/backends/gpu/matrix_state_flat.hpp +++ b/src/backends/gpu/matrix_state_flat.hpp @@ -1,18 +1,35 @@ #pragma once +#include <backends/fvm_types.hpp> #include <memory/memory.hpp> #include <memory/wrappers.hpp> #include <util/span.hpp> #include <util/partition.hpp> #include <util/rangeutil.hpp> -#include "kernels/solve_matrix.hpp" -#include "kernels/assemble_matrix.hpp" - namespace nest { namespace mc { namespace gpu { +void solve_matrix_flat( + fvm_value_type* rhs, + fvm_value_type* d, + const fvm_value_type* u, + const fvm_size_type* p, + const fvm_size_type* cell_cv_divs, + int num_mtx); + +void assemble_matrix_flat( + fvm_value_type* d, + fvm_value_type* rhs, + const fvm_value_type* invariant_d, + const fvm_value_type* voltage, + const fvm_value_type* current, + const fvm_value_type* cv_capacitance, + const fvm_size_type* cv_to_cell, + const fvm_value_type* dt_cell, + unsigned n); + /// matrix state template <typename T, typename I> struct matrix_state_flat { @@ -95,25 +112,16 @@ struct matrix_state_flat { // voltage [mV] // current [nA] void assemble(const_view dt_cell, const_view voltage, const_view current) { - // determine the grid dimensions for the kernel - auto const n = voltage.size(); - auto const block_dim = 128; - auto const grid_dim = impl::block_count(n, block_dim); - - assemble_matrix_flat<value_type, size_type><<<grid_dim, block_dim>>> ( + // perform assembly on the gpu + 
assemble_matrix_flat( d.data(), rhs.data(), invariant_d.data(), voltage.data(), current.data(), cv_capacitance.data(), cv_to_cell.data(), dt_cell.data(), size()); } void solve() { - // determine the grid dimensions for the kernel - auto const block_dim = 128; - auto const grid_dim = impl::block_count(num_matrices(), block_dim); - // perform solve on gpu - solve_matrix_flat<value_type, size_type><<<grid_dim, block_dim>>> ( - rhs.data(), d.data(), u.data(), parent_index.data(), - cell_cv_divs.data(), num_matrices()); + solve_matrix_flat(rhs.data(), d.data(), u.data(), parent_index.data(), + cell_cv_divs.data(), num_matrices()); } std::size_t size() const { diff --git a/src/backends/gpu/matrix_state_interleaved.hpp b/src/backends/gpu/matrix_state_interleaved.hpp index a6a127e8..054af9e5 100644 --- a/src/backends/gpu/matrix_state_interleaved.hpp +++ b/src/backends/gpu/matrix_state_interleaved.hpp @@ -1,19 +1,61 @@ #pragma once +#include <backends/fvm_types.hpp> #include <memory/memory.hpp> #include <util/debug.hpp> #include <util/span.hpp> #include <util/partition.hpp> #include <util/rangeutil.hpp> +#include <util/indirect.hpp> -#include "kernels/solve_matrix.hpp" -#include "kernels/assemble_matrix.hpp" -#include "kernels/interleave.hpp" +#include "kernels/detail.hpp" namespace nest { namespace mc { namespace gpu { +// host side wrapper for interleaved matrix assembly kernel +void assemble_matrix_interleaved( + fvm_value_type* d, + fvm_value_type* rhs, + const fvm_value_type* invariant_d, + const fvm_value_type* voltage, + const fvm_value_type* current, + const fvm_value_type* cv_capacitance, + const fvm_size_type* sizes, + const fvm_size_type* starts, + const fvm_size_type* matrix_to_cell, + const fvm_value_type* dt_cell, + unsigned padded_size, unsigned num_mtx); + +// host side wrapper for interleaved matrix solver kernel +void solve_matrix_interleaved( + fvm_value_type* rhs, + fvm_value_type* d, + const fvm_value_type* u, + const fvm_size_type* p, + const fvm_size_type* sizes, + int padded_size, + int num_mtx); + +// host side wrapper for the flat to interleaved operation +void flat_to_interleaved( + const fvm_value_type* in, + fvm_value_type* out, + const fvm_size_type* sizes, + const fvm_size_type* starts, + unsigned padded_size, + unsigned num_vec); + +// host side wrapper for the interleave to flat operation +void interleaved_to_flat( + const fvm_value_type* in, + fvm_value_type* out, + const fvm_size_type* sizes, + const fvm_size_type* starts, + unsigned padded_size, + unsigned num_vec); + // A helper that performs the interleave operation on host memory. template <typename T, typename I> std::vector<T> flat_to_interleaved( @@ -213,15 +255,7 @@ struct matrix_state_interleaved { // voltage [mV] // current [nA] void assemble(const_view dt_cell, const_view voltage, const_view current) { - constexpr auto bd = impl::block_dim(); - constexpr auto lw = impl::load_width(); - constexpr auto block_dim = bd*lw; - - // The number of threads is threads_per_matrix*num_mtx - const auto num_blocks = impl::block_count(num_matrices()*lw, block_dim); - - assemble_matrix_interleaved<value_type, size_type, bd, lw, block_dim> - <<<num_blocks, block_dim>>> + assemble_matrix_interleaved (d.data(), rhs.data(), invariant_d.data(), voltage.data(), current.data(), cv_capacitance.data(), matrix_sizes.data(), matrix_index.data(), @@ -232,14 +266,12 @@ struct matrix_state_interleaved { void solve() { // Perform the Hines solve. 
- auto const grid_dim = impl::block_count(num_matrices(), impl::block_dim()); - solve_matrix_interleaved<value_type, size_type, impl::block_dim()> - <<<grid_dim, impl::block_dim()>>> - (rhs.data(), d.data(), u.data(), parent_index.data(), matrix_sizes.data(), + solve_matrix_interleaved( + rhs.data(), d.data(), u.data(), parent_index.data(), matrix_sizes.data(), padded_matrix_size(), num_matrices()); // Copy the solution from interleaved to front end storage. - interleaved_to_flat<value_type, size_type, impl::block_dim(), impl::load_width()> + interleaved_to_flat (rhs.data(), solution_.data(), matrix_sizes.data(), matrix_index.data(), padded_matrix_size(), num_matrices()); } diff --git a/src/backends/gpu/stim_current.hpp b/src/backends/gpu/stim_current.hpp new file mode 100644 index 00000000..6d339755 --- /dev/null +++ b/src/backends/gpu/stim_current.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include <backends/fvm_types.hpp> + +namespace nest{ +namespace mc{ +namespace gpu { + +void stim_current( + const fvm_value_type* delay, const fvm_value_type* duration, const fvm_value_type* amplitude, + const fvm_size_type* node_index, int n, + const fvm_size_type* cell_index, const fvm_value_type* time, fvm_value_type* current); + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/src/backends/gpu/stimulus.hpp b/src/backends/gpu/stimulus.hpp index 9b2da514..651e903c 100644 --- a/src/backends/gpu/stimulus.hpp +++ b/src/backends/gpu/stimulus.hpp @@ -5,37 +5,13 @@ #include <mechanism.hpp> #include <algorithms.hpp> +#include "stim_current.hpp" #include <util/pprintf.hpp> -#include "intrinsics.hpp" - namespace nest{ namespace mc{ namespace gpu { -namespace kernels { - template <typename T, typename I> - __global__ - void stim_current( - const T* delay, const T* duration, const T* amplitude, - const I* node_index, int n, const I* cell_index, const T* time, T* current) - { - using value_type = T; - using iarray = I; - - auto i = threadIdx.x + blockDim.x*blockIdx.x; - - if (i<n) { - auto t = time[cell_index[i]]; - if (t>=delay[i] && t<delay[i]+duration[i]) { - // use subtraction because the electrode currents are specified - // in terms of current into the compartment - cuda_atomic_add(current+node_index[i], -amplitude[i]); - } - } - } -} // namespace kernels - template<class Backend> class stimulus : public mechanism<Backend> { public: @@ -107,16 +83,9 @@ public: // don't launch a kernel if there are no stimuli if (!size()) return; - auto n = size(); - auto thread_dim = 192; - dim3 dim_block(thread_dim); - dim3 dim_grid((n+thread_dim-1)/thread_dim ); - - kernels::stim_current<value_type, size_type><<<dim_grid, dim_block>>>( - delay.data(), duration.data(), amplitude.data(), - node_index_.data(), n, vec_ci_.data(), vec_t_.data(), - vec_i_.data() - ); + stim_current(delay.data(), duration.data(), amplitude.data(), + node_index_.data(), size(), vec_ci_.data(), vec_t_.data(), + vec_i_.data()); } diff --git a/src/backends/gpu/time_ops.hpp b/src/backends/gpu/time_ops.hpp new file mode 100644 index 00000000..a851a7c4 --- /dev/null +++ b/src/backends/gpu/time_ops.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include <backends/fvm_types.hpp> + +namespace nest { +namespace mc { +namespace gpu { + +void update_time_to(fvm_size_type n, + fvm_value_type* time_to, + const fvm_value_type* time, + fvm_value_type dt, + fvm_value_type tmax); + +void set_dt(fvm_size_type ncell, + fvm_size_type ncomp, + fvm_value_type* dt_cell, + fvm_value_type* dt_comp, + const fvm_value_type* time_to, + const fvm_value_type* time, 
+ const fvm_size_type* cv_to_cell); + +} // namespace gpu +} // namespace mc +} // namespace nest diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 600cfcd6..5d58b80a 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -93,7 +93,7 @@ endif() set(TARGETS test.exe) -add_executable(test.exe ${TEST_SOURCES} ${HEADERS}) +add_executable(test.exe ${TEST_SOURCES}) target_compile_definitions(test.exe PUBLIC "-DDATADIR=\"${PROJECT_SOURCE_DIR}/data\"") if (NMC_AUTO_RUN_MODCC_ON_CHANGES) @@ -104,7 +104,7 @@ target_include_directories(test.exe PRIVATE "${mech_proto_dir}/..") if(NMC_WITH_CUDA) set(TARGETS ${TARGETS} test_cuda.exe) - cuda_add_executable(test_cuda.exe ${TEST_CUDA_SOURCES} ${HEADERS}) + cuda_add_executable(test_cuda.exe ${TEST_CUDA_SOURCES}) endif() foreach(target ${TARGETS}) @@ -122,9 +122,3 @@ foreach(target ${TARGETS}) RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" ) endforeach() - -# Temporary hack (pending PR #266) -if(NMC_WITH_CUDA) - target_link_libraries(test_cuda.exe LINK_PUBLIC nestmc) -endif() - diff --git a/tests/unit/test_matrix.cu b/tests/unit/test_matrix.cu index 3e7a730d..7e5bf6cc 100644 --- a/tests/unit/test_matrix.cu +++ b/tests/unit/test_matrix.cu @@ -12,6 +12,10 @@ #include <memory/memory.hpp> #include <util/span.hpp> +#include <backends/gpu/matrix_state_flat.hpp> +#include <backends/gpu/matrix_state_interleaved.hpp> +#include <backends/gpu/kernels/interleave.hpp> + #include <cuda.h> using namespace nest::mc; diff --git a/tests/unit/test_multi_event_stream.cu b/tests/unit/test_multi_event_stream.cu index cb1f83e7..bf0177e9 100644 --- a/tests/unit/test_multi_event_stream.cu +++ b/tests/unit/test_multi_event_stream.cu @@ -7,7 +7,7 @@ #include <backends/event.hpp> #include <backends/gpu/multi_event_stream.hpp> -#include <backends/gpu/kernels/time_ops.hpp> +#include <backends/gpu/time_ops.hpp> #include <memory/wrappers.hpp> #include <util/rangeutil.hpp> @@ -217,7 +217,7 @@ TEST(multi_event_stream, time_if_before) { std::vector<double> after; for (unsigned i = 0; i<n_cell; ++i) { - before[i] = 0.1+i/(double)n_cell; + before[i] = 0.1+i/(double)n_cell; } memory::device_vector<double> t = memory::on_gpu(before); @@ -230,45 +230,18 @@ TEST(multi_event_stream, time_if_before) { // on cell_2 to restrict corresponding element of t. 
for (unsigned i = 0; i<n_cell; ++i) { - before[i] = 2.1+0.5*i/(double)n_cell; + before[i] = 2.1+0.5*i/(double)n_cell; } t = memory::make_view(before); m.event_time_if_before(t); util::assign(after, memory::on_host(t)); for (unsigned i = 0; i<n_cell; ++i) { - if (i==cell_2) { - EXPECT_EQ(2., after[i]); - } - else { - EXPECT_EQ(before[i], after[i]); - } + if (i==cell_2) { + EXPECT_EQ(2., after[i]); + } + else { + EXPECT_EQ(before[i], after[i]); + } } } - -TEST(multi_event_stream, any_time_before) { - constexpr std::size_t n = 10000; - std::minstd_rand R; - std::uniform_real_distribution<float> g(0, 10); - - std::vector<double> t(n); - std::generate(t.begin(), t.end(), [&]{ return g(R); }); - - memory::device_vector<double> t0 = memory::on_gpu(t); - - double tmin = *std::min_element(t.begin(), t.end()); - EXPECT_TRUE(gpu::any_time_before(n, t0.data(), tmin+0.01)); - EXPECT_FALSE(gpu::any_time_before(n, t0.data(), tmin)); - - memory::device_vector<double> t1 = memory::on_gpu(t); - EXPECT_FALSE(gpu::any_time_before(n, t0.data(), t1.data())); - - t[2*n/3] += 20; - t1 = memory::on_gpu(t); - EXPECT_TRUE(gpu::any_time_before(n, t0.data(), t1.data())); - - t[2*n/3] -= 30; - t1 = memory::on_gpu(t); - EXPECT_FALSE(gpu::any_time_before(n, t0.data(), t1.data())); -} - diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt index e013a998..45773d26 100644 --- a/tests/validation/CMakeLists.txt +++ b/tests/validation/CMakeLists.txt @@ -14,21 +14,6 @@ set(VALIDATION_SOURCES validate.cpp ) -set(VALIDATION_CUDA_SOURCES - # unit tests - validate_soma.cu - validate_ball_and_stick.cu - validate_kinetic.cu - validate_synapses.cu - - # support code - validation_data.cpp - trace_analysis.cpp - - # unit test driver - validate.cpp -) - if(NMC_VALIDATION_DATA_DIR) if ("${CMAKE_VERSION}" MATCHES "^3.[789].") message(WARNING "CMake ${CMAKE_VERSION} has broken FindCUDA; omitting NMC_DATADIR define.") @@ -38,33 +23,22 @@ if(NMC_VALIDATION_DATA_DIR) endif() add_executable(validate.exe ${VALIDATION_SOURCES}) -set(TARGETS validate.exe) -if(NMC_WITH_CUDA) - cuda_add_executable(validate_cuda.exe ${VALIDATION_CUDA_SOURCES}) - list(APPEND TARGETS validate_cuda.exe) - target_link_libraries(validate_cuda.exe LINK_PUBLIC gpu) -endif() - - -foreach(target ${TARGETS}) - target_link_libraries(${target} LINK_PUBLIC gtest) - target_link_libraries(${target} LINK_PUBLIC ${NESTMC_LIBRARIES}) - target_link_libraries(${target} LINK_PUBLIC ${EXTERNAL_LIBRARIES}) - - if(NMC_WITH_MPI) - target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES}) - set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") - endif() +target_link_libraries(validate.exe LINK_PUBLIC gtest) +target_link_libraries(validate.exe LINK_PUBLIC ${NESTMC_LIBRARIES}) +target_link_libraries(validate.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES}) - set_target_properties( - ${target} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" - ) +if(NMC_WITH_MPI) + target_link_libraries(validate.exe LINK_PUBLIC ${MPI_C_LIBRARIES}) + set_property(TARGET validate.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") +endif() - if(NMC_BUILD_VALIDATION_DATA) - add_dependencies(${target} validation_data) - endif() -endforeach() +set_target_properties( + validate.exe + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" +) +if(NMC_BUILD_VALIDATION_DATA) + add_dependencies(validate.exe validation_data) +endif() diff --git a/tests/validation/validate_ball_and_stick.cpp 
b/tests/validation/validate_ball_and_stick.cpp index e4c22efa..9d1a99dc 100644 --- a/tests/validation/validate_ball_and_stick.cpp +++ b/tests/validation/validate_ball_and_stick.cpp @@ -1,26 +1,228 @@ -#include "../gtest.h" -#include "validate_ball_and_stick.hpp" +#include <iostream> +#include <cell.hpp> +#include <common_types.hpp> #include <fvm_multicell.hpp> +#include <load_balance.hpp> +#include <hardware/node_info.hpp> +#include <hardware/gpu.hpp> +#include <model.hpp> +#include <recipe.hpp> +#include <segment.hpp> +#include <simple_sampler.hpp> +#include <util/meta.hpp> +#include <util/path.hpp> +#include <json/json.hpp> + +#include "../common_cells.hpp" +#include "../simple_recipes.hpp" +#include "convergence_test.hpp" +#include "trace_analysis.hpp" +#include "validation_data.hpp" + +#include "../gtest.h" + +using namespace nest::mc; + +struct probe_point { + const char* label; + segment_location where; +}; + +template <typename ProbePointSeq> +void run_ncomp_convergence_test( + const char* model_name, + const util::path& ref_data_path, + backend_kind backend, + const cell& c, + ProbePointSeq& probe_points, + float t_end=100.f) +{ + using namespace nest::mc; + + auto max_ncomp = g_trace_io.max_ncomp(); + auto dt = g_trace_io.min_dt(); + auto sample_dt = g_trace_io.sample_dt(); + + nlohmann::json meta = { + {"name", "membrane voltage"}, + {"model", model_name}, + {"dt", dt}, + {"sim", "nestmc"}, + {"units", "mV"}, + {"backend_kind", to_string(backend)} + }; + + auto exclude = stimulus_ends(c); -const auto backend = nest::mc::backend_kind::multicore; + auto n_probe = util::size(probe_points); + std::vector<probe_label> plabels; + plabels.reserve(n_probe); + for (unsigned i = 0; i<n_probe; ++i) { + plabels.push_back(probe_label{probe_points[i].label, {0u, i}}); + } + + convergence_test_runner<int> runner("ncomp", plabels, meta); + runner.load_reference_data(ref_data_path); + + for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) { + for (auto& seg: c.segments()) { + if (!seg->is_soma()) { + seg->set_compartments(ncomp); + } + } + cable1d_recipe rec{c}; + for (const auto& p: probe_points) { + rec.add_probe(0, 0, cell_probe_address{p.where, cell_probe_address::membrane_voltage}); + } + + hw::node_info nd(1, backend==backend_kind::gpu? 
1: 0); + auto decomp = partition_load_balance(rec, nd); + model m(rec, decomp); + + runner.run(m, ncomp, sample_dt, t_end, dt, exclude); + } + runner.report(); + runner.assert_all_convergence(); +} + +void validate_ball_and_stick(nest::mc::backend_kind backend) { + using namespace nest::mc; + + cell c = make_cell_ball_and_stick(); + probe_point points[] = { + {"soma.mid", {0u, 0.5}}, + {"dend.mid", {1u, 0.5}}, + {"dend.end", {1u, 1.0}} + }; + + run_ncomp_convergence_test( + "ball_and_stick", + "neuron_ball_and_stick.json", + backend, + c, + points); +} + +void validate_ball_and_taper(nest::mc::backend_kind backend) { + using namespace nest::mc; + + cell c = make_cell_ball_and_taper(); + probe_point points[] = { + {"soma.mid", {0u, 0.5}}, + {"taper.mid", {1u, 0.5}}, + {"taper.end", {1u, 1.0}} + }; + + run_ncomp_convergence_test( + "ball_and_taper", + "neuron_ball_and_taper.json", + backend, + c, + points); +} + +void validate_ball_and_3stick(nest::mc::backend_kind backend) { + using namespace nest::mc; + + cell c = make_cell_ball_and_3stick(); + probe_point points[] = { + {"soma.mid", {0u, 0.5}}, + {"dend1.mid", {1u, 0.5}}, + {"dend1.end", {1u, 1.0}}, + {"dend2.mid", {2u, 0.5}}, + {"dend2.end", {2u, 1.0}}, + {"dend3.mid", {3u, 0.5}}, + {"dend3.end", {3u, 1.0}} + }; + + run_ncomp_convergence_test( + "ball_and_3stick", + "neuron_ball_and_3stick.json", + backend, + c, + points); +} + +void validate_rallpack1(nest::mc::backend_kind backend) { + using namespace nest::mc; + + cell c = make_cell_simple_cable(); + probe_point points[] = { + {"cable.x0.0", {1u, 0.0}}, + {"cable.x0.3", {1u, 0.3}}, + {"cable.x1.0", {1u, 1.0}} + }; + + run_ncomp_convergence_test( + "rallpack1", + "numeric_rallpack1.json", + backend, + c, + points, + 250.f); +} + +void validate_ball_and_squiggle(nest::mc::backend_kind backend) { + using namespace nest::mc; + + cell c = make_cell_ball_and_squiggle(); + probe_point points[] = { + {"soma.mid", {0u, 0.5}}, + {"dend.mid", {1u, 0.5}}, + {"dend.end", {1u, 1.0}} + }; + +#if 0 + // *temporarily* disabled: compartment division policy will + // be moved into backend policy classes. 
+ + run_ncomp_convergence_test<lowered_cell_div<div_compartment_sampler>>( + "ball_and_squiggle_sampler", + "neuron_ball_and_squiggle.json", + c, + samplers); +#endif + + run_ncomp_convergence_test( + "ball_and_squiggle_integrator", + "neuron_ball_and_squiggle.json", + backend, + c, + points); +} TEST(ball_and_stick, neuron_ref) { - validate_ball_and_stick(backend); + validate_ball_and_stick(backend_kind::multicore); + if (hw::num_gpus()) { + validate_ball_and_stick(backend_kind::gpu); + } } TEST(ball_and_taper, neuron_ref) { - validate_ball_and_taper(backend); + validate_ball_and_taper(backend_kind::multicore); + if (hw::num_gpus()) { + validate_ball_and_taper(backend_kind::gpu); + } } TEST(ball_and_3stick, neuron_ref) { - validate_ball_and_3stick(backend); + validate_ball_and_3stick(backend_kind::multicore); + if (hw::num_gpus()) { + validate_ball_and_3stick(backend_kind::gpu); + } } TEST(rallpack1, numeric_ref) { - validate_rallpack1(backend); + validate_rallpack1(backend_kind::multicore); + if (hw::num_gpus()) { + validate_rallpack1(backend_kind::gpu); + } } TEST(ball_and_squiggle, neuron_ref) { - validate_ball_and_squiggle(backend); + validate_ball_and_squiggle(backend_kind::multicore); + if (hw::num_gpus()) { + validate_ball_and_squiggle(backend_kind::gpu); + } } diff --git a/tests/validation/validate_ball_and_stick.cu b/tests/validation/validate_ball_and_stick.cu deleted file mode 100644 index 1ad415c8..00000000 --- a/tests/validation/validate_ball_and_stick.cu +++ /dev/null @@ -1,26 +0,0 @@ -#include "../gtest.h" -#include "validate_ball_and_stick.hpp" - -#include <fvm_multicell.hpp> - -const auto backend = nest::mc::backend_kind::gpu; - -TEST(ball_and_stick, neuron_ref) { - validate_ball_and_stick(backend); -} - -TEST(ball_and_taper, neuron_ref) { - validate_ball_and_taper(backend); -} - -TEST(ball_and_3stick, neuron_ref) { - validate_ball_and_3stick(backend); -} - -TEST(rallpack1, numeric_ref) { - validate_rallpack1(backend); -} - -TEST(ball_and_squiggle, neuron_ref) { - validate_ball_and_squiggle(backend); -} diff --git a/tests/validation/validate_ball_and_stick.hpp b/tests/validation/validate_ball_and_stick.hpp deleted file mode 100644 index 516d82c7..00000000 --- a/tests/validation/validate_ball_and_stick.hpp +++ /dev/null @@ -1,191 +0,0 @@ -#include <json/json.hpp> - -#include <cell.hpp> -#include <common_types.hpp> -#include <fvm_multicell.hpp> -#include <load_balance.hpp> -#include <hardware/node_info.hpp> -#include <model.hpp> -#include <recipe.hpp> -#include <segment.hpp> -#include <simple_sampler.hpp> -#include <util/meta.hpp> -#include <util/path.hpp> - -#include "../gtest.h" - -#include "../common_cells.hpp" -#include "../simple_recipes.hpp" -#include "convergence_test.hpp" -#include "trace_analysis.hpp" -#include "validation_data.hpp" - -#include <iostream> - -struct probe_point { - const char* label; - nest::mc::segment_location where; -}; - -template <typename ProbePointSeq> -void run_ncomp_convergence_test( - const char* model_name, - const nest::mc::util::path& ref_data_path, - nest::mc::backend_kind backend, - const nest::mc::cell& c, - ProbePointSeq& probe_points, - float t_end=100.f) -{ - using namespace nest::mc; - - auto max_ncomp = g_trace_io.max_ncomp(); - auto dt = g_trace_io.min_dt(); - auto sample_dt = g_trace_io.sample_dt(); - - nlohmann::json meta = { - {"name", "membrane voltage"}, - {"model", model_name}, - {"dt", dt}, - {"sim", "nestmc"}, - {"units", "mV"}, - {"backend_kind", to_string(backend)} - }; - - auto exclude = stimulus_ends(c); - - auto 
n_probe = util::size(probe_points); - std::vector<probe_label> plabels; - plabels.reserve(n_probe); - for (unsigned i = 0; i<n_probe; ++i) { - plabels.push_back(probe_label{probe_points[i].label, {0u, i}}); - } - - convergence_test_runner<int> runner("ncomp", plabels, meta); - runner.load_reference_data(ref_data_path); - - for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) { - for (auto& seg: c.segments()) { - if (!seg->is_soma()) { - seg->set_compartments(ncomp); - } - } - cable1d_recipe rec{c}; - for (const auto& p: probe_points) { - rec.add_probe(0, 0, cell_probe_address{p.where, cell_probe_address::membrane_voltage}); - } - - hw::node_info nd(1, backend==backend_kind::gpu? 1: 0); - auto decomp = partition_load_balance(rec, nd); - model m(rec, decomp); - - runner.run(m, ncomp, sample_dt, t_end, dt, exclude); - } - runner.report(); - runner.assert_all_convergence(); -} - -void validate_ball_and_stick(nest::mc::backend_kind backend) { - using namespace nest::mc; - - cell c = make_cell_ball_and_stick(); - probe_point points[] = { - {"soma.mid", {0u, 0.5}}, - {"dend.mid", {1u, 0.5}}, - {"dend.end", {1u, 1.0}} - }; - - run_ncomp_convergence_test( - "ball_and_stick", - "neuron_ball_and_stick.json", - backend, - c, - points); -} - -void validate_ball_and_taper(nest::mc::backend_kind backend) { - using namespace nest::mc; - - cell c = make_cell_ball_and_taper(); - probe_point points[] = { - {"soma.mid", {0u, 0.5}}, - {"taper.mid", {1u, 0.5}}, - {"taper.end", {1u, 1.0}} - }; - - run_ncomp_convergence_test( - "ball_and_taper", - "neuron_ball_and_taper.json", - backend, - c, - points); -} - -void validate_ball_and_3stick(nest::mc::backend_kind backend) { - using namespace nest::mc; - - cell c = make_cell_ball_and_3stick(); - probe_point points[] = { - {"soma.mid", {0u, 0.5}}, - {"dend1.mid", {1u, 0.5}}, - {"dend1.end", {1u, 1.0}}, - {"dend2.mid", {2u, 0.5}}, - {"dend2.end", {2u, 1.0}}, - {"dend3.mid", {3u, 0.5}}, - {"dend3.end", {3u, 1.0}} - }; - - run_ncomp_convergence_test( - "ball_and_3stick", - "neuron_ball_and_3stick.json", - backend, - c, - points); -} - -void validate_rallpack1(nest::mc::backend_kind backend) { - using namespace nest::mc; - - cell c = make_cell_simple_cable(); - probe_point points[] = { - {"cable.x0.0", {1u, 0.0}}, - {"cable.x0.3", {1u, 0.3}}, - {"cable.x1.0", {1u, 1.0}} - }; - - run_ncomp_convergence_test( - "rallpack1", - "numeric_rallpack1.json", - backend, - c, - points, - 250.f); -} - -void validate_ball_and_squiggle(nest::mc::backend_kind backend) { - using namespace nest::mc; - - cell c = make_cell_ball_and_squiggle(); - probe_point points[] = { - {"soma.mid", {0u, 0.5}}, - {"dend.mid", {1u, 0.5}}, - {"dend.end", {1u, 1.0}} - }; - -#if 0 - // *temporarily* disabled: compartment division policy will - // be moved into backend policy classes. 
- - run_ncomp_convergence_test<lowered_cell_div<div_compartment_sampler>>( - "ball_and_squiggle_sampler", - "neuron_ball_and_squiggle.json", - c, - samplers); -#endif - - run_ncomp_convergence_test( - "ball_and_squiggle_integrator", - "neuron_ball_and_squiggle.json", - backend, - c, - points); -} diff --git a/tests/validation/validate_kinetic.cpp b/tests/validation/validate_kinetic.cpp index a9d95faa..6815a91c 100644 --- a/tests/validation/validate_kinetic.cpp +++ b/tests/validation/validate_kinetic.cpp @@ -1,13 +1,118 @@ -#include "validate_kinetic.hpp" - #include "../gtest.h" -const auto backend = nest::mc::backend_kind::multicore; +#include <json/json.hpp> + +#include <common_types.hpp> +#include <cell.hpp> +#include <fvm_multicell.hpp> +#include <hardware/node_info.hpp> +#include <hardware/gpu.hpp> +#include <load_balance.hpp> +#include <model.hpp> +#include <recipe.hpp> +#include <simple_sampler.hpp> +#include <util/rangeutil.hpp> + +#include "../common_cells.hpp" +#include "../simple_recipes.hpp" + +#include "convergence_test.hpp" +#include "trace_analysis.hpp" +#include "validation_data.hpp" + +void run_kinetic_dt( + nest::mc::backend_kind backend, + nest::mc::cell& c, + float t_end, + nlohmann::json meta, + const std::string& ref_file) +{ + using namespace nest::mc; + + float sample_dt = g_trace_io.sample_dt(); + + cable1d_recipe rec{c}; + rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); + probe_label plabels[1] = {"soma.mid", {0u, 0u}}; + + meta["sim"] = "nestmc"; + meta["backend_kind"] = to_string(backend); + + convergence_test_runner<float> runner("dt", plabels, meta); + runner.load_reference_data(ref_file); + + hw::node_info nd(1, backend==backend_kind::gpu? 1: 0); + auto decomp = partition_load_balance(rec, nd); + model model(rec, decomp); + + auto exclude = stimulus_ends(c); + + // use dt = 0.05, 0.02, 0.01, 0.005, 0.002, ... 
+ double max_oo_dt = std::round(1.0/g_trace_io.min_dt());
+ for (double base = 100; ; base *= 10) {
+ for (double multiple: {5., 2., 1.}) {
+ double oo_dt = base/multiple;
+ if (oo_dt>max_oo_dt) goto end;
+
+ model.reset();
+ float dt = float(1./oo_dt);
+ runner.run(model, dt, sample_dt, t_end, dt, exclude);
+ }
+ }
+
+end:
+ runner.report();
+ runner.assert_all_convergence();
+}
+
+void validate_kinetic_kin1(nest::mc::backend_kind backend) {
+ using namespace nest::mc;
+
+ // 20 µm diameter soma with single mechanism, current probe
+ cell c;
+ auto soma = c.add_soma(10);
+ soma->add_mechanism(std::string("test_kin1"));
+
+ nlohmann::json meta = {
+ {"model", "test_kin1"},
+ {"name", "membrane current"},
+ {"units", "nA"}
+ };
+
+ run_kinetic_dt(backend, c, 100.f, meta, "numeric_kin1.json");
+}
+
+void validate_kinetic_kinlva(nest::mc::backend_kind backend) {
+ using namespace nest::mc;
+
+ // 20 µm diameter soma with single mechanism, voltage probe
+ cell c;
+ auto soma = c.add_soma(10);
+ c.add_stimulus({0,0.5}, {20., 130., -0.025});
+ soma->add_mechanism(std::string("test_kinlva"));
+
+ nlohmann::json meta = {
+ {"model", "test_kinlva"},
+ {"name", "membrane voltage"},
+ {"units", "mV"}
+ };
+
+ run_kinetic_dt(backend, c, 300.f, meta, "numeric_kinlva.json");
+}
+
+
+using namespace nest::mc;
 
 TEST(kinetic, kin1_numeric_ref) {
- validate_kinetic_kin1(backend);
+ validate_kinetic_kin1(backend_kind::multicore);
+ if (hw::num_gpus()) {
+ validate_kinetic_kin1(nest::mc::backend_kind::gpu);
+ }
 }
 
 TEST(kinetic, kinlva_numeric_ref) {
- validate_kinetic_kinlva(backend);
+ validate_kinetic_kinlva(backend_kind::multicore);
+ if (hw::num_gpus()) {
+ validate_kinetic_kinlva(nest::mc::backend_kind::gpu);
+ }
 }
diff --git a/tests/validation/validate_kinetic.cu b/tests/validation/validate_kinetic.cu
deleted file mode 100644
index afc51031..00000000
--- a/tests/validation/validate_kinetic.cu
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "validate_kinetic.hpp"
-
-#include "../gtest.h"
-
-const auto backend = nest::mc::backend_kind::gpu;
-
-TEST(kinetic, kin1_numeric_ref) {
- validate_kinetic_kin1(backend);
-}
-
-TEST(kinetic, kinlva_numeric_ref) {
- validate_kinetic_kinlva(backend);
-}
diff --git a/tests/validation/validate_kinetic.hpp b/tests/validation/validate_kinetic.hpp
deleted file mode 100644
index 91932493..00000000
--- a/tests/validation/validate_kinetic.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-#include <json/json.hpp>
-
-#include <common_types.hpp>
-#include <cell.hpp>
-#include <fvm_multicell.hpp>
-#include <hardware/node_info.hpp>
-#include <load_balance.hpp>
-#include <model.hpp>
-#include <recipe.hpp>
-#include <simple_sampler.hpp>
-#include <util/rangeutil.hpp>
-
-#include "../common_cells.hpp"
-#include "../simple_recipes.hpp"
-
-#include "convergence_test.hpp"
-#include "trace_analysis.hpp"
-#include "validation_data.hpp"
-
-void run_kinetic_dt(
- nest::mc::backend_kind backend,
- nest::mc::cell& c,
- float t_end,
- nlohmann::json meta,
- const std::string& ref_file)
-{
- using namespace nest::mc;
-
- float sample_dt = g_trace_io.sample_dt();
-
- cable1d_recipe rec{c};
- rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage});
- probe_label plabels[1] = {"soma.mid", {0u, 0u}};
-
- meta["sim"] = "nestmc";
- meta["backend_kind"] = to_string(backend);
-
- convergence_test_runner<float> runner("dt", plabels, meta);
- runner.load_reference_data(ref_file);
-
- hw::node_info nd(1, backend==backend_kind::gpu?
1: 0); - auto decomp = partition_load_balance(rec, nd); - model model(rec, decomp); - - auto exclude = stimulus_ends(c); - - // use dt = 0.05, 0.02, 0.01, 0.005, 0.002, ... - double max_oo_dt = std::round(1.0/g_trace_io.min_dt()); - for (double base = 100; ; base *= 10) { - for (double multiple: {5., 2., 1.}) { - double oo_dt = base/multiple; - if (oo_dt>max_oo_dt) goto end; - - model.reset(); - float dt = float(1./oo_dt); - runner.run(model, dt, sample_dt, t_end, dt, exclude); - } - } - -end: - runner.report(); - runner.assert_all_convergence(); -} - -void validate_kinetic_kin1(nest::mc::backend_kind backend) { - using namespace nest::mc; - - // 20 µm diameter soma with single mechanism, current probe - cell c; - auto soma = c.add_soma(10); - soma->add_mechanism(std::string("test_kin1")); - - nlohmann::json meta = { - {"model", "test_kin1"}, - {"name", "membrane current"}, - {"units", "nA"} - }; - - run_kinetic_dt(backend, c, 100.f, meta, "numeric_kin1.json"); -} - -void validate_kinetic_kinlva(nest::mc::backend_kind backend) { - using namespace nest::mc; - - // 20 µm diameter soma with single mechanism, current probe - cell c; - auto soma = c.add_soma(10); - c.add_stimulus({0,0.5}, {20., 130., -0.025}); - soma->add_mechanism(std::string("test_kinlva")); - - nlohmann::json meta = { - {"model", "test_kinlva"}, - {"name", "membrane voltage"}, - {"units", "mV"} - }; - - run_kinetic_dt(backend, c, 300.f, meta, "numeric_kinlva.json"); -} - diff --git a/tests/validation/validate_soma.cpp b/tests/validation/validate_soma.cpp index acb74861..a41f4353 100644 --- a/tests/validation/validate_soma.cpp +++ b/tests/validation/validate_soma.cpp @@ -1,10 +1,74 @@ -#include "validate_soma.hpp" +#include <json/json.hpp> + +#include <common_types.hpp> +#include <cell.hpp> +#include <fvm_multicell.hpp> +#include <hardware/gpu.hpp> +#include <hardware/node_info.hpp> +#include <load_balance.hpp> +#include <model.hpp> +#include <recipe.hpp> +#include <simple_sampler.hpp> +#include <util/rangeutil.hpp> + +#include "../common_cells.hpp" +#include "../simple_recipes.hpp" + +#include "convergence_test.hpp" +#include "trace_analysis.hpp" +#include "validation_data.hpp" #include "../gtest.h" +using namespace nest::mc; + +void validate_soma(backend_kind backend) { + float sample_dt = g_trace_io.sample_dt(); + + cell c = make_cell_soma_only(); + + cable1d_recipe rec{c}; + rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); + probe_label plabels[1] = {"soma.mid", {0u, 0u}}; + + hw::node_info nd(1, backend==backend_kind::gpu? 1: 0); + auto decomp = partition_load_balance(rec, nd); + model m(rec, decomp); -const auto backend = nest::mc::backend_kind::multicore; + nlohmann::json meta = { + {"name", "membrane voltage"}, + {"model", "soma"}, + {"sim", "nestmc"}, + {"units", "mV"}, + {"backend_kind", to_string(backend)} + }; + + convergence_test_runner<float> runner("dt", plabels, meta); + runner.load_reference_data("numeric_soma.json"); + + float t_end = 100.f; + + // use dt = 0.05, 0.02, 0.01, 0.005, 0.002, ... 
+ double max_oo_dt = std::round(1.0/g_trace_io.min_dt()); + for (double base = 100; ; base *= 10) { + for (double multiple: {5., 2., 1.}) { + double oo_dt = base/multiple; + if (oo_dt>max_oo_dt) goto end; + + m.reset(); + float dt = float(1./oo_dt); + runner.run(m, dt, sample_dt, t_end, dt, {}); + } + } +end: + + runner.report(); + runner.assert_all_convergence(); +} TEST(soma, numeric_ref) { - validate_soma(backend); + validate_soma(backend_kind::multicore); + if (hw::num_gpus()) { + validate_soma(backend_kind::gpu); + } } diff --git a/tests/validation/validate_soma.cu b/tests/validation/validate_soma.cu deleted file mode 100644 index f726155d..00000000 --- a/tests/validation/validate_soma.cu +++ /dev/null @@ -1,9 +0,0 @@ -#include "validate_soma.hpp" - -#include "../gtest.h" - -const auto backend = nest::mc::backend_kind::gpu; - -TEST(soma, numeric_ref) { - validate_soma(backend); -} diff --git a/tests/validation/validate_soma.hpp b/tests/validation/validate_soma.hpp deleted file mode 100644 index edc985b8..00000000 --- a/tests/validation/validate_soma.hpp +++ /dev/null @@ -1,64 +0,0 @@ -#include <json/json.hpp> - -#include <common_types.hpp> -#include <cell.hpp> -#include <fvm_multicell.hpp> -#include <hardware/node_info.hpp> -#include <load_balance.hpp> -#include <model.hpp> -#include <recipe.hpp> -#include <simple_sampler.hpp> -#include <util/rangeutil.hpp> - -#include "../common_cells.hpp" -#include "../simple_recipes.hpp" - -#include "convergence_test.hpp" -#include "trace_analysis.hpp" -#include "validation_data.hpp" - -void validate_soma(nest::mc::backend_kind backend) { - using namespace nest::mc; - - float sample_dt = g_trace_io.sample_dt(); - - cell c = make_cell_soma_only(); - - cable1d_recipe rec{c}; - rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); - probe_label plabels[1] = {"soma.mid", {0u, 0u}}; - - hw::node_info nd(1, backend==backend_kind::gpu? 1: 0); - auto decomp = partition_load_balance(rec, nd); - model m(rec, decomp); - - nlohmann::json meta = { - {"name", "membrane voltage"}, - {"model", "soma"}, - {"sim", "nestmc"}, - {"units", "mV"}, - {"backend_kind", to_string(backend)} - }; - - convergence_test_runner<float> runner("dt", plabels, meta); - runner.load_reference_data("numeric_soma.json"); - - float t_end = 100.f; - - // use dt = 0.05, 0.02, 0.01, 0.005, 0.002, ... 
- double max_oo_dt = std::round(1.0/g_trace_io.min_dt()); - for (double base = 100; ; base *= 10) { - for (double multiple: {5., 2., 1.}) { - double oo_dt = base/multiple; - if (oo_dt>max_oo_dt) goto end; - - m.reset(); - float dt = float(1./oo_dt); - runner.run(m, dt, sample_dt, t_end, dt, {}); - } - } -end: - - runner.report(); - runner.assert_all_convergence(); -} diff --git a/tests/validation/validate_synapses.cpp b/tests/validation/validate_synapses.cpp index 1a8bbf03..557a7470 100644 --- a/tests/validation/validate_synapses.cpp +++ b/tests/validation/validate_synapses.cpp @@ -1,16 +1,102 @@ +#include <cell.hpp> +#include <cell_group.hpp> #include <fvm_multicell.hpp> +#include <hardware/node_info.hpp> +#include <hardware/gpu.hpp> +#include <json/json.hpp> +#include <load_balance.hpp> +#include <model.hpp> +#include <recipe.hpp> +#include <simple_sampler.hpp> +#include <util/path.hpp> #include "../gtest.h" -#include "validate_synapses.hpp" -const auto backend = nest::mc::backend_kind::multicore; +#include "../common_cells.hpp" +#include "../simple_recipes.hpp" + +#include "convergence_test.hpp" +#include "trace_analysis.hpp" +#include "validation_data.hpp" + +using namespace nest::mc; + +void run_synapse_test( + const char* syn_type, + const util::path& ref_data_path, + backend_kind backend, + float t_end=70.f, + float dt=0.001) +{ + auto max_ncomp = g_trace_io.max_ncomp(); + nlohmann::json meta = { + {"name", "membrane voltage"}, + {"model", syn_type}, + {"sim", "nestmc"}, + {"units", "mV"}, + {"backend_kind", to_string(backend)} + }; + + cell c = make_cell_ball_and_stick(false); // no stimuli + parameter_list syn_default(syn_type); + c.add_synapse({1, 0.5}, syn_default); + + // injected spike events + std::vector<postsynaptic_spike_event> synthetic_events = { + {{0u, 0u}, 10.0, 0.04}, + {{0u, 0u}, 20.0, 0.04}, + {{0u, 0u}, 40.0, 0.04} + }; + + // exclude points of discontinuity from linf analysis + std::vector<float> exclude = {10.f, 20.f, 40.f}; + + float sample_dt = g_trace_io.sample_dt(); + probe_label plabels[3] = { + {"soma.mid", {0u, 0u}}, + {"dend.mid", {0u, 1u}}, + {"dend.end", {0u, 2u}} + }; + + convergence_test_runner<int> runner("ncomp", plabels, meta); + runner.load_reference_data(ref_data_path); + + hw::node_info nd(1, backend==backend_kind::gpu? 
1: 0); + for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) { + c.cable(1)->set_compartments(ncomp); + + cable1d_recipe rec{c}; + // soma.mid + rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); + // dend.mid + rec.add_probe(0, 0, cell_probe_address{{1, 0.5}, cell_probe_address::membrane_voltage}); + // dend.end + rec.add_probe(0, 0, cell_probe_address{{1, 1.0}, cell_probe_address::membrane_voltage}); + + auto decomp = partition_load_balance(rec, nd); + model m(rec, decomp); + m.group(0).enqueue_events(synthetic_events); + + runner.run(m, ncomp, sample_dt, t_end, dt, exclude); + } + runner.report(); + runner.assert_all_convergence(); +} TEST(simple_synapse, expsyn_neuron_ref) { - SCOPED_TRACE("expsyn"); - run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend); + SCOPED_TRACE("expsyn-multicore"); + run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::multicore); + if (hw::num_gpus()) { + SCOPED_TRACE("expsyn-gpu"); + run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend_kind::gpu); + } } TEST(simple_synapse, exp2syn_neuron_ref) { - SCOPED_TRACE("exp2syn"); - run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend); + SCOPED_TRACE("exp2syn-multicore"); + run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::multicore); + if (hw::num_gpus()) { + SCOPED_TRACE("exp2syn-gpu"); + run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend_kind::gpu); + } } diff --git a/tests/validation/validate_synapses.cu b/tests/validation/validate_synapses.cu deleted file mode 100644 index 886bbdfb..00000000 --- a/tests/validation/validate_synapses.cu +++ /dev/null @@ -1,16 +0,0 @@ -#include <fvm_multicell.hpp> - -#include "../gtest.h" -#include "validate_synapses.hpp" - -const auto backend = nest::mc::backend_kind::gpu; - -TEST(simple_synapse, expsyn_neuron_ref) { - SCOPED_TRACE("expsyn"); - run_synapse_test("expsyn", "neuron_simple_exp_synapse.json", backend); -} - -TEST(simple_synapse, exp2syn_neuron_ref) { - SCOPED_TRACE("exp2syn"); - run_synapse_test("exp2syn", "neuron_simple_exp2_synapse.json", backend); -} diff --git a/tests/validation/validate_synapses.hpp b/tests/validation/validate_synapses.hpp deleted file mode 100644 index 429da370..00000000 --- a/tests/validation/validate_synapses.hpp +++ /dev/null @@ -1,84 +0,0 @@ -#include <json/json.hpp> - -#include <cell.hpp> -#include <cell_group.hpp> -#include <fvm_multicell.hpp> -#include <hardware/node_info.hpp> -#include <load_balance.hpp> -#include <model.hpp> -#include <recipe.hpp> -#include <simple_sampler.hpp> -#include <util/path.hpp> - -#include "../gtest.h" - -#include "../common_cells.hpp" -#include "../simple_recipes.hpp" - -#include "convergence_test.hpp" -#include "trace_analysis.hpp" -#include "validation_data.hpp" - -void run_synapse_test( - const char* syn_type, - const nest::mc::util::path& ref_data_path, - nest::mc::backend_kind backend, - float t_end=70.f, - float dt=0.001) -{ - using namespace nest::mc; - - auto max_ncomp = g_trace_io.max_ncomp(); - nlohmann::json meta = { - {"name", "membrane voltage"}, - {"model", syn_type}, - {"sim", "nestmc"}, - {"units", "mV"}, - {"backend_kind", to_string(backend)} - }; - - cell c = make_cell_ball_and_stick(false); // no stimuli - parameter_list syn_default(syn_type); - c.add_synapse({1, 0.5}, syn_default); - - // injected spike events - std::vector<postsynaptic_spike_event> synthetic_events = { - {{0u, 0u}, 10.0, 0.04}, - {{0u, 0u}, 20.0, 0.04}, - {{0u, 0u}, 40.0, 
0.04} - }; - - // exclude points of discontinuity from linf analysis - std::vector<float> exclude = {10.f, 20.f, 40.f}; - - float sample_dt = g_trace_io.sample_dt(); - probe_label plabels[3] = { - {"soma.mid", {0u, 0u}}, - {"dend.mid", {0u, 1u}}, - {"dend.end", {0u, 2u}} - }; - - convergence_test_runner<int> runner("ncomp", plabels, meta); - runner.load_reference_data(ref_data_path); - - hw::node_info nd(1, backend==backend_kind::gpu? 1: 0); - for (int ncomp = 10; ncomp<max_ncomp; ncomp*=2) { - c.cable(1)->set_compartments(ncomp); - - cable1d_recipe rec{c}; - // soma.mid - rec.add_probe(0, 0, cell_probe_address{{0, 0.5}, cell_probe_address::membrane_voltage}); - // dend.mid - rec.add_probe(0, 0, cell_probe_address{{1, 0.5}, cell_probe_address::membrane_voltage}); - // dend.end - rec.add_probe(0, 0, cell_probe_address{{1, 1.0}, cell_probe_address::membrane_voltage}); - - auto decomp = partition_load_balance(rec, nd); - model m(rec, decomp); - m.group(0).enqueue_events(synthetic_events); - - runner.run(m, ncomp, sample_dt, t_end, dt, exclude); - } - runner.report(); - runner.assert_all_convergence(); -} -- GitLab
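
The merged validation tests above all follow the same backend-gating pattern: run on the multicore backend unconditionally, then repeat on the GPU backend only when hw::num_gpus() reports at least one device, so a single test binary works on both GPU and CPU-only nodes. A minimal sketch of how that repetition could be factored out; for_each_backend is hypothetical, and the enum and num_gpus() below are stand-ins for the project's nest::mc::backend_kind and nest::mc::hw::num_gpus():

    #include <cstdio>

    enum class backend_kind { multicore, gpu };

    // Stand-in for nest::mc::hw::num_gpus().
    int num_gpus() { return 0; }

    // Hypothetical helper: invoke f once per backend available on this
    // node, mirroring the multicore-then-gpu pattern in the tests above.
    template <typename F>
    void for_each_backend(F f) {
        f(backend_kind::multicore);
        if (num_gpus() > 0) {
            f(backend_kind::gpu);
        }
    }

    int main() {
        // In the test suite the body would be e.g. validate_soma(b).
        for_each_backend([](backend_kind b) {
            std::printf("running on backend %d\n", static_cast<int>(b));
        });
    }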
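The timestep sweeps in validate_kinetic.cpp and validate_soma.cpp iterate over the inverse timestep oo_dt ("one over dt") rather than over dt itself: scaling an integral base through the decades and dividing by 5, 2 and 1 reproduces the exact sequence dt = 0.05, 0.02, 0.01, 0.005, 0.002, ... down to g_trace_io.min_dt(), which repeated floating-point division of dt could not guarantee. A self-contained sketch of just that loop; min_dt here is an assumed stand-in for the g_trace_io setting:

    #include <cmath>
    #include <cstdio>
    #include <initializer_list>

    int main() {
        double min_dt = 0.001;                     // stand-in for g_trace_io.min_dt()
        double max_oo_dt = std::round(1.0/min_dt); // largest admissible 1/dt

        // base walks 100, 1000, 10000, ...; each decade yields
        // dt = 5/base, 2/base, 1/base, i.e. 0.05, 0.02, 0.01, then
        // 0.005, 0.002, 0.001, and so on until 1/dt exceeds max_oo_dt.
        for (double base = 100; ; base *= 10) {
            for (double multiple: {5., 2., 1.}) {
                double oo_dt = base/multiple;
                if (oo_dt > max_oo_dt) return 0;
                float dt = float(1./oo_dt);
                std::printf("dt = %g\n", dt);
            }
        }
    }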
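run_synapse_test and the ball-and-stick tests drive convergence in space instead of time: the compartment count doubles from 10 until it reaches g_trace_io.max_ncomp(), and the recipe, probes and model are rebuilt for every refinement so no simulator state carries over between runs. The injected events at t = 10, 20 and 40 ms produce slope discontinuities in the voltage traces, which is why exactly those times make up the exclude list passed to the linf analysis. Condensed control flow, with run_case standing in for the rebuild-and-run body and max_ncomp an assumed value:

    #include <cstdio>

    // Stand-in for the per-refinement body of run_synapse_test(): build
    // a cable1d_recipe with ncomp compartments per cable, re-add the
    // probes, partition, run the model, and record the trace.
    void run_case(int ncomp) {
        std::printf("running with %d compartments\n", ncomp);
    }

    int main() {
        int max_ncomp = 100; // stand-in for g_trace_io.max_ncomp()

        // Geometric refinement: 10, 20, 40, 80 compartments.
        for (int ncomp = 10; ncomp < max_ncomp; ncomp *= 2) {
            run_case(ncomp);
        }
    }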