diff --git a/.gitignore b/.gitignore index c1833bba173b6133b0970eb5dcd6cae4a4c69791..ff2696ed75fe48bd54e9d654d2c139a2d9fa1c6b 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ *.swo *.swn *.swq +*.swm +*.swl # tar files *.tar diff --git a/CMakeLists.txt b/CMakeLists.txt index 44e40b639c2328b5512ad4c557a877d4d9cbec8f..8da46487de7eb56194909cbcb81a1262ff2a6444 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ if(WITH_MPI) endif() # Profiler support -set(WITH_PROFILING OFF CACHE BOOL "use built in profiling of miniapp" ) +set(WITH_PROFILING OFF CACHE BOOL "use built-in profiling of miniapp" ) if(WITH_PROFILING) add_definitions(-DWITH_PROFILING) endif() @@ -62,24 +62,45 @@ if(SYSTEM_CRAY) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -dynamic") endif() -# Cray systems -set(TARGET_KNL OFF CACHE BOOL "target Intel KNL architecture") -if(TARGET_KNL) +# vectorization target +set(VECTORIZE_TARGET "none" CACHE STRING "CPU target for vectorization {KNL,AVX,AVX2}") + +if(VECTORIZE_TARGET STREQUAL "KNL") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_KNL}") +elseif(VECTORIZE_TARGET STREQUAL "AVX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_AVX}") +elseif(VECTORIZE_TARGET STREQUAL "AVX2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXXOPT_AVX2}") endif() -# targets for extermal dependencies -include(ExternalProject) -externalproject_add(modparser - PREFIX ${CMAKE_BINARY_DIR}/external - CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/external" - "-DCMAKE_CXX_FLAGS=${SAVED_CXX_FLAGS}" - "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" - BINARY_DIR "${CMAKE_BINARY_DIR}/external/modparser" - STAMP_DIR "${CMAKE_BINARY_DIR}/external/" - TMP_DIR "${CMAKE_BINARY_DIR}/external/tmp" - SOURCE_DIR "${CMAKE_SOURCE_DIR}/external/modparser" +# whether to generate optimized kernels from NMODL +set(USE_OPTIMIZED_KERNELS OFF CACHE BOOL "generate optimized code that vectorizes with the Intel compiler") + +# Only build modcc if it has not already been installed. +# This is useful if cross compiling for KNL, when it is not desirable to compile +# modcc with the same flags that are used for the KNL target. +find_program(MODCC_BIN modcc) +set(modcc "${MODCC_BIN}") +set(use_external_modcc OFF BOOL) + +# the modcc executable was not found, so build our own copy +if(MODCC_BIN STREQUAL "MODCC_BIN-NOTFOUND") + include(ExternalProject) + externalproject_add(modparser + PREFIX ${CMAKE_BINARY_DIR}/external + CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/external" + "-DCMAKE_CXX_FLAGS=${SAVED_CXX_FLAGS}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" + BINARY_DIR "${CMAKE_BINARY_DIR}/external/modparser" + STAMP_DIR "${CMAKE_BINARY_DIR}/external/" + TMP_DIR "${CMAKE_BINARY_DIR}/external/tmp" + SOURCE_DIR "${CMAKE_SOURCE_DIR}/external/modparser" ) + # Set up environment to use the version of modcc that is compiled + # as the ExternalProject above. + set(use_external_modcc ON) + set(modcc "${CMAKE_BINARY_DIR}/external/bin/modcc") +endif() include_directories(${CMAKE_SOURCE_DIR}/external) @@ -88,7 +109,6 @@ include_directories(${CMAKE_SOURCE_DIR}/include) include_directories(${CMAKE_SOURCE_DIR}/src) include_directories(${CMAKE_SOURCE_DIR}/miniapp) include_directories(${CMAKE_SOURCE_DIR}) - if( "${WITH_TBB}" STREQUAL "ON" ) include_directories(${TBB_INCLUDE_DIRS}) endif() diff --git a/README.md b/README.md index dc2e7d720fa5452c15f2db6b2e7ad6001928cb85..32091a9364a52a70985abe288b50bfcc47d53d54 100644 --- a/README.md +++ b/README.md @@ -77,3 +77,147 @@ cmake <path to CMakeLists.txt> -DWITH_TBB=ON -DSYSTEM_CRAY=ON cmake <path to CMakeLists.txt> -DWITH_TBB=ON -DWITH_MPI=ON -DSYSTEM_CRAY=ON ``` + +# targetting KNL + +## build modparser + +The source to source compiler "modparser" that generates the C++/CUDA kernels for the ion channels and synapses is in a separate repository. +It is included in our project as a git submodule, and by default it will be built with the same compiler and flags that are used to build the miniapp and tests. + +This can cause problems if we are cross compiling, e.g. for KNL, because the modparser compiler might not be runnable on the compilation node. +CMake will look for the source to source compiler executable, `modcc`, in the `PATH` environment variable, and will use the version if finds instead of building its own. + +Modparser requires a C++11 compiler, and has been tested on GCC, Intel, and Clang compilers + - if the default compiler on your is some ancient version of gcc you might need to load a module/set the CC and CXX environment variables. + +```bash +git clone git@github.com:eth-cscs/modparser.git +cd modparser + +# example of setting a C++11 compiler +export CXX=`which gcc-4.8` + +cmake . +make -j + +# set path and test that you can see modcc +export PATH=`pwd`/bin:$PATH +which modcc +``` + +## set up environment + +- source the intel compilers +- source the TBB vars +- I have only tested with the latest stable version from online, not the version that comes installed sometimes with the Intel compilers. + +## build miniapp + +```bash +# clone the repo and set up the submodules +git clone TODO +cd cell_algorithms +git submodule init +git submodule update + +# make a path for out of source build +mkdir build_knl +cd build_knl + +## build miniapp + +# clone the repo and set up the submodules +git clone https://github.com/bcumming/cell_algorithms.git +cd cell_algorithms + +# checkout the knl branch +git checkout knl + +# setup submodules +git submodule init +git submodule update + +# make a path for out of source build +mkdir build_knl +cd build_knl + +# run cmake with all the magic flags +export CC=`which icc` +export CXX=`which icpc` +cmake .. -DCMAKE_BUILD_TYPE=release -DWITH_TBB=ON -DWITH_PROFILING=ON -DVECTORIZE_TARGET=KNL -DUSE_OPTIMIZED_KERNELS=ON +make -j +``` + +The flags passed into cmake are described: + - `-DCMAKE_BUILD_TYPE=release` : build in release mode with `-O3`. + - `-WITH_TBB=ON` : use TBB for threading on multicore + - `-DWITH_PROFILING=ON` : use internal profilers that print profiling report at end + - `-DVECTORIZE_TARGET=KNL` : generate AVX512 instructions, alternatively you can use: + - `AVX2` for Haswell & Broadwell + - `AVX` for Sandy Bridge and Ivy Bridge + - `-DUSE_OPTIMIZED_KERNELS=ON` : tell the source to source compiler to generate optimized kernels that use Intel extensions + - without these vectorized code will not be generated. + +## run tests + +Run some unit tests +```bash +cd tests +./test.exe +cd .. +``` + +## run miniapp + +The miniapp is the target for benchmarking. +First, we can run a small problem to check the build. +For the small test run, the parameters have the following meaning + - `-n 1000` : 1000 cells + - `-s 200` : 200 synapses per cell + - `-t 20` : simulated for 20ms + - `-p 0` : no file output of voltage traces + +The number of cells is the number of discrete tasks that are distributed to the threads in each large time integration period. +The number of synapses per cell is the amount of computational work per cell/task. +Realistic cells have anywhere in the range of 1,000-10,000 synapses per cell. + +```bash +cd miniapp + +# a small run to check that everything works +./miniapp.exe -n 1000 -s 200 -t 20 -p 0 + +# a larger run for generating meaninful benchmarks +./miniapp.exe -n 2000 -s 2000 -t 100 -p 0 +``` + +This generates the following profiler output (some reformatting to make the table work): + +``` + --------------------------------------- + | small | large | +-------------|-------------------|-------------------| +total | 0.791 100.0 | 38.593 100.0 | + stepping | 0.738 93.3 | 36.978 95.8 | + matrix | 0.406 51.3 | 6.034 15.6 | + solve | 0.308 38.9 | 4.534 11.7 | + setup | 0.082 10.4 | 1.260 3.3 | + other | 0.016 2.0 | 0.240 0.6 | + state | 0.194 24.5 | 23.235 60.2 | + expsyn | 0.158 20.0 | 22.679 58.8 | + hh | 0.014 1.7 | 0.215 0.6 | + pas | 0.003 0.4 | 0.053 0.1 | + other | 0.019 2.4 | 0.287 0.7 | + current | 0.107 13.5 | 7.106 18.4 | + expsyn | 0.047 5.9 | 6.118 15.9 | + pas | 0.028 3.5 | 0.476 1.2 | + hh | 0.006 0.7 | 0.096 0.2 | + other | 0.026 3.3 | 0.415 1.1 | + events | 0.005 0.6 | 0.125 0.3 | + sampling | 0.003 0.4 | 0.051 0.1 | + other | 0.024 3.0 | 0.428 1.1 | + other | 0.053 6.7 | 1.614 4.2 | +----------------------------------------------------- +``` + diff --git a/cmake/CompilerOptions.cmake b/cmake/CompilerOptions.cmake index 6dc09b961cc6a71ce69b1bf9cba5d6821e6949e8..c92c14c45eea9f6969c6395bc67964fa37f16b30 100644 --- a/cmake/CompilerOptions.cmake +++ b/cmake/CompilerOptions.cmake @@ -17,8 +17,9 @@ if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU") # compiler flags for generating KNL-specific AVX512 instructions # supported in gcc 4.9.x and later - #set(CXXOPT_KNL "-mavx512f") set(CXXOPT_KNL "-march=knl") + set(CXXOPT_AVX "-mavx") + set(CXXOPT_AVX2 "-march=core-avx2") endif() if(${CMAKE_CXX_COMPILER_ID} MATCHES "Intel") @@ -29,5 +30,7 @@ if(${CMAKE_CXX_COMPILER_ID} MATCHES "Intel") # compiler flags for generating KNL-specific AVX512 instructions set(CXXOPT_KNL "-xMIC-AVX512") + set(CXXOPT_AVX "-mavx") + set(CXXOPT_AVX2 "-march=core-avx2") endif() diff --git a/external/vector b/external/vector index 8284611f05b0fbe21a1f84630e2726015cb1d96d..c8678e80660cd63b293ade80ece8eed2249c1b06 160000 --- a/external/vector +++ b/external/vector @@ -1 +1 @@ -Subproject commit 8284611f05b0fbe21a1f84630e2726015cb1d96d +Subproject commit c8678e80660cd63b293ade80ece8eed2249c1b06 diff --git a/mechanisms/CMakeLists.txt b/mechanisms/CMakeLists.txt index cf183d75545c011dbb5fbb3436221ab7ab4d0933..80925e24320fe3a01d0c2eee24e5494e58ecd14a 100644 --- a/mechanisms/CMakeLists.txt +++ b/mechanisms/CMakeLists.txt @@ -1,14 +1,31 @@ +# the list of built-in mechanisms to be provided by default set(mechanisms pas hh expsyn exp2syn) -set(modcc "${CMAKE_BINARY_DIR}/external/bin/modcc") +# set the flags for the modcc compiler that converts NMODL +# files to C++/CUDA source. +set(modcc_flags "-t cpu") +if(USE_OPTIMIZED_KERNELS) # generate optimized kernels + set(modcc_flags ${modcc_flags} -O) +endif() +# generate source for each mechanism foreach(mech ${mechanisms}) set(mod "${CMAKE_CURRENT_SOURCE_DIR}/mod/${mech}.mod") set(hpp "${CMAKE_CURRENT_SOURCE_DIR}/${mech}.hpp") - add_custom_command(OUTPUT "${hpp}" - DEPENDS modparser "${mod}" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" - COMMAND "${modcc}" -t cpu -o "${hpp}" "${mod}") + if(use_external_modcc) + add_custom_command( + OUTPUT "${hpp}" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + COMMAND ${modcc} ${modcc_flags} ${mod} -o ${hpp} + ) + else() + add_custom_command( + OUTPUT "${hpp}" + DEPENDS modparser "${mod}" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + COMMAND ${modcc} ${modcc_flags} ${mod} -o ${hpp} + ) + endif() set_source_files_properties("${hpp}" PROPERTIES GENERATED TRUE) list(APPEND all_mod_hpps "${hpp}") endforeach() diff --git a/miniapp/io.cpp b/miniapp/io.cpp index 27b7b8bde17bfa3375b6fc9ccbf82a1d24070a32..344fd56135b232bef3563d6a3cc61b570868a360 100644 --- a/miniapp/io.cpp +++ b/miniapp/io.cpp @@ -19,7 +19,7 @@ namespace TCLAP { struct ArgTraits<nest::mc::util::optional<V>> { using ValueCategory = ValueLike; }; -} +} // namespace TCLAP namespace nest { namespace mc { @@ -122,7 +122,7 @@ cl_options read_options(int argc, char** argv) { 0.025, // dt false, // all_to_all false, // probe_soma_only - 1.0, // probe_ratio + 0.0, // probe_ratio "trace_", // trace_prefix util::nothing, // trace_max_gid diff --git a/miniapp/miniapp.cpp b/miniapp/miniapp.cpp index 4545ddce8677e2ff59f0232c890e96132282f14a..bc4133cb15ceecc5c96b8b7649f286980acb232f 100644 --- a/miniapp/miniapp.cpp +++ b/miniapp/miniapp.cpp @@ -96,9 +96,11 @@ int main(int argc, char** argv) { } // inject some artificial spikes, 1 per 20 neurons. - cell_gid_type spike_cell = 20*((cell_range.first+19)/20); - for (; spike_cell<cell_range.second; spike_cell+=20) { - m.add_artificial_spike({spike_cell,0u}); + std::vector<cell_gid_type> local_sources; + cell_gid_type first_spike_cell = 20*((cell_range.first+19)/20); + for (auto c=first_spike_cell; c<cell_range.second; c+=20) { + local_sources.push_back(c); + m.add_artificial_spike({c, 0}); } // attach samplers to all probes @@ -110,13 +112,25 @@ int main(int argc, char** argv) { } traces.push_back(make_trace(probe.id, probe.probe)); - m.attach_sampler(probe.id, make_trace_sampler(traces.back().get(),sample_dt)); + m.attach_sampler(probe.id, make_trace_sampler(traces.back().get(), sample_dt)); + } + + // dummy run of the model for one step to ensure that profiling is consistent + m.run(options.dt, options.dt); + + // reset the model + m.reset(); + // rest the source spikes + for (auto source : local_sources) { + m.add_artificial_spike({source, 0}); } // run model m.run(options.tfinal, options.dt); - util::profiler_output(0.001); + // output profile and diagnostic feedback + auto const num_steps = options.tfinal / options.dt; + util::profiler_output(0.001, m.num_cells()*num_steps); std::cout << "there were " << m.num_spikes() << " spikes\n"; // save traces diff --git a/src/cell_group.hpp b/src/cell_group.hpp index b0bbccbcb46757afe72695413cbc525b0d86eb9a..77276f49e52f12dc2b8205a7d24113d7409365f4 100644 --- a/src/cell_group.hpp +++ b/src/cell_group.hpp @@ -62,8 +62,10 @@ public: } void reset() { - remove_samplers(); - cell_.reset(); + clear_spikes(); + clear_events(); + reset_samplers(); + //initialize_cells(); for (auto& spike_source: spike_sources_) { spike_source.source.reset(cell_, 0.f); } @@ -144,16 +146,30 @@ public: spikes_.clear(); } + void clear_events() { + events_.clear(); + } + void add_sampler(cell_member_type probe_id, sampler_function s, time_type start_time = 0) { EXPECTS(probe_id.gid==gid_base_); auto sampler_index = uint32_t(samplers_.size()); samplers_.push_back({probe_handles_[probe_id.index], s}); + sampler_start_times_.push_back(start_time); sample_events_.push({sampler_index, start_time}); } void remove_samplers() { sample_events_.clear(); samplers_.clear(); + sampler_start_times_.clear(); + } + + void reset_samplers() { + // clear all pending sample events and reset to start at time 0 + sample_events_.clear(); + for(uint32_t i=0u; i<samplers_.size(); ++i) { + sample_events_.push({i, sampler_start_times_[i]}); + } } value_type probe(cell_member_type probe_id) const { @@ -178,6 +194,7 @@ private: /// pending samples to be taken event_queue<sample_event<time_type>> sample_events_; + std::vector<time_type> sampler_start_times_; /// the global id of the first target (e.g. a synapse) in this group index_type first_target_gid_; diff --git a/src/communication/communicator.hpp b/src/communication/communicator.hpp index 50ce644c2bc62904625fd74c68fe7a98de059395..13d4a54303da74aefbcab0bc4d0ecc3636d7f472 100644 --- a/src/communication/communicator.hpp +++ b/src/communication/communicator.hpp @@ -6,13 +6,13 @@ #include <random> #include <functional> -#include <spike.hpp> -#include <util/double_buffer.hpp> #include <algorithms.hpp> #include <connection.hpp> +#include <communication/gathered_vector.hpp> #include <event_queue.hpp> #include <spike.hpp> #include <util/debug.hpp> +#include <util/double_buffer.hpp> namespace nest { namespace mc { @@ -70,7 +70,7 @@ public: /// must be called after all connections have been added void construct() { if (!std::is_sorted(connections_.begin(), connections_.end())) { - std::sort(connections_.begin(), connections_.end()); + threading::sort(connections_); } } @@ -87,29 +87,26 @@ public: /// Perform exchange of spikes. /// /// Takes as input the list of local_spikes that were generated on the calling domain. - /// - /// Returns a vector of event queues, with one queue for each local cell group. The - /// events in each queue are all events that must be delivered to targets in that cell - /// group as a result of the global spike exchange. - std::vector<event_queue> exchange(const std::vector<spike_type>& local_spikes, - std::function<void(const std::vector<spike_type>&)> global_export_callback) - { + /// Returns the full global set of vectors, along with meta data about their partition + gathered_vector<spike_type> exchange(const std::vector<spike_type>& local_spikes) { // global all-to-all to gather a local copy of the global spike list on each node. auto global_spikes = communication_policy_.gather_spikes( local_spikes ); num_spikes_ += global_spikes.size(); + return global_spikes; + } - global_export_callback(global_spikes); - - // check each global spike in turn to see it generates local events. - // if so, make the events and insert them into the appropriate event list. + /// Check each global spike in turn to see it generates local events. + /// If so, make the events and insert them into the appropriate event list. + /// Return a vector that contains the event queues for each local cell group. + /// + /// Returns a vector of event queues, with one queue for each local cell group. The + /// events in each queue are all events that must be delivered to targets in that cell + /// group as a result of the global spike exchange. + std::vector<event_queue> make_event_queues(const gathered_vector<spike_type>& global_spikes) { auto queues = std::vector<event_queue>(num_groups_local()); - - for (auto spike : global_spikes) { + for (auto spike : global_spikes.values()) { // search for targets - auto targets = - std::equal_range( - connections_.begin(), connections_.end(), spike.source - ); + auto targets = std::equal_range(connections_.begin(), connections_.end(), spike.source); // generate an event for each target for (auto it=targets.first; it!=targets.second; ++it) { @@ -121,8 +118,7 @@ public: return queues; } - /// Returns the total number of global spikes over the duration - /// of the simulation + /// Returns the total number of global spikes over the duration of the simulation uint64_t num_spikes() const { return num_spikes_; } const std::vector<connection_type>& connections() const { @@ -133,6 +129,10 @@ public: return communication_policy_; } + void reset() { + num_spikes_ = 0; + } + private: std::size_t cell_group_index(cell_gid_type cell_gid) const { // this will be more elaborate when there is more than one cell per cell group diff --git a/src/communication/gathered_vector.hpp b/src/communication/gathered_vector.hpp new file mode 100644 index 0000000000000000000000000000000000000000..cf48192f628b665713a25b2b1a53f0df3af86936 --- /dev/null +++ b/src/communication/gathered_vector.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include <cstdint> +#include <numeric> +#include <vector> + +#include <algorithms.hpp> + +namespace nest { +namespace mc { + +template <typename T> +class gathered_vector { +public: + using value_type = T; + using count_type = unsigned; + + gathered_vector(std::vector<value_type>&& v, std::vector<count_type>&& p) : + values_(std::move(v)), + partition_(std::move(p)) + { + EXPECTS(std::is_sorted(partition_.begin(), partition_.end())); + EXPECTS(std::size_t(partition_.back()) == v.size()); + } + + /// the partition of distribution + const std::vector<count_type>& partition() const { + return partition_; + } + + /// the number of entries in the gathered vector in partiion i + count_type count(std::size_t i) const { + return partition_[i+1] - partition_[i]; + } + + /// the values in the gathered vector + const std::vector<value_type>& values() const { + return values_; + } + + /// the size of the gathered vector + std::size_t size() const { + return values_.size(); + } + +private: + std::vector<value_type> values_; + std::vector<count_type> partition_; +}; + +} // namespace mc +} // namespace nest diff --git a/src/communication/mpi.hpp b/src/communication/mpi.hpp index 5be71b6381bf8311bcab85d5396d64ba2c3eb26b..9ebb0ca412d83dbb600471417beb299fd1a55a8b 100644 --- a/src/communication/mpi.hpp +++ b/src/communication/mpi.hpp @@ -10,6 +10,7 @@ #include <mpi.h> #include <algorithms.hpp> +#include <communication/gathered_vector.hpp> namespace nest { namespace mc { @@ -121,6 +122,39 @@ namespace mpi { return buffer; } + /// Gather all of a distributed vector + /// Retains the meta data (i.e. vector partition) + template <typename T> + gathered_vector<T> gather_all_with_partition(const std::vector<T>& values) { + using gathered_type = gathered_vector<T>; + using count_type = typename gathered_vector<T>::count_type; + using traits = mpi_traits<T>; + + // We have to use int for the count and displs vectors instead + // of count_type because these are used as arguments to MPI_Allgatherv + // which expects int arguments. + auto counts = gather_all(int(values.size())); + for (auto& c : counts) { + c *= traits::count(); + } + auto displs = algorithms::make_index(counts); + + std::vector<T> buffer(displs.back()/traits::count()); + + MPI_Allgatherv( + // send buffer + values.data(), counts[rank()], traits::mpi_type(), + // receive buffer + buffer.data(), counts.data(), displs.data(), traits::mpi_type(), + MPI_COMM_WORLD + ); + + return gathered_type( + std::move(buffer), + std::vector<count_type>(displs.begin(), displs.end()) + ); + } + template <typename T> T reduce(T value, MPI_Op op, int root) { using traits = mpi_traits<T>; diff --git a/src/communication/mpi_global_policy.hpp b/src/communication/mpi_global_policy.hpp index 0e9ec33370695990e2e2fa0d8d03d4a1e8b26c26..d12beb6372abb01526186162247b45db6ddf75b9 100644 --- a/src/communication/mpi_global_policy.hpp +++ b/src/communication/mpi_global_policy.hpp @@ -10,6 +10,7 @@ #include <algorithms.hpp> #include <common_types.hpp> +#include <communication/gathered_vector.hpp> #include <communication/mpi.hpp> #include <spike.hpp> @@ -18,10 +19,10 @@ namespace mc { namespace communication { struct mpi_global_policy { - template <typename I, typename T> - static std::vector<spike<I, T>> - gather_spikes(const std::vector<spike<I,T>>& local_spikes) { - return mpi::gather_all(local_spikes); + template <typename Spike> + static gathered_vector<Spike> + gather_spikes(const std::vector<Spike>& local_spikes) { + return mpi::gather_all_with_partition(local_spikes); } static int id() { return mpi::rank(); } @@ -38,6 +39,11 @@ struct mpi_global_policy { return nest::mc::mpi::reduce(value, MPI_MAX); } + template <typename T> + static T sum(T value) { + return nest::mc::mpi::reduce(value, MPI_SUM); + } + template < typename T, typename = typename std::enable_if<std::is_integral<T>::value> diff --git a/src/communication/serial_global_policy.hpp b/src/communication/serial_global_policy.hpp index 9af5eae3fd0dd84be71cb6346018c607fa9ab3df..73d7b96b378da6880ea337f702cc13995d2f05ec 100644 --- a/src/communication/serial_global_policy.hpp +++ b/src/communication/serial_global_policy.hpp @@ -4,6 +4,7 @@ #include <type_traits> #include <vector> +#include <communication/gathered_vector.hpp> #include <spike.hpp> namespace nest { @@ -11,10 +12,10 @@ namespace mc { namespace communication { struct serial_global_policy { - template <typename I, typename T> - static const std::vector<spike<I, T>>& - gather_spikes(const std::vector<spike<I, T>>& local_spikes) { - return local_spikes; + template <typename Spike> + static gathered_vector<Spike> + gather_spikes(const std::vector<Spike>& local_spikes) { + return gathered_vector<Spike>(std::vector<Spike>(local_spikes), {0u, 1u}); } static int id() { @@ -35,6 +36,11 @@ struct serial_global_policy { return value; } + template <typename T> + static T sum(T value) { + return value; + } + template < typename T, typename = typename std::enable_if<std::is_integral<T>::value> diff --git a/src/model.hpp b/src/model.hpp index 4fa778e06cf366826774fe4c980f1270b52c3cb1..5d0ce4fd8bcbe010cec3629e4b5d1adde38bcd96 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -85,7 +85,20 @@ public: for (auto& group: cell_groups_) { group.reset(); } + communicator_.reset(); + + for(auto& q : current_events()) { + q.clear(); + } + for(auto& q : future_events()) { + q.clear(); + } + + current_spikes().clear(); + previous_spikes().clear(); + + util::profilers_restart(); } time_type run(time_type tfinal, time_type dt) { @@ -130,18 +143,23 @@ public: // the previous integration period, generating the postsynaptic // events that must be delivered at the start of the next // integration period at the latest. - - //TODO: - //An improvement might be : - //the exchange method simply exchanges spikes, and does not generate the event queues.It returns a struct that has both 1) the global spike list 2) an integer vector that describes the distribution of spikes across the ranks - //another method called something like build_queues that takes this spike info and returns the local spikes - // and the callbacks can then be called on the spike information directly in the model. auto exchange = [&] () { - PE("stepping", "exchange"); + PE("stepping", "communciation"); + + PE("exchange"); auto local_spikes = previous_spikes().gather(); + auto global_spikes = communicator_.exchange(local_spikes); + PL(); + + PE("spike output"); local_export_callback_(local_spikes); - future_events() = - communicator_.exchange(local_spikes, global_export_callback_); + global_export_callback_(global_spikes.values()); + PL(); + + PE("events"); + future_events() = communicator_.make_event_queues(global_spikes); + PL(); + PL(2); }; @@ -154,6 +172,7 @@ public: t_ = tuntil; } + return t_; } @@ -177,8 +196,16 @@ public: const std::vector<probe_record>& probes() const { return probes_; } - std::size_t num_spikes() const { return communicator_.num_spikes(); } - std::size_t num_groups() const { return cell_groups_.size(); } + std::size_t num_spikes() const { + return communicator_.num_spikes(); + } + std::size_t num_groups() const { + return cell_groups_.size(); + } + std::size_t num_cells() const { + // TODO: fix when the assumption that there is one cell per cell group is no longer valid + return num_groups(); + } // register a callback that will perform a export of the global // spike vector diff --git a/src/profiling/profiler.cpp b/src/profiling/profiler.cpp index 1e30449ab4b225385570292bea527fbcc6981c3d..83c15d78fa7f4c5d6a076d4ff534ef77fc891a57 100644 --- a/src/profiling/profiler.cpp +++ b/src/profiling/profiler.cpp @@ -1,9 +1,9 @@ #include <numeric> -#include "profiler.hpp" -#include "util/debug.hpp" - +#include <common_types.hpp> #include <communication/global_policy.hpp> +#include <profiling/profiler.hpp> +#include <util/debug.hpp> namespace nest { namespace mc { @@ -241,6 +241,17 @@ void profiler::stop() { deactivate(); } +void profiler::restart() { + if (!is_activated()) { + start(); + return; + } + deactivate(); + root_region_.clear(); + start(); +} + + profiler_node profiler::performance_tree() { if (is_activated()) { stop(); @@ -280,15 +291,22 @@ void profiler_leave(int nlevels) { get_profiler().leave(nlevels); } -// iterate over all profilers and ensure that they have the same start stop times -void stop_profilers() { +/// iterate over all profilers and ensure that they have the same start stop times +void profilers_stop() { for (auto& p : data::profilers_) { p.stop(); } } -void profiler_output(double threshold) { - stop_profilers(); +/// iterate over all profilers and reset +void profilers_restart() { + for (auto& p : data::profilers_) { + p.restart(); + } +} + +void profiler_output(double threshold, std::size_t num_local_work_items) { + profilers_stop(); // Find the earliest start time and latest stop time over all profilers // This can be used to calculate the wall time for this communicator. @@ -323,6 +341,11 @@ void profiler_output(double threshold) { auto ncomms = communication::global_policy::size(); auto comm_rank = communication::global_policy::id(); bool print = comm_rank==0 ? true : false; + + // calculate the throughput in terms of work items per second + auto local_throughput = num_local_work_items / wall_time; + auto global_throughput = communication::global_policy::sum(local_throughput); + if(print) { std::cout << " ---------------------------------------------------- \n"; std::cout << "| profiler |\n"; @@ -335,7 +358,6 @@ void profiler_output(double threshold) { std::snprintf( line, sizeof(line), "%-18s%10d\n", "communicators", int(ncomms)); - std::cout << line; std::snprintf( line, sizeof(line), "%-18s%10d\n", "threads", int(nthreads)); @@ -345,6 +367,15 @@ void profiler_output(double threshold) { "thread efficiency", float(efficiency)); std::cout << line << "\n"; p.print(std::cout, threshold); + std::cout << "\n"; + std::snprintf( + line, sizeof(line), "%-18s%10s%10s\n", + "", "local", "global"); + std::cout << line; + std::snprintf( + line, sizeof(line), "%-18s%10d%10d\n", + "throughput", int(local_throughput), int(global_throughput)); + std::cout << line; std::cout << "\n\n"; } @@ -354,6 +385,7 @@ void profiler_output(double threshold) { as_json["threads"] = nthreads; as_json["efficiency"] = efficiency; as_json["communicators"] = ncomms; + as_json["throughput"] = unsigned(local_throughput); as_json["rank"] = comm_rank; as_json["regions"] = p.as_json(); @@ -368,8 +400,9 @@ void profiler_stop() {} void profiler_enter(const char*) {} void profiler_leave() {} void profiler_leave(int) {} -void stop_profilers() {} -void profiler_output(double threshold) {} +void profilers_stop() {} +void profiler_output(double threshold, std::size_t num_local_work_items) {} +void profilers_restart() {}; #endif } // namespace util diff --git a/src/profiling/profiler.hpp b/src/profiling/profiler.hpp index 4332925a5b3ee46c8c07164a80c4acf5abd72989..a905416c49d295706bed32b7e0149948fa8f47d6 100644 --- a/src/profiling/profiler.hpp +++ b/src/profiling/profiler.hpp @@ -121,6 +121,11 @@ public: bool has_subregions() const { return subregions_.size() > 0; } + void clear() { + subregions_.clear(); + start_time(); + } + size_t hash() const { return hash_; } region_type* subregion(const char* n); @@ -170,6 +175,10 @@ public: /// stop (deactivate) the profiler void stop(); + /// restart the profiler + /// remove all trace information and restart timer for the root region + void restart(); + /// the time stamp at which the profiler was started (avtivated) timer_type::time_point start_time() const { return start_time_; } @@ -231,10 +240,13 @@ void profiler_leave(); void profiler_leave(int nlevels); /// iterate and stop them -void stop_profilers(); +void profilers_stop(); + +/// reset profilers +void profilers_restart(); /// print the collated profiler to std::cout -void profiler_output(double threshold); +void profiler_output(double threshold, std::size_t num_local_work_items); } // namespace util } // namespace mc diff --git a/src/thread_private_spike_store.hpp b/src/thread_private_spike_store.hpp index 1e14ab104619a42b8aa9f014172bb5716576017c..3f320fdd93709c622696e889f80c16588eb5ad66 100644 --- a/src/thread_private_spike_store.hpp +++ b/src/thread_private_spike_store.hpp @@ -69,6 +69,18 @@ private : threading::enumerable_thread_specific<std::vector<spike_type>>; local_spike_store_type buffers_; + +public : + using iterator = typename local_spike_store_type::iterator; + using const_iterator = typename local_spike_store_type::const_iterator; + + // make the container iterable + // we iterate of threads, not individual containers + + iterator begin() { return buffers_.begin(); } + iterator end() { return buffers_.begin(); } + const_iterator begin() const { return buffers_.begin(); } + const_iterator end() const { return buffers_.begin(); } }; } // namespace mc diff --git a/src/threading/serial.hpp b/src/threading/serial.hpp index c18d4800e3c457e140f0fe6d2ca671aa6ccac241..de9e3180271da68440b600918081e155fc34a1fb 100644 --- a/src/threading/serial.hpp +++ b/src/threading/serial.hpp @@ -4,6 +4,7 @@ #error "this header can only be loaded if WITH_SERIAL is set" #endif +#include <algorithm> #include <array> #include <chrono> #include <string> @@ -19,10 +20,10 @@ namespace threading { template <typename T> class enumerable_thread_specific { std::array<T, 1> data; - using iterator_type = typename std::array<T, 1>::iterator; - using const_iterator_type = typename std::array<T, 1>::const_iterator; - public : +public : + using iterator = typename std::array<T, 1>::iterator; + using const_iterator = typename std::array<T, 1>::const_iterator; enumerable_thread_specific() = default; @@ -39,14 +40,14 @@ class enumerable_thread_specific { auto size() -> decltype(data.size()) const { return data.size(); } - iterator_type begin() { return data.begin(); } - iterator_type end() { return data.end(); } + iterator begin() { return data.begin(); } + iterator end() { return data.end(); } - const_iterator_type begin() const { return data.begin(); } - const_iterator_type end() const { return data.end(); } + const_iterator begin() const { return data.begin(); } + const_iterator end() const { return data.end(); } - const_iterator_type cbegin() const { return data.cbegin(); } - const_iterator_type cend() const { return data.cend(); } + const_iterator cbegin() const { return data.cbegin(); } + const_iterator cend() const { return data.cend(); } }; @@ -62,10 +63,24 @@ struct parallel_for { } }; +template <typename RandomIt> +void sort(RandomIt begin, RandomIt end) { + std::sort(begin, end); +} + +template <typename RandomIt, typename Compare> +void sort(RandomIt begin, RandomIt end, Compare comp) { + std::sort(begin, end, comp); +} + +template <typename Container> +void sort(Container& c) { + std::sort(c.begin(), c.end()); +} + template <typename T> using parallel_vector = std::vector<T>; - inline std::string description() { return "serial"; } diff --git a/src/threading/tbb.hpp b/src/threading/tbb.hpp index 853ceefde731cb1cd4db323541132c935011c73a..91a7b59b44ed6eaa8394da7fd5f4fa6586556a84 100644 --- a/src/threading/tbb.hpp +++ b/src/threading/tbb.hpp @@ -51,6 +51,21 @@ using parallel_vector = tbb::concurrent_vector<T>; using task_group = tbb::task_group; +template <typename RandomIt> +void sort(RandomIt begin, RandomIt end) { + tbb::parallel_sort(begin, end); +} + +template <typename RandomIt, typename Compare> +void sort(RandomIt begin, RandomIt end, Compare comp) { + tbb::parallel_sort(begin, end, comp); +} + +template <typename Container> +void sort(Container& c) { + tbb::parallel_sort(c.begin(), c.end()); +} + } // threading } // mc } // nest diff --git a/src/util/range.hpp b/src/util/range.hpp index 7777444d0f65d7feebc03f8f3e988a234462cdf6..2439ca74ee785fac1f909e92b3189bbfcd6f5da2 100644 --- a/src/util/range.hpp +++ b/src/util/range.hpp @@ -87,9 +87,9 @@ struct range { std::swap(right, other.right); } - decltype(*left) front() const { return *left; } + auto front() const -> decltype(*left) { return *left; } - decltype(*left) back() const { return *upto(left, right); } + auto back() const -> decltype(*left) { return *upto(left, right); } template <typename V = iterator> enable_if_t<is_random_access_iterator<V>::value, decltype(*left)> @@ -200,7 +200,7 @@ public: // forward and input iterator requirements - auto operator*() const -> decltype(*iter()) { return *iter(); } + auto operator*() const -> decltype(*(this->iter())) { return *iter(); } I operator->() const { return e_.template ptr<0>(); } @@ -271,7 +271,7 @@ public: return iter()-x.iter(); } - auto operator[](difference_type n) const -> decltype(*iter()){ + auto operator[](difference_type n) const -> decltype(*(this->iter())) { return *(iter()+n); } diff --git a/tests/global_communication/CMakeLists.txt b/tests/global_communication/CMakeLists.txt index 82bb6583ea1310db1657e2a5110143fc484abd86..cc1e1af8b6567a2fc887815fa4eaeb25c561841e 100644 --- a/tests/global_communication/CMakeLists.txt +++ b/tests/global_communication/CMakeLists.txt @@ -3,6 +3,8 @@ set(HEADERS ) set(COMMUNICATION_SOURCES test_exporter_spike_file.cpp + test_communicator.cpp + # unit test driver test.cpp ) @@ -13,19 +15,20 @@ set(TARGETS global_communication.exe) foreach(target ${TARGETS}) target_link_libraries(${target} LINK_PUBLIC cellalgo gtest) - + if(WITH_TBB) - target_link_libraries(${target} LINK_PUBLIC ${TBB_LIBRARIES}) + target_link_libraries(${target} LINK_PUBLIC ${TBB_LIBRARIES}) endif() if(WITH_MPI) - target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES}) - set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") + target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES}) + set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") endif() - set_target_properties(${target} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" + set_target_properties( + ${target} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" ) endforeach() diff --git a/tests/global_communication/mpi_listener.hpp b/tests/global_communication/mpi_listener.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e33aaebb5cea2168baead70e3418567c10bcdd6e --- /dev/null +++ b/tests/global_communication/mpi_listener.hpp @@ -0,0 +1,155 @@ +#pragma once + +#include <cstdio> +#include <ostream> +#include <stdexcept> + +#include <communication/global_policy.hpp> + +#include "gtest.h" + +/// A specialized listener desinged for printing test results with MPI. +/// +/// When tests are run with MPI, one instance of each test is run on +/// each rank. The default behavior of Google Test is for each test +/// instance to print to stdout. With more than one MPI rank, this creates +/// the usual MPI mess of output. +/// +/// This specialization has the first rank (rank 0) print to stdout, and all MPI +/// ranks print their output to separate text files. +/// For each test a message is printed showing +/// - detailed messages about errors on rank 0 +/// - a head count of errors that occured on other MPI ranks + +class mpi_listener : public testing::EmptyTestEventListener { +private: + using UnitTest = testing::UnitTest; + using TestCase = testing::TestCase; + using TestInfo = testing::TestInfo; + using TestPartResult = testing::TestPartResult; + + int rank_; + int size_; + std::ofstream fid_; + char buffer_[1024]; + int test_case_failures_; + int test_case_tests_; + int test_failures_; + + bool does_print() const { + return rank_==0; + } + + void print(const char* s) { + if (fid_) { + fid_ << s; + } + if (does_print()) { + std::cout << s; + } + } + + void print(const std::string& s) { + print(s.c_str()); + } + + /// convenience function that handles the logic of using snprintf + /// and forwarding the results to file and/or stdout. + /// + /// TODO : it might be an idea to use a resizeable buffer + template <typename... Args> + void printf_helper(const char* s, Args&&... args) { + snprintf(buffer_, sizeof(buffer_), s, std::forward<Args>(args)...); + print(buffer_); + } + +public: + mpi_listener(std::string f_base="") { + rank_ = nest::mc::communication::global_policy::id(); + size_ = nest::mc::communication::global_policy::size(); + + if (f_base.empty()) { + return; + } + std::string fname = f_base + "_" + std::to_string(rank_) + ".txt"; + fid_.open(fname); + if (!fid_) { + throw std::runtime_error("could not open file " + fname + " for test output"); + } + } + + /// Messages that are printed at the start and end of the test program. + /// i.e. once only. + virtual void OnTestProgramStart(const UnitTest&) override { + printf_helper("*** test output for rank %d of %d\n\n", rank_, size_); + } + virtual void OnTestProgramEnd(const UnitTest&) override { + printf_helper("*** end test output for rank %d of %d\n", rank_, size_); + } + + /// Messages that are printed at the start and end of each test case. + /// On startup a counter that counts the number of tests that fail in + /// this test case is initialized to zero, and will be incremented for each + /// test that fails. + virtual void OnTestCaseStart(const TestCase& test_case) override { + test_case_failures_ = 0; + test_case_tests_ = 0; + } + virtual void OnTestCaseEnd(const TestCase& test_case) override { + printf_helper( + "[PASSED %3d; FAILED %3d] of %3d tests in %s\n\n", + test_case_tests_-test_case_failures_, + test_case_failures_, + test_case_tests_, + test_case.name() + ); + } + + // Called before a test starts. + virtual void OnTestStart(const TestInfo& test_info) override { + printf_helper( " TEST %s::%s\n", test_info.test_case_name(), test_info.name()); + test_failures_ = 0; + } + + // Called after a failed assertion or a SUCCEED() invocation. + virtual void OnTestPartResult(const TestPartResult& test_part_result) override { + const char* banner = "--------------------------------------------------------------------------------"; + + // indent all lines in the summary by 4 spaces + std::string summary = " " + std::string(test_part_result.summary()); + auto pos = summary.find("\n"); + while (pos!=summary.size() && pos!=std::string::npos) { + summary.replace(pos, 1, "\n "); + pos = summary.find("\n", pos+1); + } + + printf_helper( + " LOCAL_%s\n %s\n %s:%d\n%s\n %s\n", + test_part_result.failed() ? "FAIL" : "SUCCESS", + banner, + test_part_result.file_name(), + test_part_result.line_number(), + summary.c_str(), + banner + ); + + // note that there was a failure in this test case + if (test_part_result.failed()) { + test_failures_++; + } + } + + // Called after a test ends. + virtual void OnTestEnd(const TestInfo& test_info) override { + test_case_tests_++; + + // count the number of ranks that had errors + int global_errors = + nest::mc::communication::global_policy::sum(test_failures_>0 ? 1 : 0); + if (global_errors>0) { + test_case_failures_++; + printf_helper(" GLOBAL_FAIL on %d ranks\n", global_errors); + } + } +}; + diff --git a/tests/global_communication/test.cpp b/tests/global_communication/test.cpp index c67a065bd88ed85aa56e865b1d4f8f9fa3d5677e..1ddb6d41baf5d84cefc5b7f4e542a7cf79d2598c 100644 --- a/tests/global_communication/test.cpp +++ b/tests/global_communication/test.cpp @@ -5,15 +5,35 @@ #include "gtest.h" -#include "../../src/communication/communicator.hpp" -#include "../../src/communication/global_policy.hpp" +#include "mpi_listener.hpp" -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); +#include <communication/communicator.hpp> +#include <communication/global_policy.hpp> +#include <util/ioutil.hpp> + +using namespace nest::mc; +int main(int argc, char **argv) { // We need to set the communicator policy at the top level // this allows us to build multiple communicators in the tests - nest::mc::communication::global_policy_guard global_guard(argc, argv); + communication::global_policy_guard global_guard(argc, argv); + + // initialize google test environment + testing::InitGoogleTest(&argc, argv); + + // set up a custom listener that prints messages in an MPI-friendly way + auto& listeners = testing::UnitTest::GetInstance()->listeners(); + // first delete the original printer + delete listeners.Release(listeners.default_result_printer()); + // now add our custom printer + listeners.Append(new mpi_listener("results_global_communication")); + + // record the local return value for tests run on this mpi rank + // 0 : success + // 1 : failure + auto result = RUN_ALL_TESTS(); - return RUN_ALL_TESTS(); + // perform global collective, to ensure that all ranks return + // the same exit code + return communication::global_policy::max(result); } diff --git a/tests/global_communication/test_communicator.cpp b/tests/global_communication/test_communicator.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f79d4e2d6e42b382bc4b54e16d29187ea531b7c0 --- /dev/null +++ b/tests/global_communication/test_communicator.cpp @@ -0,0 +1,35 @@ +#include "gtest.h" + +#include <cstdio> +#include <fstream> +#include <iostream> +#include <string> +#include <vector> + +#include <communication/communicator.hpp> +#include <communication/global_policy.hpp> + +using namespace nest::mc; + +using time_type = float; +using communicator_type = communication::communicator<time_type, communication::global_policy>; + +TEST(communicator, setup) { + /* + using policy = communication::global_policy; + + auto num_domains = policy::size(); + auto rank = policy::id(); + + auto counts = policy.gather_all(1); + EXPECT_EQ(counts.size(), unsigned(num_domains)); + for(auto i : counts) { + EXPECT_EQ(i, 1); + } + + auto part = util::parition_view(counts); + for(auto p : part) { + EXPECT_EQ(p.second-p.first, 1); + } + */ +} diff --git a/tests/performance/io/CMakeLists.txt b/tests/performance/io/CMakeLists.txt index 5d6149398089611dc161fc6591ebabe9bfc9db91..5bfe045ebf782c298702bd107201c9c343a2a297 100644 --- a/tests/performance/io/CMakeLists.txt +++ b/tests/performance/io/CMakeLists.txt @@ -9,6 +9,10 @@ add_executable(disk_io.exe ${DISK_IO_SOURCES} ${HEADERS}) target_link_libraries(disk_io.exe LINK_PUBLIC cellalgo) +if(WITH_TBB) + target_link_libraries(disk_io.exe LINK_PUBLIC ${TBB_LIBRARIES}) +endif() + if(WITH_MPI) target_link_libraries(disk_io.exe LINK_PUBLIC ${MPI_C_LIBRARIES}) set_property(TARGET disk_io.exe APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") diff --git a/tests/unit/test_algorithms.cpp b/tests/unit/test_algorithms.cpp index 2e82bcc9a6effc4e0fdbb70bbad3d7cddf443aba..7c4f9a12a717cd9929fa7ec3de14157b9c88affb 100644 --- a/tests/unit/test_algorithms.cpp +++ b/tests/unit/test_algorithms.cpp @@ -1,3 +1,4 @@ +#include <random> #include <vector> #include "gtest.h" @@ -6,6 +7,28 @@ #include "../test_util.hpp" #include "util/debug.hpp" +/// tests the sort implementation in threading +/// is only parallel if TBB is being used +TEST(algorithms, parallel_sort) +{ + auto n = 10000; + std::vector<int> v(n); + std::iota(v.begin(), v.end(), 1); + + // intialize with the default random seed + std::shuffle(v.begin(), v.end(), std::mt19937()); + + // assert that the original vector has in fact been permuted + EXPECT_FALSE(std::is_sorted(v.begin(), v.end())); + + nest::mc::threading::sort(v); + + EXPECT_TRUE(std::is_sorted(v.begin(), v.end())); + for(auto i=0; i<n; ++i) { + EXPECT_EQ(i+1, v[i]); + } +} + TEST(algorithms, sum) { @@ -14,9 +37,12 @@ TEST(algorithms, sum) EXPECT_EQ(10*2, nest::mc::algorithms::sum(v1)); // make an array 1:20 and sum it up using formula for arithmetic sequence - std::vector<int> v2(20); - std::iota(v2.begin(), v2.end(), 1); auto n = 20; + std::vector<int> v2(n); + // can't use iota because the Intel compiler optimizes it out, despite + // the result being required in EXPECT_EQ + // std::iota(v2.begin(), v2.end(), 1); + for(auto i=0; i<n; ++i) { v2[i] = i+1; } EXPECT_EQ((n+1)*n/2, nest::mc::algorithms::sum(v2)); } diff --git a/tests/unit/test_synapses.cpp b/tests/unit/test_synapses.cpp index 8cd9de04f57625ec7416e5a571ab24447c1a6cae..357a5a2a8a3f166476474a2c538a01f34d2f2853 100644 --- a/tests/unit/test_synapses.cpp +++ b/tests/unit/test_synapses.cpp @@ -58,30 +58,33 @@ TEST(synapses, expsyn_basic_state) auto ptr = dynamic_cast<synapse_type*>(mech.get()); + auto n = ptr->size(); + using view = synapse_type::view_type; + // parameters initialized to default values - for(auto e : ptr->e) { + for(auto e : view(ptr->e, n)) { EXPECT_EQ(e, 0.); } - for(auto tau : ptr->tau) { + for(auto tau : view(ptr->tau, n)) { EXPECT_EQ(tau, 2.0); } // current and voltage vectors correctly hooked up - for(auto v : ptr->vec_v_) { + for(auto v : view(ptr->vec_v_, n)) { EXPECT_EQ(v, -65.); } - for(auto i : ptr->vec_i_) { + for(auto i : view(ptr->vec_i_, n)) { EXPECT_EQ(i, 1.0); } // should be initialized to NaN - for(auto g : ptr->g) { + for(auto g : view(ptr->g, n)) { EXPECT_NE(g, g); } // initialize state then check g has been set to zero ptr->nrn_init(); - for(auto g : ptr->g) { + for(auto g : view(ptr->g, n)) { EXPECT_EQ(g, 0.); } @@ -106,32 +109,35 @@ TEST(synapses, exp2syn_basic_state) auto ptr = dynamic_cast<synapse_type*>(mech.get()); + auto n = ptr->size(); + using view = synapse_type::view_type; + // parameters initialized to default values - for(auto e : ptr->e) { + for(auto e : view(ptr->e, n)) { EXPECT_EQ(e, 0.); } - for(auto tau1: ptr->tau1) { + for(auto tau1: view(ptr->tau1, n)) { EXPECT_EQ(tau1, 0.5); } - for(auto tau2: ptr->tau2) { + for(auto tau2: view(ptr->tau2, n)) { EXPECT_EQ(tau2, 2.0); } // should be initialized to NaN - for(auto factor: ptr->factor) { + for(auto factor: view(ptr->factor, n)) { EXPECT_NE(factor, factor); } // initialize state then check factor has sane (positive) value // and A and B are zero ptr->nrn_init(); - for(auto factor: ptr->factor) { + for(auto factor: view(ptr->factor, n)) { EXPECT_GT(factor, 0.); } - for(auto A: ptr->A) { + for(auto A: view(ptr->A, n)) { EXPECT_EQ(A, 0.); } - for(auto B: ptr->B) { + for(auto B: view(ptr->B, n)) { EXPECT_EQ(B, 0.); } @@ -142,3 +148,4 @@ TEST(synapses, exp2syn_basic_state) EXPECT_NEAR(ptr->A[1], ptr->factor[1]*3.14, 1e-6); EXPECT_NEAR(ptr->B[3], ptr->factor[3]*1.04, 1e-6); } + diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt index c6c72fa04efab7394190288ba3face603d141faf..ae892560492dccca331036ad1481d2f4ce5da7bb 100644 --- a/tests/validation/CMakeLists.txt +++ b/tests/validation/CMakeLists.txt @@ -19,19 +19,20 @@ set(TARGETS validate.exe) foreach(target ${TARGETS}) target_link_libraries(${target} LINK_PUBLIC cellalgo gtest) - + if(WITH_TBB) - target_link_libraries(${target} LINK_PUBLIC ${TBB_LIBRARIES}) + target_link_libraries(${target} LINK_PUBLIC ${TBB_LIBRARIES}) endif() if(WITH_MPI) - target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES}) - set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") + target_link_libraries(${target} LINK_PUBLIC ${MPI_C_LIBRARIES}) + set_property(TARGET ${target} APPEND_STRING PROPERTY LINK_FLAGS "${MPI_C_LINK_FLAGS}") endif() - set_target_properties(${target} - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" + set_target_properties( + ${target} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" ) endforeach()