diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py index 0e73398aeeb3ef5da3dc86ccda9a67063a7cf16a..7f04ca2ea73382b5ad271071dc06bfb25a3f580f 100644 --- a/.ycm_extra_conf.py +++ b/.ycm_extra_conf.py @@ -55,7 +55,7 @@ flags = [ '-I', 'build/include', '-I', - 'aux/include', + 'sup/include', ] # Set this to the absolute path to the folder (NOT the file!) containing the diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt index b8d199bc720df0d2362d74b9811900b28f1acc5c..33d410b1208bee626f8d166b77975845cadd79b0 100644 --- a/arbor/CMakeLists.txt +++ b/arbor/CMakeLists.txt @@ -13,13 +13,10 @@ set(arbor_sources common_types_io.cpp execution_context.cpp gpu_context.cpp - local_alloc.cpp event_binner.cpp fvm_layout.cpp fvm_lowered_cell_impl.cpp - hardware/affinity.cpp hardware/memory.cpp - hardware/node_info.cpp hardware/power.cpp io/locked_ostream.cpp io/serialize_hex.cpp @@ -43,7 +40,6 @@ set(arbor_sources spike_source_cell_group.cpp swcio.cpp threading/threading.cpp - threading/thread_info.cpp thread_private_spike_store.cpp tree.cpp util/hostname.cpp diff --git a/arbor/hardware/node_info.cpp b/arbor/hardware/node_info.cpp deleted file mode 100644 index 095c26b01fc8c30e52450570951366508cdf1a28..0000000000000000000000000000000000000000 --- a/arbor/hardware/node_info.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include <thread> - -#ifdef ARB_HAVE_GPU -#include <cuda_runtime.h> -#endif - -// TODO: C++17 use __has_include(<unistd.h>) -#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__) -#include <unistd.h> -#endif - -#include "affinity.hpp" -#include "node_info.hpp" - -namespace arb { -namespace hw { - - -unsigned node_gpus() { -#ifdef ARB_HAVE_GPU - int n; - if (cudaGetDeviceCount(&n)==cudaSuccess) { - return (unsigned)(n); - } -#endif - - return 0; -} - -unsigned node_processors() { - // Attempt to get count first from affinity information if available. - unsigned n = get_affinity().size(); - - // If no luck, try sysconf. -#ifdef _SC_NPROCESSORS_ONLN - if (!n) { - long r = sysconf(_SC_NPROCESSORS_ONLN); - if (r>0) { - n = (unsigned)r; - } - } -#endif - - // If still zero, try the hint from the library. - if (!n) { - n = std::thread::hardware_concurrency(); - } - - return n; -} - -} // namespace util -} // namespace arb diff --git a/arbor/hardware/node_info.hpp b/arbor/hardware/node_info.hpp deleted file mode 100644 index 0452bdd4f62d0b23b08ecb58781c2c45bb69ccf1..0000000000000000000000000000000000000000 --- a/arbor/hardware/node_info.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -namespace arb { -namespace hw { - -// Number of GPUs detected on the node. -unsigned node_gpus(); - -// Number of visible logical processors on the node. -// 0 => unable to determine. -unsigned node_processors(); - -} // namespace hw -} // namespace arb diff --git a/arbor/local_alloc.cpp b/arbor/local_alloc.cpp deleted file mode 100644 index 3320e22e5b53586d8e90adf56d95580821397e3e..0000000000000000000000000000000000000000 --- a/arbor/local_alloc.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include <arbor/context.hpp> - -#include "hardware/node_info.hpp" -#include "threading/thread_info.hpp" -#include "threading/threading.hpp" - -namespace arb { - -local_resources get_local_resources() { - auto avail_threads = threading::num_threads_init(); - auto avail_gpus = arb::hw::node_gpus(); - - return local_resources(avail_threads, avail_gpus); -} - -} // namespace arb diff --git a/arbor/threading/thread_info.cpp b/arbor/threading/thread_info.cpp deleted file mode 100644 index dce15dbb6eb819ef13cb9547d4f28d455c7c1169..0000000000000000000000000000000000000000 --- a/arbor/threading/thread_info.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include <cstdlib> -#include <exception> -#include <regex> -#include <string> - -#include <arbor/arbexcept.hpp> -#include <arbor/util/optional.hpp> -#include <hardware/node_info.hpp> - -#include "thread_info.hpp" -#include "util/strprintf.hpp" - -namespace arb { -namespace threading { - -// Test environment variables for user-specified count of threads. -// -// ARB_NUM_THREADS is used if set, otherwise OMP_NUM_THREADS is used. -// -// If neither variable is set, returns no value. -// -// Valid values for the environment variable are: -// 0 : Arbor is responsible for picking the number of threads. -// >0: The number of threads to use. -// -// Throws std::runtime_error: -// ARB_NUM_THREADS or OMP_NUM_THREADS is set with invalid value. -util::optional<size_t> get_env_num_threads() { - const char* str; - - // select variable to use: - // If ARB_NUM_THREADS_VAR is set, use $ARB_NUM_THREADS_VAR - // else if ARB_NUM_THREAD set, use it - // else if OMP_NUM_THREADS set, use it - if (auto nthreads_var_name = std::getenv("ARB_NUM_THREADS_VAR")) { - str = std::getenv(nthreads_var_name); - } - else if (! (str = std::getenv("ARB_NUM_THREADS"))) { - str = std::getenv("OMP_NUM_THREADS"); - } - - // If the selected var is unset set the number of threads to - // the hint given by the standard library - if (!str) { - return util::nullopt; - } - - errno = 0; - auto nthreads = std::strtoul(str, nullptr, 10); - - // check that the environment variable string describes a non-negative integer - if (errno==ERANGE || - !std::regex_match(str, std::regex("\\s*\\d*[0-9]\\d*\\s*"))) - { - throw arbor_exception(util::pprintf( - "requested number of threads \"{}\" is not a valid value", str)); - } - - return nthreads; -} - -std::size_t num_threads_init() { - std::size_t n = 0; - - if (auto env_threads = get_env_num_threads()) { - n = env_threads.value(); - } - - if (!n) { - n = hw::node_processors(); - } - - return n? n: 1; -} - -} // namespace threading -} // namespace arb diff --git a/arbor/threading/threading.cpp b/arbor/threading/threading.cpp index ec116fad338132006f40cddec28cad884530084b..ae251d6e89e5d38c25f7176b1f7ed62a3efc6cc6 100644 --- a/arbor/threading/threading.cpp +++ b/arbor/threading/threading.cpp @@ -1,7 +1,6 @@ #include <atomic> #include "threading.hpp" -#include "thread_info.hpp" using namespace arb::threading::impl; using namespace arb::threading; @@ -82,7 +81,8 @@ void task_system::try_run_task() { } } -task_system::task_system(): task_system(num_threads_init()) {} +// Default construct with one thread. +task_system::task_system(): task_system(1) {} task_system::task_system(int nthreads): count_(nthreads), q_(nthreads) { if (nthreads <= 0) diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp index 1bae7acf7a7817cc5680791a1d8e5f50396147e0..4ab37a5e8fbfcaf684748763779aba1da097a6ab 100644 --- a/example/bench/bench.cpp +++ b/example/bench/bench.cpp @@ -18,6 +18,8 @@ #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #ifdef ARB_MPI_ENABLED @@ -33,16 +35,22 @@ int main(int argc, char** argv) { bool is_root = true; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - int rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - is_root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + is_root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif #ifdef ARB_PROFILE_ENABLED profile::profiler_initialize(context); diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp index 3bb100415c8c767319ade96f54c7701636cb5215..b988c6dfff00e4c9ff5ae257dff657a97efd50de 100644 --- a/example/brunel/brunel_miniapp.cpp +++ b/example/brunel/brunel_miniapp.cpp @@ -18,6 +18,8 @@ #include <arbor/simulation.hpp> #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #include <sup/path.hpp> @@ -186,15 +188,23 @@ int main(int argc, char** argv) { int rank = 0; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + rank = arb::rank(context); + root = rank==0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif std::cout << sup::mask_stream(root); @@ -257,7 +267,7 @@ int main(int argc, char** argv) { spike_out = sup::open_or_throw(p, ios_base::out, !options.over_write); sim.set_local_spike_callback(sup::spike_emitter(spike_out)); } - else if (rank==0) { + else if (root) { spike_out = sup::open_or_throw(p, ios_base::out, !options.over_write); sim.set_global_spike_callback(sup::spike_emitter(spike_out)); } diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp index 3e34dd1cc6b4e97bd66bad7b561a6c16c919e000..a85a408ccc21a46979af6b233b95c694f74ed735 100644 --- a/example/miniapp/miniapp.cpp +++ b/example/miniapp/miniapp.cpp @@ -17,6 +17,8 @@ #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #include <sup/path.hpp> @@ -46,16 +48,24 @@ int main(int argc, char** argv) { int rank = 0; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif + #ifdef ARB_PROFILE_ENABLED profile::profiler_initialize(context); #endif diff --git a/example/ring/ring.cpp b/example/ring/ring.cpp index f92950b970cfc872ae51490e69f82df60cdb03a7..a82abbc4298355a912b92e2b4990206b7c9d82bc 100644 --- a/example/ring/ring.cpp +++ b/example/ring/ring.cpp @@ -21,6 +21,8 @@ #include <arbor/recipe.hpp> #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> @@ -154,21 +156,26 @@ struct cell_stats { } }; - int main(int argc, char** argv) { try { bool root = true; + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif #ifdef ARB_PROFILE_ENABLED diff --git a/include/arbor/context.hpp b/include/arbor/context.hpp index bfaa43e4c6c469d06c12e4ad448bce7a9dc4058c..d28b86594b42e0461736eef5fb4de09c2f994d31 100644 --- a/include/arbor/context.hpp +++ b/include/arbor/context.hpp @@ -4,17 +4,6 @@ namespace arb { -/// Summary of all available local computation resource. -struct local_resources { - const unsigned num_threads; - const unsigned num_gpus; - - local_resources(unsigned threads, unsigned gpus): - num_threads(threads), - num_gpus(gpus) - {} -}; - /// Requested dry-run parameters struct dry_run_info { unsigned num_ranks; @@ -24,9 +13,6 @@ struct dry_run_info { num_cells_per_rank(cells_per_rank) {} }; -/// Determine available local domain resources. -local_resources get_local_resources(); - /// A subset of local computation resources to use in a computation. struct proc_allocation { unsigned num_threads; @@ -37,17 +23,8 @@ struct proc_allocation { // see CUDA documenation for cudaSetDevice and cudaDeviceGetAttribute int gpu_id; - // By default a proc_allocation will take all available threads and the - // GPU with id 0, if available. - proc_allocation() { - auto avail = get_local_resources(); - - // By default take all available threads. - num_threads = avail.num_threads; - - // Take the first GPU, if available. - gpu_id = avail.num_gpus>0? 0: -1; - } + // By default use one thread and no GPU. + proc_allocation(): proc_allocation(1, -1) {} proc_allocation(unsigned threads, int gpu): num_threads(threads), diff --git a/sup/CMakeLists.txt b/sup/CMakeLists.txt index cdd4ecb0560794a11672e49f039f2bdf170239f5..83887be6d43d17cf6635fc36e844a381e3fe8e0d 100644 --- a/sup/CMakeLists.txt +++ b/sup/CMakeLists.txt @@ -1,14 +1,32 @@ set(sup-sources - + affinity.cpp + concurrency.cpp glob.cpp + default_gpu.cpp ioutil.cpp json_meter.cpp path.cpp spike_emitter.cpp ) +if(ARB_WITH_MPI) + list(APPEND sup-sources + private_gpu.cpp) +endif() + add_library(arbor-sup ${sup-sources}) + target_compile_options(arbor-sup PRIVATE ${ARB_CXXOPT_ARCH}) target_link_libraries(arbor-sup PUBLIC ext-json arbor) target_include_directories(arbor-sup PUBLIC include) + +if(ARB_WITH_MPI) + target_compile_definitions(arbor-sup PRIVATE ARB_HAVE_MPI) +endif() +if(ARB_WITH_GPU) + target_include_directories(arbor-sup PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_compile_definitions(arbor-sup PRIVATE ARB_HAVE_GPU) +endif() + set_target_properties(arbor-sup PROPERTIES OUTPUT_NAME arborsup) + diff --git a/arbor/hardware/affinity.cpp b/sup/affinity.cpp similarity index 83% rename from arbor/hardware/affinity.cpp rename to sup/affinity.cpp index 73e2762f856e4cc4481b307cec475cd876b67e19..7454b30d9c3f03bdcf3105d013377918c62291ac 100644 --- a/arbor/hardware/affinity.cpp +++ b/sup/affinity.cpp @@ -12,8 +12,7 @@ extern "C" { #include <sched.h> } -namespace arb { -namespace hw { +namespace sup { std::vector<int> get_affinity() { std::vector<int> cores; @@ -33,20 +32,17 @@ std::vector<int> get_affinity() { return cores; } -} // namespace hw -} // namespace arb +} // namespace sup #else // def __linux__ // No support for non-linux systems. -namespace arb { -namespace hw { +namespace sup { std::vector<int> get_affinity() { return {}; } -} // namespace hw -} // namespace arb +} // namespace sup #endif // def __linux__ diff --git a/sup/concurrency.cpp b/sup/concurrency.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6635ae376923c53aecb18452d605c3daa1a5743f --- /dev/null +++ b/sup/concurrency.cpp @@ -0,0 +1,81 @@ +#include <cstdlib> +#include <regex> +#include <string> +#include <thread> + +#include <arbor/arbexcept.hpp> + +#include <sup/affinity.hpp> +#include <sup/concurrency.hpp> + +// TODO: C++17 use __has_include(<unistd.h>) +#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__) +#include <unistd.h> +#endif + +namespace sup { + +// Test environment variables for user-specified count of threads. +unsigned get_env_num_threads() { + const char* str; + + // select variable to use: + // If ARB_NUM_THREADS_VAR is set, use $ARB_NUM_THREADS_VAR + // else if ARB_NUM_THREAD set, use it + // else if OMP_NUM_THREADS set, use it + if (auto nthreads_var_name = std::getenv("ARB_NUM_THREADS_VAR")) { + str = std::getenv(nthreads_var_name); + } + else if (! (str = std::getenv("ARB_NUM_THREADS"))) { + str = std::getenv("OMP_NUM_THREADS"); + } + + // No environment variable set, so return 0. + if (!str) { + return 0; + } + + errno = 0; + auto nthreads = std::strtoul(str, nullptr, 10); + + // check that the environment variable string describes a non-negative integer + if (errno==ERANGE || + !std::regex_match(str, std::regex("\\s*\\d*[0-9]\\d*\\s*"))) + { + errno = 0; + throw arb::arbor_exception( + std::string("Requested number of threads \"") + str + "\" is not a valid value"); + } + errno = 0; + + return nthreads; +} + +// Take a best guess at the number of threads that can be run concurrently. +// Will return at least 1. +unsigned thread_concurrency() { + // Attempt to get count first from affinity information if available. + unsigned n = get_affinity().size(); + + // If no luck, try sysconf. +#ifdef _SC_NPROCESSORS_ONLN + if (!n) { + long r = sysconf(_SC_NPROCESSORS_ONLN); + if (r>0) { + n = (unsigned)r; + } + } +#endif + + // If still zero, try the hint from the library. + if (!n) { + n = std::thread::hardware_concurrency(); + } + + // If still zero, use one thread. + n = n? n: 1; + + return n; +} + +} // namespace sup diff --git a/sup/default_gpu.cpp b/sup/default_gpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..80f3f4b9653e93aa50f662d9e6a66978ac5e45d5 --- /dev/null +++ b/sup/default_gpu.cpp @@ -0,0 +1,32 @@ +#ifdef ARB_HAVE_GPU + +#include <cuda_runtime.h> + +namespace sup { + +// When arbor does not have CUDA support, return -1, which always +// indicates that no GPU is available. +int default_gpu() { + int n; + if (cudaGetDeviceCount(&n)==cudaSuccess) { + // if 1 or more GPUs, take the first one. + // else return -1 -> no gpu. + return n? 0: -1; + } + return -1; +} + +} // namespace sup + +#else // ifdef ARB_HAVE_GPU + +namespace sup { + +int default_gpu() { + return -1; +} + +} // namespace sup + +#endif // ifdef ARB_HAVE_GPU + diff --git a/arbor/hardware/affinity.hpp b/sup/include/sup/affinity.hpp similarity index 87% rename from arbor/hardware/affinity.hpp rename to sup/include/sup/affinity.hpp index db6c8f6bab6daacb3943ce3eb0a7ac97d884d16f..49707c86e5401090f1a1ffc0bf1979d85d1e97f6 100644 --- a/arbor/hardware/affinity.hpp +++ b/sup/include/sup/affinity.hpp @@ -3,8 +3,7 @@ #include <cstdint> #include <vector> -namespace arb { -namespace hw { +namespace sup { // The list of logical processors for which the calling thread has affinity. // If calling from the main thread at application start up, before @@ -17,5 +16,4 @@ namespace hw { // available cores. std::vector<int> get_affinity(); -} // namespace util -} // namespace arb +} // namespace sup diff --git a/arbor/threading/thread_info.hpp b/sup/include/sup/concurrency.hpp similarity index 62% rename from arbor/threading/thread_info.hpp rename to sup/include/sup/concurrency.hpp index 42195fe7f4716fc1e8a6b71b54198f738112d3a6..c7a082f38fc58e84032096141f045399078bb04c 100644 --- a/arbor/threading/thread_info.hpp +++ b/sup/include/sup/concurrency.hpp @@ -2,25 +2,28 @@ #include <arbor/util/optional.hpp> -namespace arb { -namespace threading { +namespace sup { // Test environment variables for user-specified count of threads. // Potential environment variables are tested in this order: // 1. use the environment variable specified by ARB_NUM_THREADS_VAR // 2. use ARB_NUM_THREADS // 3. use OMP_NUM_THREADS -// 4. If no variable is set, returns no value. // // Valid values for the environment variable are: // 0 : Arbor is responsible for picking the number of threads. // >0 : The number of threads to use. // +// Returns: +// >0 : the number of threads set by environment variable. +// 0 : value is not set in environment variable. +// // Throws std::runtime_error: // Environment variable is set with invalid value. -util::optional<size_t> get_env_num_threads(); +unsigned get_env_num_threads(); -size_t num_threads_init(); +// Take a best guess at the number of threads that can be run concurrently. +// Will return at least 1. +unsigned thread_concurrency(); -} // namespace threading -} // namespace arb +} // namespace sup diff --git a/sup/include/sup/gpu.hpp b/sup/include/sup/gpu.hpp new file mode 100644 index 0000000000000000000000000000000000000000..21b95a897afa6e37afd289a22d20f230428abf37 --- /dev/null +++ b/sup/include/sup/gpu.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <arbor/version.hpp> + +namespace sup { + +int default_gpu(); + +template <typename Comm> +int find_private_gpu(Comm comm); + +} // namespace sup + diff --git a/sup/private_gpu.cpp b/sup/private_gpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..434d1d610ec4412a129ca00c287d8094a5484307 --- /dev/null +++ b/sup/private_gpu.cpp @@ -0,0 +1,14 @@ +#include <mpi.h> + +#include <sup/gpu.hpp> + +namespace sup { + +// Currently a placeholder. +// Take the default gpu for serial simulations. +template <> +int find_private_gpu(MPI_Comm comm) { + return default_gpu(); +} + +} // namespace sup