From 712070f19f29242483c13be22ba1900bbb18adea Mon Sep 17 00:00:00 2001 From: Ben Cumming <bcumming@cscs.ch> Date: Wed, 5 Dec 2018 16:54:33 +0100 Subject: [PATCH] Refactor hardware detection to sup (#654) Refactoring that moves the logic for determining available concurrency and available GPUs from the core Arbor library to the sup library. This also constitutes work towards providing functionality for allocating GPUs to particular ranks when multiple GPUs are visible per rank. * Move core/thread estimation code to sup library. * Change default resource behaviour to use one thread and no GPU. * Provide an interface in the sup library for: acquiring a default GPU; for coordinating an allocation of GPUs across multiple MPI ranks. --- .ycm_extra_conf.py | 2 +- arbor/CMakeLists.txt | 4 - arbor/hardware/node_info.cpp | 53 ------------ arbor/hardware/node_info.hpp | 14 ---- arbor/local_alloc.cpp | 16 ---- arbor/threading/thread_info.cpp | 77 ------------------ arbor/threading/threading.cpp | 4 +- example/bench/bench.cpp | 22 +++-- example/brunel/brunel_miniapp.cpp | 24 ++++-- example/miniapp/miniapp.cpp | 22 +++-- example/ring/ring.cpp | 23 ++++-- include/arbor/context.hpp | 27 +------ sup/CMakeLists.txt | 20 ++++- {arbor/hardware => sup}/affinity.cpp | 12 +-- sup/concurrency.cpp | 81 +++++++++++++++++++ sup/default_gpu.cpp | 32 ++++++++ .../hardware => sup/include/sup}/affinity.hpp | 6 +- .../include/sup/concurrency.hpp | 17 ++-- sup/include/sup/gpu.hpp | 13 +++ sup/private_gpu.cpp | 14 ++++ 20 files changed, 243 insertions(+), 240 deletions(-) delete mode 100644 arbor/hardware/node_info.cpp delete mode 100644 arbor/hardware/node_info.hpp delete mode 100644 arbor/local_alloc.cpp delete mode 100644 arbor/threading/thread_info.cpp rename {arbor/hardware => sup}/affinity.cpp (83%) create mode 100644 sup/concurrency.cpp create mode 100644 sup/default_gpu.cpp rename {arbor/hardware => sup/include/sup}/affinity.hpp (87%) rename arbor/threading/thread_info.hpp => sup/include/sup/concurrency.hpp (62%) create mode 100644 sup/include/sup/gpu.hpp create mode 100644 sup/private_gpu.cpp diff --git a/.ycm_extra_conf.py b/.ycm_extra_conf.py index 0e73398a..7f04ca2e 100644 --- a/.ycm_extra_conf.py +++ b/.ycm_extra_conf.py @@ -55,7 +55,7 @@ flags = [ '-I', 'build/include', '-I', - 'aux/include', + 'sup/include', ] # Set this to the absolute path to the folder (NOT the file!) containing the diff --git a/arbor/CMakeLists.txt b/arbor/CMakeLists.txt index b8d199bc..33d410b1 100644 --- a/arbor/CMakeLists.txt +++ b/arbor/CMakeLists.txt @@ -13,13 +13,10 @@ set(arbor_sources common_types_io.cpp execution_context.cpp gpu_context.cpp - local_alloc.cpp event_binner.cpp fvm_layout.cpp fvm_lowered_cell_impl.cpp - hardware/affinity.cpp hardware/memory.cpp - hardware/node_info.cpp hardware/power.cpp io/locked_ostream.cpp io/serialize_hex.cpp @@ -43,7 +40,6 @@ set(arbor_sources spike_source_cell_group.cpp swcio.cpp threading/threading.cpp - threading/thread_info.cpp thread_private_spike_store.cpp tree.cpp util/hostname.cpp diff --git a/arbor/hardware/node_info.cpp b/arbor/hardware/node_info.cpp deleted file mode 100644 index 095c26b0..00000000 --- a/arbor/hardware/node_info.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include <thread> - -#ifdef ARB_HAVE_GPU -#include <cuda_runtime.h> -#endif - -// TODO: C++17 use __has_include(<unistd.h>) -#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__) -#include <unistd.h> -#endif - -#include "affinity.hpp" -#include "node_info.hpp" - -namespace arb { -namespace hw { - - -unsigned node_gpus() { -#ifdef ARB_HAVE_GPU - int n; - if (cudaGetDeviceCount(&n)==cudaSuccess) { - return (unsigned)(n); - } -#endif - - return 0; -} - -unsigned node_processors() { - // Attempt to get count first from affinity information if available. - unsigned n = get_affinity().size(); - - // If no luck, try sysconf. -#ifdef _SC_NPROCESSORS_ONLN - if (!n) { - long r = sysconf(_SC_NPROCESSORS_ONLN); - if (r>0) { - n = (unsigned)r; - } - } -#endif - - // If still zero, try the hint from the library. - if (!n) { - n = std::thread::hardware_concurrency(); - } - - return n; -} - -} // namespace util -} // namespace arb diff --git a/arbor/hardware/node_info.hpp b/arbor/hardware/node_info.hpp deleted file mode 100644 index 0452bdd4..00000000 --- a/arbor/hardware/node_info.hpp +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -namespace arb { -namespace hw { - -// Number of GPUs detected on the node. -unsigned node_gpus(); - -// Number of visible logical processors on the node. -// 0 => unable to determine. -unsigned node_processors(); - -} // namespace hw -} // namespace arb diff --git a/arbor/local_alloc.cpp b/arbor/local_alloc.cpp deleted file mode 100644 index 3320e22e..00000000 --- a/arbor/local_alloc.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include <arbor/context.hpp> - -#include "hardware/node_info.hpp" -#include "threading/thread_info.hpp" -#include "threading/threading.hpp" - -namespace arb { - -local_resources get_local_resources() { - auto avail_threads = threading::num_threads_init(); - auto avail_gpus = arb::hw::node_gpus(); - - return local_resources(avail_threads, avail_gpus); -} - -} // namespace arb diff --git a/arbor/threading/thread_info.cpp b/arbor/threading/thread_info.cpp deleted file mode 100644 index dce15dbb..00000000 --- a/arbor/threading/thread_info.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include <cstdlib> -#include <exception> -#include <regex> -#include <string> - -#include <arbor/arbexcept.hpp> -#include <arbor/util/optional.hpp> -#include <hardware/node_info.hpp> - -#include "thread_info.hpp" -#include "util/strprintf.hpp" - -namespace arb { -namespace threading { - -// Test environment variables for user-specified count of threads. -// -// ARB_NUM_THREADS is used if set, otherwise OMP_NUM_THREADS is used. -// -// If neither variable is set, returns no value. -// -// Valid values for the environment variable are: -// 0 : Arbor is responsible for picking the number of threads. -// >0: The number of threads to use. -// -// Throws std::runtime_error: -// ARB_NUM_THREADS or OMP_NUM_THREADS is set with invalid value. -util::optional<size_t> get_env_num_threads() { - const char* str; - - // select variable to use: - // If ARB_NUM_THREADS_VAR is set, use $ARB_NUM_THREADS_VAR - // else if ARB_NUM_THREAD set, use it - // else if OMP_NUM_THREADS set, use it - if (auto nthreads_var_name = std::getenv("ARB_NUM_THREADS_VAR")) { - str = std::getenv(nthreads_var_name); - } - else if (! (str = std::getenv("ARB_NUM_THREADS"))) { - str = std::getenv("OMP_NUM_THREADS"); - } - - // If the selected var is unset set the number of threads to - // the hint given by the standard library - if (!str) { - return util::nullopt; - } - - errno = 0; - auto nthreads = std::strtoul(str, nullptr, 10); - - // check that the environment variable string describes a non-negative integer - if (errno==ERANGE || - !std::regex_match(str, std::regex("\\s*\\d*[0-9]\\d*\\s*"))) - { - throw arbor_exception(util::pprintf( - "requested number of threads \"{}\" is not a valid value", str)); - } - - return nthreads; -} - -std::size_t num_threads_init() { - std::size_t n = 0; - - if (auto env_threads = get_env_num_threads()) { - n = env_threads.value(); - } - - if (!n) { - n = hw::node_processors(); - } - - return n? n: 1; -} - -} // namespace threading -} // namespace arb diff --git a/arbor/threading/threading.cpp b/arbor/threading/threading.cpp index ec116fad..ae251d6e 100644 --- a/arbor/threading/threading.cpp +++ b/arbor/threading/threading.cpp @@ -1,7 +1,6 @@ #include <atomic> #include "threading.hpp" -#include "thread_info.hpp" using namespace arb::threading::impl; using namespace arb::threading; @@ -82,7 +81,8 @@ void task_system::try_run_task() { } } -task_system::task_system(): task_system(num_threads_init()) {} +// Default construct with one thread. +task_system::task_system(): task_system(1) {} task_system::task_system(int nthreads): count_(nthreads), q_(nthreads) { if (nthreads <= 0) diff --git a/example/bench/bench.cpp b/example/bench/bench.cpp index 1bae7acf..4ab37a5e 100644 --- a/example/bench/bench.cpp +++ b/example/bench/bench.cpp @@ -18,6 +18,8 @@ #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #ifdef ARB_MPI_ENABLED @@ -33,16 +35,22 @@ int main(int argc, char** argv) { bool is_root = true; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - int rank = 0; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - is_root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + is_root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif #ifdef ARB_PROFILE_ENABLED profile::profiler_initialize(context); diff --git a/example/brunel/brunel_miniapp.cpp b/example/brunel/brunel_miniapp.cpp index 3bb10041..b988c6df 100644 --- a/example/brunel/brunel_miniapp.cpp +++ b/example/brunel/brunel_miniapp.cpp @@ -18,6 +18,8 @@ #include <arbor/simulation.hpp> #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #include <sup/path.hpp> @@ -186,15 +188,23 @@ int main(int argc, char** argv) { int rank = 0; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + rank = arb::rank(context); + root = rank==0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif std::cout << sup::mask_stream(root); @@ -257,7 +267,7 @@ int main(int argc, char** argv) { spike_out = sup::open_or_throw(p, ios_base::out, !options.over_write); sim.set_local_spike_callback(sup::spike_emitter(spike_out)); } - else if (rank==0) { + else if (root) { spike_out = sup::open_or_throw(p, ios_base::out, !options.over_write); sim.set_global_spike_callback(sup::spike_emitter(spike_out)); } diff --git a/example/miniapp/miniapp.cpp b/example/miniapp/miniapp.cpp index 3e34dd1c..a85a408c 100644 --- a/example/miniapp/miniapp.cpp +++ b/example/miniapp/miniapp.cpp @@ -17,6 +17,8 @@ #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> #include <sup/path.hpp> @@ -46,16 +48,24 @@ int main(int argc, char** argv) { int rank = 0; try { + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif + #ifdef ARB_PROFILE_ENABLED profile::profiler_initialize(context); #endif diff --git a/example/ring/ring.cpp b/example/ring/ring.cpp index f92950b9..a82abbc4 100644 --- a/example/ring/ring.cpp +++ b/example/ring/ring.cpp @@ -21,6 +21,8 @@ #include <arbor/recipe.hpp> #include <arbor/version.hpp> +#include <sup/concurrency.hpp> +#include <sup/gpu.hpp> #include <sup/ioutil.hpp> #include <sup/json_meter.hpp> @@ -154,21 +156,26 @@ struct cell_stats { } }; - int main(int argc, char** argv) { try { bool root = true; + arb::proc_allocation resources; + if (auto nt = sup::get_env_num_threads()) { + resources.num_threads = nt; + } + else { + resources.num_threads = sup::thread_concurrency(); + } + #ifdef ARB_MPI_ENABLED sup::with_mpi guard(argc, argv, false); - auto context = arb::make_context(arb::proc_allocation(), MPI_COMM_WORLD); - { - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - root = rank==0; - } + resources.gpu_id = sup::find_private_gpu(MPI_COMM_WORLD); + auto context = arb::make_context(resources, MPI_COMM_WORLD); + root = arb::rank(context) == 0; #else - auto context = arb::make_context(); + resources.gpu_id = sup::default_gpu(); + auto context = arb::make_context(resources); #endif #ifdef ARB_PROFILE_ENABLED diff --git a/include/arbor/context.hpp b/include/arbor/context.hpp index bfaa43e4..d28b8659 100644 --- a/include/arbor/context.hpp +++ b/include/arbor/context.hpp @@ -4,17 +4,6 @@ namespace arb { -/// Summary of all available local computation resource. -struct local_resources { - const unsigned num_threads; - const unsigned num_gpus; - - local_resources(unsigned threads, unsigned gpus): - num_threads(threads), - num_gpus(gpus) - {} -}; - /// Requested dry-run parameters struct dry_run_info { unsigned num_ranks; @@ -24,9 +13,6 @@ struct dry_run_info { num_cells_per_rank(cells_per_rank) {} }; -/// Determine available local domain resources. -local_resources get_local_resources(); - /// A subset of local computation resources to use in a computation. struct proc_allocation { unsigned num_threads; @@ -37,17 +23,8 @@ struct proc_allocation { // see CUDA documenation for cudaSetDevice and cudaDeviceGetAttribute int gpu_id; - // By default a proc_allocation will take all available threads and the - // GPU with id 0, if available. - proc_allocation() { - auto avail = get_local_resources(); - - // By default take all available threads. - num_threads = avail.num_threads; - - // Take the first GPU, if available. - gpu_id = avail.num_gpus>0? 0: -1; - } + // By default use one thread and no GPU. + proc_allocation(): proc_allocation(1, -1) {} proc_allocation(unsigned threads, int gpu): num_threads(threads), diff --git a/sup/CMakeLists.txt b/sup/CMakeLists.txt index cdd4ecb0..83887be6 100644 --- a/sup/CMakeLists.txt +++ b/sup/CMakeLists.txt @@ -1,14 +1,32 @@ set(sup-sources - + affinity.cpp + concurrency.cpp glob.cpp + default_gpu.cpp ioutil.cpp json_meter.cpp path.cpp spike_emitter.cpp ) +if(ARB_WITH_MPI) + list(APPEND sup-sources + private_gpu.cpp) +endif() + add_library(arbor-sup ${sup-sources}) + target_compile_options(arbor-sup PRIVATE ${ARB_CXXOPT_ARCH}) target_link_libraries(arbor-sup PUBLIC ext-json arbor) target_include_directories(arbor-sup PUBLIC include) + +if(ARB_WITH_MPI) + target_compile_definitions(arbor-sup PRIVATE ARB_HAVE_MPI) +endif() +if(ARB_WITH_GPU) + target_include_directories(arbor-sup PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_compile_definitions(arbor-sup PRIVATE ARB_HAVE_GPU) +endif() + set_target_properties(arbor-sup PROPERTIES OUTPUT_NAME arborsup) + diff --git a/arbor/hardware/affinity.cpp b/sup/affinity.cpp similarity index 83% rename from arbor/hardware/affinity.cpp rename to sup/affinity.cpp index 73e2762f..7454b30d 100644 --- a/arbor/hardware/affinity.cpp +++ b/sup/affinity.cpp @@ -12,8 +12,7 @@ extern "C" { #include <sched.h> } -namespace arb { -namespace hw { +namespace sup { std::vector<int> get_affinity() { std::vector<int> cores; @@ -33,20 +32,17 @@ std::vector<int> get_affinity() { return cores; } -} // namespace hw -} // namespace arb +} // namespace sup #else // def __linux__ // No support for non-linux systems. -namespace arb { -namespace hw { +namespace sup { std::vector<int> get_affinity() { return {}; } -} // namespace hw -} // namespace arb +} // namespace sup #endif // def __linux__ diff --git a/sup/concurrency.cpp b/sup/concurrency.cpp new file mode 100644 index 00000000..6635ae37 --- /dev/null +++ b/sup/concurrency.cpp @@ -0,0 +1,81 @@ +#include <cstdlib> +#include <regex> +#include <string> +#include <thread> + +#include <arbor/arbexcept.hpp> + +#include <sup/affinity.hpp> +#include <sup/concurrency.hpp> + +// TODO: C++17 use __has_include(<unistd.h>) +#if defined(__unix__) || defined(__APPLE__) && defined(__MACH__) +#include <unistd.h> +#endif + +namespace sup { + +// Test environment variables for user-specified count of threads. +unsigned get_env_num_threads() { + const char* str; + + // select variable to use: + // If ARB_NUM_THREADS_VAR is set, use $ARB_NUM_THREADS_VAR + // else if ARB_NUM_THREAD set, use it + // else if OMP_NUM_THREADS set, use it + if (auto nthreads_var_name = std::getenv("ARB_NUM_THREADS_VAR")) { + str = std::getenv(nthreads_var_name); + } + else if (! (str = std::getenv("ARB_NUM_THREADS"))) { + str = std::getenv("OMP_NUM_THREADS"); + } + + // No environment variable set, so return 0. + if (!str) { + return 0; + } + + errno = 0; + auto nthreads = std::strtoul(str, nullptr, 10); + + // check that the environment variable string describes a non-negative integer + if (errno==ERANGE || + !std::regex_match(str, std::regex("\\s*\\d*[0-9]\\d*\\s*"))) + { + errno = 0; + throw arb::arbor_exception( + std::string("Requested number of threads \"") + str + "\" is not a valid value"); + } + errno = 0; + + return nthreads; +} + +// Take a best guess at the number of threads that can be run concurrently. +// Will return at least 1. +unsigned thread_concurrency() { + // Attempt to get count first from affinity information if available. + unsigned n = get_affinity().size(); + + // If no luck, try sysconf. +#ifdef _SC_NPROCESSORS_ONLN + if (!n) { + long r = sysconf(_SC_NPROCESSORS_ONLN); + if (r>0) { + n = (unsigned)r; + } + } +#endif + + // If still zero, try the hint from the library. + if (!n) { + n = std::thread::hardware_concurrency(); + } + + // If still zero, use one thread. + n = n? n: 1; + + return n; +} + +} // namespace sup diff --git a/sup/default_gpu.cpp b/sup/default_gpu.cpp new file mode 100644 index 00000000..80f3f4b9 --- /dev/null +++ b/sup/default_gpu.cpp @@ -0,0 +1,32 @@ +#ifdef ARB_HAVE_GPU + +#include <cuda_runtime.h> + +namespace sup { + +// When arbor does not have CUDA support, return -1, which always +// indicates that no GPU is available. +int default_gpu() { + int n; + if (cudaGetDeviceCount(&n)==cudaSuccess) { + // if 1 or more GPUs, take the first one. + // else return -1 -> no gpu. + return n? 0: -1; + } + return -1; +} + +} // namespace sup + +#else // ifdef ARB_HAVE_GPU + +namespace sup { + +int default_gpu() { + return -1; +} + +} // namespace sup + +#endif // ifdef ARB_HAVE_GPU + diff --git a/arbor/hardware/affinity.hpp b/sup/include/sup/affinity.hpp similarity index 87% rename from arbor/hardware/affinity.hpp rename to sup/include/sup/affinity.hpp index db6c8f6b..49707c86 100644 --- a/arbor/hardware/affinity.hpp +++ b/sup/include/sup/affinity.hpp @@ -3,8 +3,7 @@ #include <cstdint> #include <vector> -namespace arb { -namespace hw { +namespace sup { // The list of logical processors for which the calling thread has affinity. // If calling from the main thread at application start up, before @@ -17,5 +16,4 @@ namespace hw { // available cores. std::vector<int> get_affinity(); -} // namespace util -} // namespace arb +} // namespace sup diff --git a/arbor/threading/thread_info.hpp b/sup/include/sup/concurrency.hpp similarity index 62% rename from arbor/threading/thread_info.hpp rename to sup/include/sup/concurrency.hpp index 42195fe7..c7a082f3 100644 --- a/arbor/threading/thread_info.hpp +++ b/sup/include/sup/concurrency.hpp @@ -2,25 +2,28 @@ #include <arbor/util/optional.hpp> -namespace arb { -namespace threading { +namespace sup { // Test environment variables for user-specified count of threads. // Potential environment variables are tested in this order: // 1. use the environment variable specified by ARB_NUM_THREADS_VAR // 2. use ARB_NUM_THREADS // 3. use OMP_NUM_THREADS -// 4. If no variable is set, returns no value. // // Valid values for the environment variable are: // 0 : Arbor is responsible for picking the number of threads. // >0 : The number of threads to use. // +// Returns: +// >0 : the number of threads set by environment variable. +// 0 : value is not set in environment variable. +// // Throws std::runtime_error: // Environment variable is set with invalid value. -util::optional<size_t> get_env_num_threads(); +unsigned get_env_num_threads(); -size_t num_threads_init(); +// Take a best guess at the number of threads that can be run concurrently. +// Will return at least 1. +unsigned thread_concurrency(); -} // namespace threading -} // namespace arb +} // namespace sup diff --git a/sup/include/sup/gpu.hpp b/sup/include/sup/gpu.hpp new file mode 100644 index 00000000..21b95a89 --- /dev/null +++ b/sup/include/sup/gpu.hpp @@ -0,0 +1,13 @@ +#pragma once + +#include <arbor/version.hpp> + +namespace sup { + +int default_gpu(); + +template <typename Comm> +int find_private_gpu(Comm comm); + +} // namespace sup + diff --git a/sup/private_gpu.cpp b/sup/private_gpu.cpp new file mode 100644 index 00000000..434d1d61 --- /dev/null +++ b/sup/private_gpu.cpp @@ -0,0 +1,14 @@ +#include <mpi.h> + +#include <sup/gpu.hpp> + +namespace sup { + +// Currently a placeholder. +// Take the default gpu for serial simulations. +template <> +int find_private_gpu(MPI_Comm comm) { + return default_gpu(); +} + +} // namespace sup -- GitLab