Skip to content
Snippets Groups Projects
  • Sam Yates's avatar
    Replace arb::util::optional with std::optional. (#1158) · d5ace349
    Sam Yates authored
    * Substitute `std::optional<T>` for `arb::util::optional<T>` for non-reference types T.
    * Remove reference-deducing `util::value_by_key`; replace usages with new function `util::ptr_by_key`.
    * Add some missing header includes that were required but included only transitively.
    * Remove `operator<<` overload for optional in python/strprintf.hpp; replace with utility wrapper class that catches `std::optional<T>`.
    * Wrap some `std::optional` values with `to_string` in ostream output in python lib.
    
    Fixes #1154.
    Unverified
    d5ace349
gpu_uuid.cpp 5.73 KiB
#include <algorithm>
#include <array>
#include <cstring>
#include <functional>
#include <iomanip>
#include <ios>
#include <numeric>
#include <optional>
#include <ostream>
#include <stdexcept>
#include <vector>

#include <arbor/util/scope_exit.hpp>
#include "gpu_uuid.hpp"
#include "gpu_api.hpp"


#ifdef __linux__
extern "C" {
    #include <unistd.h>
}

std::optional<std::string> get_hostname() {
    // Hostnames can be up to 256 characters in length, however on many systems
    // it is limitted to 64.
    char name[256];
    auto result = gethostname(name, sizeof(name));
    if (result) {
        return std::nullopt;
    }
    return std::string(name);
}
#else
std::optional<std::string> get_hostname() {
    return std::nullopt;
}
#endif


using arb::util::on_scope_exit;

namespace arbenv {

// Test GPU uids for equality
bool operator==(const uuid& lhs, const uuid& rhs) {
    for (auto i=0u; i<lhs.bytes.size(); ++i) {
        if (lhs.bytes[i]!=rhs.bytes[i]) return false;
    }
    return true;
}

// Strict lexographical ordering of GPU uids
bool operator<(const uuid& lhs, const uuid& rhs) {
    for (auto i=0u; i<lhs.bytes.size(); ++i) {
        if (lhs.bytes[i]<rhs.bytes[i]) return true;
        if (lhs.bytes[i]>lhs.bytes[i]) return false;
    }
    return false;
}

std::ostream& operator<<(std::ostream& o, const uuid& id) {
    std::ios old_state(nullptr);
    old_state.copyfmt(o);
    o << std::hex << std::setfill('0');

    bool first = true;
    int ranges[6] = {0, 4, 6, 8, 10, 16};
    for (int i=0; i<5; ++i) {
        if (!first) o << "-";
        for (auto j=ranges[i]; j<ranges[i+1]; ++j) {
            o << std::setw(2) << (int)id.bytes[j];
        }
        first = false;
    }
    o.copyfmt(old_state);
    return o;
}

std::runtime_error make_runtime_error(api_error_type error_code) {
    return std::runtime_error(
        std::string("gpu runtime error ")
        + error_code.name() + ": " + error_code.description());
}

// For CUDA 10 and later the uuid of all available GPUs is straightforward
// to obtain by querying cudaGetDeviceProperties for each visible device.
std::vector<uuid> get_gpu_uuids() {
    // Get number of devices.
    int ngpus = 0;
    auto status = get_device_count(&ngpus);
    if (status.no_device_found()) {
        // No GPUs detected: return an empty list.
        return {};
    }
    else if (!status) {
        throw make_runtime_error(status);
    }

    // Storage for the uuids.
    std::vector<uuid> uuids(ngpus);

    // For each GPU query CUDA runtime API for uuid.
    for (int i=0; i<ngpus; ++i) {
        DeviceProp props;
        status = get_device_properties(&props, i);
        if (!status) {
            throw make_runtime_error(status);
        }

#ifdef ARB_HIP
        // Build a unique string for the device and hash it, then
        // copy the bytes of the has to uuids[i].
        auto host = get_hostname();
        if (!host) throw std::runtime_error("Can't uniquely identify GPUs on the system");
        auto uid = std::hash<std::string>{} (*host + '-' + std::to_string(props.pciBusID) + '-' + std::to_string(props.pciDeviceID));
        auto b = reinterpret_cast<const unsigned char*>(&uid);
        std::copy(b, b+sizeof(std::size_t), uuids[i].bytes.begin());
#else
        // Copy the bytes from props.uuid to uuids[i].
        auto b = reinterpret_cast<const unsigned char*>(&props.uuid);
        std::copy(b, b+sizeof(uuid), uuids[i].bytes.begin());
#endif
    }

    return uuids;
}

// Compare two sets of uuids
//   1: both sets are identical
//  -1: some common elements
//   0: no common elements
// Each set is described by a pair of iterators.
template <typename I>
int compare_gpu_groups(std::pair<I,I> l, std::pair<I,I> r) {
    auto range_size = [] (auto& rng) { return std::distance(rng.first, rng.second);};
    if (range_size(l)<range_size(r)) {
        std::swap(l, r);
    }

    unsigned count = 0;
    for (auto it=l.first; it!=l.second; ++it) {
        if (std::find(r.first, r.second, *it)!=r.second) ++count;
    }

    // test for complete match
    if (count==range_size(l) && count==range_size(r)) return 1;
    // test for partial match
    if (count) return -1;
    return 0;
}

gpu_rank assign_gpu(const std::vector<uuid>& uids,
                    const std::vector<int>&  uid_part,
                    int rank)
{
    // Determine the number of ranks in MPI communicator
    auto nranks = uid_part.size()-1;

    // Helper that generates the range of gpu id for rank i
    auto make_group = [&] (int i) {
        return std::make_pair(uids.begin()+uid_part[i], uids.begin()+uid_part[i+1]);
    };

    // The list of ranks that share the same GPUs as this rank (including this rank).
    std::vector<int> neighbors;

    // The gpu uid range for this rank
    auto local_gpus = make_group(rank);

    // Find all ranks with the same set of GPUs as this rank.
    for (std::size_t i=0; i<nranks; ++i) {
        auto other_gpus = make_group(i);
        auto match = compare_gpu_groups(local_gpus, other_gpus);
        if (match==1) { // found a match
            neighbors.push_back(i);
        }
        else if (match==-1) { // partial match, which is not permitted
            return {};
        }
        // case where match==0 can be ignored.
    }

    // Determine the position of this rank in the sorted list of ranks.
    int pos_in_group =
        std::distance(
            neighbors.begin(),
            std::find(neighbors.begin(), neighbors.end(), rank));

    // The number of GPUs available to the ranks.
    int ngpu_in_group = std::distance(local_gpus.first, local_gpus.second);

    // Assign GPUs to the first ngpu ranks. If there are more ranks than GPUs,
    // some ranks will not be assigned a GPU (return -1).
    return pos_in_group<ngpu_in_group? gpu_rank(pos_in_group): gpu_rank(-1);
}

} // namespace arbenv