diff --git a/arbor/include/arbor/communication/remote.hpp b/arbor/include/arbor/communication/remote.hpp index f5bef0c4b746ec40be558e036a3def0f9053e4e9..05b3c89882e55c3317ae5d0e744a36f77021372e 100644 --- a/arbor/include/arbor/communication/remote.hpp +++ b/arbor/include/arbor/communication/remote.hpp @@ -2,8 +2,7 @@ #include <cstring> #include <cstdint> -#include <memory> -#include <exception> +#include <stdexcept> #include <variant> #include <vector> #include <string> diff --git a/doc/concepts/interconnectivity.rst b/doc/concepts/interconnectivity.rst index bdb93f3a2906a0c26bb71884de04a6d7d80af280..c2d3a94213356cfc411010624758ce9714308219 100644 --- a/doc/concepts/interconnectivity.rst +++ b/doc/concepts/interconnectivity.rst @@ -441,7 +441,7 @@ exchange process. Due to the way MPI defines intercommunicators, the exchange is the same as with intracommunicators. Control Plane and Epochs -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~ Before initiating the actual simulation, Arbor sets the ``epoch`` length to half the minimal delay in the global network. The minimal delay can be queried using @@ -500,9 +500,9 @@ scenarios where both sides are launched as a single job (eg via ``SLURM``), but might do so where unrelated jobs are used. Tying It All Together -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~ -While there is no requirement on doing, we strongly recommend to make use of the +While there is no requirement on doing so, we strongly recommend to make use of the facilities offered in ``arbor/communication/remote.hpp``, as does Arbor internally. 
It should also be possible to interact with this protocol via ``C`` or other languages, if needed, as the infrastructure relies on byte-buffers and @@ -610,6 +610,12 @@ Terms and Definitions API --- +* Interconnectivity + + * :ref:`Python <pyinterconnectivity>` + * :ref:`C++ <cppinterconnectivity>` + +* Remote -* :ref:`Python <pyinterconnectivity>` -* :ref:`C++ <cppinterconnectivity>` + * :ref:`Python <pyremote>` + * :ref:`C++ <cppremote>` diff --git a/doc/cpp/remote.rst b/doc/cpp/remote.rst new file mode 100644 index 0000000000000000000000000000000000000000..db20905f0779039df332f50eab30cb8c69d02731 --- /dev/null +++ b/doc/cpp/remote.rst @@ -0,0 +1,77 @@ +.. _cppremote: + +Remote Communication +#################### + +Wraps remote communication for Arbor. This is meant to facilitate sending data +*to* Arbor, not for pulling data into Arbor from the outside, which is done +automatically. If you are developing a bridge between Arbor and another +simulator that supports calling C++, this is the correct place. In all other +cases it is likely not what you are looking for. For a description of the +protocol see :ref:`here <interconnectivity>`. + +.. cpp:namespace:: arb::remote + +Control Messages +================ + +.. cpp:class:: msg_null + + Empty message, possibly used as a keep-alive signal. + + +.. cpp:class:: msg_abort + + Request termination, giving the reason as a message. + + .. cpp:member:: char[512] reason + +.. cpp:class:: msg_epoch + + Commence next epoch, giving the half-open interval :math:`[from, to)` with times + in `ms`. + + .. cpp:member:: double t_start + + .. cpp:member:: double t_end + +.. cpp:class:: msg_done + + Conclude simulation, giving the final time :math:`t_{\mathrm{final}}` in `ms`. + + .. cpp:member:: double time + +.. cpp:type:: ctrl_message = std::variant<msg_null, msg_abort, msg_epoch, msg_done> + +.. 
function:: exchange_ctrl(ctrl_message message, MPI_Comm comm) + + Send ``message`` to all peers in the MPI intercomm ``comm`` and receive the + unanimous answer. ``message`` must be one of the types ``msg_*`` above. + +Spike Exchange +============== + +.. cpp:class:: arb_spike + + .. cpp:member:: uint32_t gid + + Global id of the spiking cell, must fit in an unsigned 32b integer. + ``gid`` must be unique in the external network. + + .. cpp:member:: uint32_t lid + + Local id on the spiking cell, must fit in an unsigned 32b integer. This + ``lid`` describes which logical part of the cell ``gid`` emitted the + spike. If the external simulation doesn't distinguish between different + sources on the same cell, always set this to zero. + + .. cpp:member:: double time + + Time at which the spike occurred. + + .. function:: gather_spikes(const std::vector<arb_spike>& spikes, MPI_Comm comm) + + Sends a buffer of spikes over ``comm`` receiving back the concatenated + result of all calling MPI tasks in Arbor. This is a collective + operation; each MPI task on the remote side must call it simultaneously + with its *local* part of the spikes to send. diff --git a/doc/python/remote.rst b/doc/python/remote.rst new file mode 100644 index 0000000000000000000000000000000000000000..83af055adae0110f7752dd16f0cbc44ff22dea68 --- /dev/null +++ b/doc/python/remote.rst @@ -0,0 +1,74 @@ +.. _pyremote: + +Remote Communication +#################### + +Wraps remote communication for Arbor. This is meant to facilitate sending data +*to* Arbor, not for pulling data into Arbor from the outside, which is done +automatically. If you are developing a bridge between Arbor and another +simulator that is written in pure Python, this is the correct place. In all +other cases it is likely not what you are looking for. For a description of the +protocol see :ref:`here <interconnectivity>`. + +.. currentmodule:: arbor + +Control Messages +================ + +.. class:: msg_null + + Empty message, possibly used as a keep-alive signal. + + .. 
function:: msg_null() + +.. class:: msg_abort + + Request termination, giving the reason as a message (< 512 bytes). + + .. function:: msg_abort(reason) + +.. class:: msg_epoch + + Commence next epoch, giving the half-open interval :math:`[from, to)` with times + in `ms`. + + .. function:: msg_epoch(from, to) + +.. class:: msg_done + + Conclude simulation, giving the final time :math:`t_{\mathrm{final}}` in `ms`. + + .. function:: msg_done(tfinal) + +.. function:: exchange_ctrl(message, comm) + + Send ``message`` to all peers in the MPI intercomm ``comm`` and receive the + unanimous answer. ``message`` must be one of the types ``msg_*`` above. + +Spike Exchange +============== + +.. class:: arb_spike + + .. attribute:: gid + + Global id of the spiking cell, must fit in an unsigned 32b integer. + ``gid`` must be unique in the external network. + + .. attribute:: lid + + Local id on the spiking cell, must fit in an unsigned 32b integer. This + ``lid`` describes which logical part of the cell ``gid`` emitted the + spike. If the external simulation doesn't distinguish between different + sources on the same cell, always set this to zero. + + .. attribute:: time + + Time at which the spike occurred. + +.. function:: gather_spikes(spikes, comm) + + Sends a buffer of spikes over ``comm`` receiving back the concatenated + result of all calling MPI tasks in Arbor. This is a collective + operation; each MPI task on the remote side must call it simultaneously + with its *local* part of the spikes to send. 
diff --git a/ext/units b/ext/units index e7aff9f8e4cc1ce19b1ea7e7095036e64123601f..7917f5f2cfefdcc90b5085ade91761d06df74e59 160000 --- a/ext/units +++ b/ext/units @@ -1 +1 @@ -Subproject commit e7aff9f8e4cc1ce19b1ea7e7095036e64123601f +Subproject commit 7917f5f2cfefdcc90b5085ade91761d06df74e59 diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c590da1f1260727b1be3d2ac2b3172af4ad7c06b..54b13c3bc5c4bf9aada54143dbcf589cb308ca19 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -43,6 +43,7 @@ set(pyarb_source simulation.cpp single_cell_model.cpp env.cpp + remote.cpp units.cpp ) diff --git a/python/mpi.hpp b/python/mpi.hpp index 2a278e6fcb3539123c74a075a8ea9d49f461e7b0..8a14d76297a5a12d0316ab8e45956153d9ca03dd 100644 --- a/python/mpi.hpp +++ b/python/mpi.hpp @@ -2,6 +2,7 @@ #ifdef ARB_MPI_ENABLED #include <mpi.h> +#include <pybind11/pybind11.h> namespace pyarb { // A shim is required for MPI_Comm, because OpenMPI defines it as a pointer to diff --git a/python/pyarb.cpp b/python/pyarb.cpp index d4c14f076626c5834148530f8157df76fb1c1258..6a5da40d9a7771c661690e5f44d9db4fc688d790 100644 --- a/python/pyarb.cpp +++ b/python/pyarb.cpp @@ -37,6 +37,7 @@ void register_label_dict(pybind11::module& m); #ifdef ARB_MPI_ENABLED void register_mpi(pybind11::module& m); +void register_remote(pybind11::module& m); #endif } // namespace pyarb @@ -61,7 +62,6 @@ PYBIND11_MODULE(_arbor, m) { pyarb::register_cable_probes(m, global_ptr); pyarb::register_mechanisms(m); pyarb::register_cells(m); - pyarb::register_cable_loader(m); pyarb::register_config(m); pyarb::register_contexts(m); @@ -72,6 +72,10 @@ PYBIND11_MODULE(_arbor, m) { pyarb::register_arborenv(m); pyarb::register_single_cell(m); pyarb::register_network(m); + #ifdef ARB_MPI_ENABLED + pyarb::register_mpi(m); + pyarb::register_remote(m); + #endif // This is the fallback. All specific translators take precedence by being // registered *later*. 
@@ -100,8 +104,4 @@ PYBIND11_MODULE(_arbor, m) { pybind11::register_exception<arb::zero_thread_requested_error>(m, "ArbValueError", PyExc_ValueError); pybind11::implicitly_convertible<const std::tuple<double, double, double, double>&, arb::mpoint>(); - - #ifdef ARB_MPI_ENABLED - pyarb::register_mpi(m); - #endif }
diff --git a/python/remote.cpp b/python/remote.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..15cd56bb907b012dd8a2ad84daa7745d7cd4c4bb
--- /dev/null
+++ b/python/remote.cpp
@@ -0,0 +1,111 @@
+#include <arbor/version.hpp>
+
+#ifdef ARB_MPI_ENABLED
+
+#include "mpi.hpp"
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include <arbor/communication/remote.hpp>
+
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "conversion.hpp"
+#include "context.hpp"
+#include "error.hpp"
+#include "strprintf.hpp"
+
+namespace pyarb {
+using namespace pybind11::literals;
+
+namespace {
+// Resolve a Python communicator object to a raw MPI_Comm: first via the
+// can_convert_to_mpi_comm/convert_to_mpi_comm pair, then via pyarb's
+// mpi_comm_shim wrapper. Throws pyarb_error if neither route applies.
+MPI_Comm as_mpi_comm(pybind11::object mpi) {
+    const char* err = "Invalid MPI Communicator.";
+    if (can_convert_to_mpi_comm(mpi)) return convert_to_mpi_comm(mpi);
+    if (auto shim = py2optional<mpi_comm_shim>(mpi, err)) return shim->comm;
+    throw pyarb_error(err);
+}
+
+// Build an abort message from `reason`. The buffer is zeroed first and at most
+// sizeof(reason) - 1 bytes are copied, so the result is always NUL-terminated;
+// longer reasons are silently truncated.
+arb::remote::msg_abort make_abort(const std::string& reason) {
+    auto res = arb::remote::msg_abort{};
+    std::memset(res.reason, 0x0, sizeof(res.reason));
+    std::strncpy(res.reason, reason.c_str(), sizeof(res.reason) - 1);
+    return res;
+}
+} // anonymous namespace
+
+// Register the `remote` submodule: control messages, the spike type, and the
+// two collective operations used to talk to Arbor over an MPI communicator.
+void register_remote(pybind11::module& m) {
+    auto s = m.def_submodule("remote", "Wrappers for remote communication.");
+
+    pybind11::class_<arb::remote::msg_null> msg_null(s, "msg_null", "Empty message.");
+    msg_null
+        .def(pybind11::init<>([]() { return arb::remote::msg_null{}; }))
+        .def("__repr__", [](const arb::remote::msg_null&) { return "(arb::remote::msg_null)"; })
+        .def("__str__", [](const arb::remote::msg_null&) { return "(msg_null)"; });
+
+    pybind11::class_<arb::remote::msg_abort> msg_abort(s, "msg_abort", "Aborting with error.");
+    msg_abort
+        .def(pybind11::init<>([](const std::string& reason) { return make_abort(reason); }),
+             "reason"_a,
+             "Signal abort with a reason.")
+        .def(pybind11::init<>([]() { return make_abort(std::string{}); }),
+             "Signal abort without a reason.")
+        .def("__repr__", [](const arb::remote::msg_abort& msg) { return util::pprintf("(arb::remote::msg_abort reason={})", msg.reason); })
+        .def("__str__", [](const arb::remote::msg_abort& msg) { return util::pprintf("(abort reason={})", msg.reason); });
+
+    // NOTE(review): `from` is a reserved word in Python, so this argument can
+    // only be passed positionally (or via **kwargs); kept for compatibility.
+    pybind11::class_<arb::remote::msg_epoch> msg_epoch(s, "msg_epoch", "Commencing epoch.");
+    msg_epoch
+        .def(pybind11::init<>([](double f, double t) { return arb::remote::msg_epoch{f, t}; }),
+             "from"_a, "to"_a,
+             "Signal commencing of epoch [from, to).")
+        .def("__repr__", [](const arb::remote::msg_epoch& msg) { return util::pprintf("(arb::remote::msg_epoch from={} to={})", msg.t_start, msg.t_end); })
+        .def("__str__", [](const arb::remote::msg_epoch& msg) { return util::pprintf("(epoch from={} to={})", msg.t_start, msg.t_end); });
+
+    pybind11::class_<arb::remote::msg_done> msg_done(s, "msg_done", "Concluded simulation period with final time.");
+    msg_done
+        // Take a double: msg_done::time is a double, a float parameter would
+        // silently round the final time to single precision.
+        .def(pybind11::init<>([](double t) { return arb::remote::msg_done{t}; }),
+             "final"_a,
+             "Signal conclusion of simulation at time `final`.")
+        .def("__repr__", [](const arb::remote::msg_done& msg) { return util::pprintf("(arb::remote::msg_done to={})", msg.time); })
+        .def("__str__", [](const arb::remote::msg_done& msg) { return util::pprintf("(done to={})", msg.time); });
+
+    s.def("exchange_ctrl",
+          [](arb::remote::ctrl_message msg, pybind11::object mpi) {
+              return arb::remote::exchange_ctrl(msg, as_mpi_comm(mpi));
+          },
+          "msg"_a, "mpi_comm"_a,
+          "Send given control message to all peers and receive their (unanimous) answer.");
+
+    pybind11::class_<arb::remote::arb_spike> arb_spike(s, "arb_spike", "Spike emitted by a cell in the remote simulation.");
+    arb_spike
+        .def(pybind11::init<>([](std::uint32_t gid, std::uint32_t lid, double t) { return arb::remote::arb_spike{{gid, lid}, t}; }),
+             "gid"_a, "lid"_a, "time"_a,
+             "Spike caused by cell `gid` on location `lid` at time `time`.")
+        .def("__repr__", [](const arb::remote::arb_spike& spk) { return util::pprintf("(arb::remote::arb_spike gid={} lid={} time={})", spk.source.gid, spk.source.lid, spk.time); })
+        .def("__str__", [](const arb::remote::arb_spike& spk) { return util::pprintf("(spike gid={} lid={} time={})", spk.source.gid, spk.source.lid, spk.time); });
+
+    s.def("gather_spikes",
+          [](const std::vector<arb::remote::arb_spike>& msg, pybind11::object mpi) {
+              return arb::remote::gather_spikes(msg, as_mpi_comm(mpi));
+          },
+          "msg"_a, "mpi_comm"_a,
+          "Send list of spikes to all peers and receive their collected answer.");
+}
+} // namespace pyarb
+#endif // ARB_MPI_ENABLED