diff --git a/arborenv/concurrency.cpp b/arborenv/concurrency.cpp
index fd0e59198238ff41d68b309c7fb9be968d12dd75..1880d8f2f0947e30501c8973f0172c8ab146bce7 100644
--- a/arborenv/concurrency.cpp
+++ b/arborenv/concurrency.cpp
@@ -19,7 +19,7 @@ unsigned get_env_num_threads() {
 
     // select variable to use:
     //   If ARB_NUM_THREADS_VAR is set, use $ARB_NUM_THREADS_VAR
-    //   else if ARB_NUM_THREAD set, use it
+    //   else if ARB_NUM_THREADS set, use it
     //   else if OMP_NUM_THREADS set, use it
     if (auto nthreads_var_name = std::getenv("ARB_NUM_THREADS_VAR")) {
         str = std::getenv(nthreads_var_name);
diff --git a/doc/Rasterplot b/doc/Rasterplot
deleted file mode 100644
index d4d1fa6f4d1071b3a90e0c544d7234d5922c90a1..0000000000000000000000000000000000000000
Binary files a/doc/Rasterplot and /dev/null differ
diff --git a/doc/cpp_distributed_context.rst b/doc/cpp_distributed_context.rst
index b5878a1eafc420dff6221242046f2cfaaa57ac50..93a07ce042bfc177369df35da20116610efaf4ad 100644
--- a/doc/cpp_distributed_context.rst
+++ b/doc/cpp_distributed_context.rst
@@ -124,8 +124,8 @@ Class Documentation
 
   .. cpp:function:: std::vector<std::string> gather(std::string value, int root) const
 
-    Special overload for gathering a string provided by each domain into a vector
-    of strings on domain :cpp:var:`root`.
+    Overload for gathering a string from each domain into a vector
+    of strings on domain :cpp:any:`root`.
 
   .. cpp:function:: T min(T value) const
 
@@ -186,7 +186,7 @@ Class Documentation
 
   .. cpp:function:: mpi_context(MPI_Comm comm)
 
-    Create a context that will uses the MPI communicator :cpp:var:`comm`.
+    Create a context that will use the MPI communicator :cpp:any:`comm`.
 
 .. cpp:function:: distributed_context_handle make_mpi_context(MPI_Comm comm)
 
diff --git a/doc/cpp_domdec.rst b/doc/cpp_domdec.rst
index 755385ca879164e29bf06c5e745696c483c9b1d8..c109292c7cd05566111b805d4940e9b4905b3a86 100644
--- a/doc/cpp_domdec.rst
+++ b/doc/cpp_domdec.rst
@@ -3,72 +3,112 @@
 Domain Decomposition
 ====================
 
-Definitions
------------
+The C++ API for defining hardware resources, and for partitioning a model over
+distributed and local hardware, is described here.
+Arbor provides two library APIs for working with hardware resources:
 
-Domain decomposition
-    A description of the distribution of the model over the available
-    computational resources. The description partitions the
-    cells in the model as follows:
+* The core *libarbor* library is used to *describe* the hardware resources
+  and their contexts for use in Arbor simulations.
+* The *libarborenv* library provides an API for querying available hardware
+  resources (e.g. the number of available GPUs), and for initializing MPI.
 
-        * group the cells into *cell groups* of the same kind of cell;
-        * assign each cell group to either a CPU core or GPU on a specific MPI rank.
 
-    The number of cells in each cell group depends on different factors,
-    including the type of the cell, and whether the cell group will run on a CPU
-    core or the GPU.
+Managing Hardware
+-----------------
 
-    See :cpp:class:`arb::domain_decomposition`.
+The *libarborenv* API for querying and managing hardware resources is in the
+:cpp:any:`arbenv` namespace. This functionality is in a separate
+library because the main Arbor library should only
+present an interface for running simulations on hardware resources provided
+by the calling application.
+As such, it should not provide access to how
+it manages hardware resources internally, or place restrictions on how
+the calling application selects or manages resources such as GPUs and MPI communicators.
 
-Load balancer
-    A distributed algorithm that determines the domain decomposition using the
-    model recipe and a description of the available computational resources as
-    inputs.
+However, for the purpose of writing tests, examples, benchmarks and validation
+tests, functionality for detecting GPUs, managing MPI lifetimes and the like
+is necessary. This functionality is kept in a separate library to ensure
+separation of concerns, and to provide examples of quality implementations
+of such functionality for users of the library to reuse.
 
-    See :cpp:func:`arb::partition_load_balance`.
+.. cpp:namespace:: arbenv
 
-Hardware
---------
+.. cpp:function:: arb::optional<int> get_env_num_threads()
 
-.. cpp:namespace:: arb
+   Tests whether the number of threads to use has been set in an environment variable.
+   First checks ``ARB_NUM_THREADS``, and if that is not set checks ``OMP_NUM_THREADS``.
+
+   Return value:
 
-.. cpp:class:: local_resources
+   * no value: the :cpp:any:`optional` return value contains no value if neither
+     environment variable is set.
+   * has value: the number of threads set by the environment variable.
 
-    Enumerates the computational resources available locally, specifically the
-    number of hardware threads and the number of GPUs.
+   Exceptions:
 
-    The function :cpp:func:`arb::get_local_resources` can be used to automatically
-    detect the available resources are available :cpp:class:`local_resources`
+   * throws :cpp:any:`std::runtime_error` if an environment variable is set to an
+     invalid number of threads.
 
    .. container:: example-code
 
-      .. code-block:: cpp
+      .. code-block:: cpp
 
-          auto resources = arb::get_local_resources();
-          std::cout << "This node supports " << resources.num_threads " threads," <<
-                    << " and " << resources.num_gpus << " gpus.";
+         if (auto nt = arbenv::get_env_num_threads()) {
+             std::cout << "requested " << nt.value() << " threads\n";
+         }
+         else {
+             std::cout << "no environment variable set\n";
+         }
 
-    .. cpp:function:: local_resources(unsigned threads, unsigned gpus)
+.. cpp:function:: int thread_concurrency()
 
-        Constructor.
+   Attempts to detect the number of available CPU cores. Returns 1 if unable to detect
+   the number of cores.
 
-    .. cpp:member:: const unsigned num_threads
+   .. container:: example-code
+
+      .. code-block:: cpp
+
+         // Set num_threads to value from environment variable if set,
+         // otherwise set it to the available number of cores.
+         int num_threads = 0;
+         if (auto nt = arbenv::get_env_num_threads()) {
+             num_threads = nt.value();
+         }
+         else {
+             num_threads = arbenv::thread_concurrency();
+         }
 
-        The number of threads available.
+.. cpp:function:: int default_gpu()
 
-    .. cpp:member:: const unsigned num_gpus
+   Detects whether a GPU is available, and returns the index of the GPU to use.
 
-        The number of GPUs available.
+   Return value:
 
-.. cpp:function:: local_resources get_local_resources()
+   * non-negative value: if a GPU is available, the index of the selected GPU is returned. The index will be in the range ``[0, num_gpus)`` where ``num_gpus`` is the number of GPUs detected using the ``cudaGetDeviceCount`` `CUDA API call <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html>`_.
+   * -1: if no GPU is available, or if Arbor was built without GPU support.
+
+   .. container:: example-code
 
-    Returns an instance of :cpp:class:`local_resources` with the following:
+      .. code-block:: cpp
 
-    * ``num_threads`` is determined from the ``ARB_NUM_THREADS`` environment variable if
-      set, otherwise Arbor attempts to detect the number of available hardware cores.
-      If Arbor can't determine the available threads it defaults to 1 thread.
-    * ``num_gpus`` is the number of GPUs detected using the CUDA ``cudaGetDeviceCount`` that
-      `API call <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html>`_.
+         if (arbenv::default_gpu()>-1) {
+             std::cout << "a GPU is available\n";
+         }
+
+.. cpp:function:: int find_private_gpu(MPI_Comm comm)
+
+   Assigns a distinct GPU to each MPI rank in the communicator :cpp:any:`comm`, for
+   use on systems where multiple ranks on the same node share one or more GPUs.
+   Returns the device index of the GPU assigned to the calling rank, or -1 if no
+   GPU could be assigned to it.
+
+.. cpp:class:: with_mpi
+
+   An RAII guard for managing the MPI lifetime: the constructor initializes MPI,
+   and MPI is finalized automatically when the guard goes out of scope, typically
+   at the end of ``main``. This ensures that MPI is finalized on all exit paths,
+   including early returns and stack unwinding after an exception.
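+
+   .. container:: example-code
+
+      A minimal usage sketch. It assumes that the guard can be constructed from
+      the ``argc`` and ``argv`` arguments of ``main``, and that construction and
+      destruction perform MPI initialization and finalization respectively;
+      consult the *libarborenv* headers for the exact signature.
+
+      .. code-block:: cpp
+
+         #include <arborenv/with_mpi.hpp>
+
+         int main(int argc, char** argv) {
+             // MPI is initialized here...
+             arbenv::with_mpi guard(argc, argv);
+
+             // ... set up and run a distributed simulation ...
+
+             return 0;
+         }   // ...and finalized when guard goes out of scope.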
+The core *libarbor* API for describing the hardware resources to be used by a
+simulation is in the :cpp:any:`arb` namespace.
+
+.. cpp:namespace:: arb
 
 .. cpp:class:: proc_allocation
 
diff --git a/doc/cpp_dry_run.rst b/doc/cpp_dry_run.rst
index 58698cc3a232afa0dde376b04101178965bf77c1..565e679092f7e3789436c2bd3ca2e203cc7f701b 100644
--- a/doc/cpp_dry_run.rst
+++ b/doc/cpp_dry_run.rst
@@ -1,4 +1,4 @@
-.. _cppdistcontext:
+.. _cppdryrun:
 
 .. Note::
     This is a developer feature for benchmarking, and is not useful for scientific use cases.
 
@@ -57,15 +57,15 @@ To support dry-run mode we use the following classes:
 
     .. cpp:function:: gathered_vector<arb::spike> gather_spikes(const std::vector<arb::spike>& local_spikes) const
 
-        The vector of :cpp:var:`local_spikes` represents the spikes obtained from running a
+        The vector of :cpp:any:`local_spikes` represents the spikes obtained from running a
         simulation of :cpp:member:`num_cells_per_tile_` on the local domain.
         The returned vector should contain the spikes obtained from all domains in the dry-run.
-        The spikes from the non-simulated domains are obtained by copying :cpp:var:`local_spikes`
+        The spikes from the non-simulated domains are obtained by copying :cpp:any:`local_spikes`
         and modifying the gids of each spike to refer to the corresponding gids on each domain.
         The obtained vectors of spikes from each domain are concatenated along with the original
-        :cpp:var:`local_spikes` and returned.
+        :cpp:any:`local_spikes` and returned.
 
-    .. cpp:function:: distributed_context_handle make_dry_run_context(unsigned num_ranks, unsigned num_cells_per_tile)
+    .. cpp:function:: distributed_context_handle make_dry_run_context(unsigned num_ranks, unsigned num_cells_per_tile)
 
         Convenience function that returns a handle to a :cpp:class:`dry_run_context`.
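+
+        A sketch of requesting a dry-run context. The rank and per-tile cell
+        counts below are hypothetical example values, and the function is
+        assumed here to be visible in the :cpp:any:`arb` namespace:
+
+        .. container:: example-code
+
+            .. code-block:: cpp
+
+                // Mimic 128 ranks, each holding a tile of 100 cells,
+                // while executing on a single process.
+                auto dry_run_ctx = arb::make_dry_run_context(128, 100);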
@@ -76,28 +76,21 @@ To support dry-run mode we use the following classes:
     rules: it allows connection from gids greater than the total number of cells in a recipe,
     :cpp:var:`ncells`.
 
-    :cpp:class:`arb::tile` describes the model on a single domain containing :cpp:var:`ncells` =
-    :cpp:var:`num_cells_per_tile` cells, which is to be duplicated over :cpp:var:`num_ranks`
+    :cpp:class:`arb::tile` describes the model on a single domain containing :cpp:expr:`num_cells =
+    num_cells_per_tile` cells, which is to be duplicated over :cpp:var:`num_ranks`
     domains in dry-run mode. It contains information about :cpp:var:`num_ranks` which is provided
     by the following function:
 
     .. cpp:function:: cell_size_type num_tiles() const
 
-Most of the overloaded functions in :cpp:class:`arb::tile` should describe a recipe on the local
-domain, as if it was the only domain in the simulation. The exceptions are the following 2 functions:
-
-    .. cpp:function:: std::vector<cell_connection> connections_on(cell_gid_type i) const
-
-        Returns the connections on 0 <= i < :cpp:var:`ncells`. But allows connections from gids
-        outside the local domain (gid > :cpp:var:`ncells`). This is in order to create a realistic
-        network with communication between domains.
-
-    .. cpp:function:: std::vector<event_generator> event_generators(cell_gid_type i) const
-
-        Describes event generators for all gids from all domains: 0 <= i < :cpp:var:`ncells` *
-        :cpp:var:`num_tiles()`. Unlike other functions, has knowledge of the mimicked domains,
-        namely their event generators.
+    Most of the overloaded functions in :cpp:class:`arb::tile` describe a recipe on the local
+    domain, as if it were the only domain in the simulation, except for the following two
+    functions that accept :cpp:any:`gid` arguments in the half-open interval
+    ``[0, num_cells*num_tiles)``:
 
+    .. cpp:function:: std::vector<cell_connection> connections_on(cell_gid_type gid) const
+
+    .. cpp:function:: std::vector<event_generator> event_generators(cell_gid_type gid) const
 
 .. cpp:class:: symmetric_recipe: public recipe
 
diff --git a/doc/cpp_recipe.rst b/doc/cpp_recipe.rst
index 48e953dfe55cab99883ed8114af8c5a661065d99..c40e60175f4a0edcb60567ce2a3cc39bd2462e51 100644
--- a/doc/cpp_recipe.rst
+++ b/doc/cpp_recipe.rst
@@ -5,86 +5,19 @@ Recipes
 
 The :cpp:class:`arb::recipe` class documentation is below.
 
-Why Recipes?
---------------
+.. _cpp_recipe_best_practice:
 
-The interface and design of Arbor recipes was motivated by the following aims:
+C++ Best Practices
+------------------
 
-    * Building a simulation from a recipe description must be possible in a
-      distributed system efficiently with minimal communication.
-    * To minimise the amount of memory used in model building, to make it
-      possible to build and run simulations in one run.
-
-Recipe descriptions are cell-oriented, in order that the building phase can
-be efficiently distributed and that the model can be built independently of any
-runtime execution environment.
-
-During model building, the recipe is queried first by a load balancer,
-then later when building the low-level cell groups and communication network.
-The cell-centered recipe interface, whereby cell and network properties are
-specified "per-cell", facilitates this.
-
-The steps of building a simulation from a recipe are:
-
-.. topic:: 1. Load balancing
-
-    First, the cells are partitioned over MPI ranks, and each rank parses
-    the cells assigned to it to build a cost model.
-    The ranks then coordinate to redistribute cells over MPI ranks so that
-    each rank has a balanced workload. Finally, each rank groups its local
-    cells into :cpp:type:`cell_group` s that balance the work over threads (and
-    GPU accelerators if available).
-
-.. topic:: 2. Model building
-
-    The model building phase takes the cells assigned to the local rank, and builds the
-    local cell groups and the part of the communication network by querying the recipe
-    for more information about the cells assigned to it.
-
-.. _recipe_best_practice:
-
-Best Practices
---------------
-
-Here is a set of rules of thumb to keep in mind when making recipes. The first is
-mandatory, and following the others will lead to better performance.
+Here we collect rules of thumb to keep in mind when making recipes in C++.
 
 .. topic:: Stay thread safe
 
     The load balancing and model construction are multithreaded, that is
     multiple threads query the recipe simultaneously.
    Hence calls to a recipe member should not have side effects, and should use
-    lazy evaluation when possible (see `Be lazy <recipe_lazy_>`_).
-
-.. _recipe_lazy:
-
-.. topic:: Be lazy
-
-    A recipe does not have to contain a complete description of the model in
-    memory; precompute as little as possible, and use
-    `lazy evaluation <https://en.wikipedia.org/wiki/Lazy_evaluation>`_ to generate
-    information only when requested.
-    This has multiple benefits, including:
-
-    * thread safety;
-    * minimising the memory footprint of the recipe.
-
-.. topic:: Think of the cells
-
-    When formulating a model, think cell-first, and try to formulate the model and
-    the associated workflow from a cell-centered perspective. If this isn't possible,
-    please contact the developers, because we would like to develop tools that help
-    make this simpler.
-
-.. topic:: Be reproducible
-
-    Arbor is designed to give reproduceable results when the same model is run on a
-    different number of MPI ranks or threads, or on different hardware (e.g. GPUs).
-    This only holds when a recipe provides a reproducible model description, which
-    can be a challenge when a description uses random numbers, e.g. to pick incoming
-    connections to a cell from a random subset of a cell population.
-    To get a reproduceable model, use the cell `gid` (or a hash based on the `gid`)
-    to seed random number generators, including those for :cpp:type:`event_generator` s.
+    lazy evaluation when possible (see :ref:`Be lazy <recipe_lazy>`).
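+
+    For example, a thread-safe :cpp:any:`connections_on` derives its result from
+    the :cpp:any:`gid` argument alone instead of reading or writing shared state.
+    A minimal sketch, assuming a hypothetical ring network with ``num_cells_``,
+    ``weight_`` and ``delay_`` members on the recipe:
+
+    .. container:: example-code
+
+        .. code-block:: cpp
+
+            std::vector<arb::cell_connection> connections_on(arb::cell_gid_type gid) const override {
+                // Connect each cell to its predecessor in the ring. The result is
+                // computed on demand from gid, so no mutable state is touched.
+                arb::cell_gid_type src = gid? gid-1: num_cells_-1;
+                return {arb::cell_connection({src, 0}, {gid, 0}, weight_, delay_)};
+            }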
 
 
 Class Documentation
@@ -111,7 +44,7 @@ Class Documentation
     multiple threads should be able to call member functions of a recipe
     simultaneously. Model building is multithreaded to reduce model building times,
     so recipe implementations should avoid using locks and mutexes to introduce
-    thread safety. See `recipe best practices <recipe_best_practice_>`_ for more
+    thread safety. See `recipe best practices <cpp_recipe_best_practice_>`_ for more
     information.
 
 **Required Member Functions**
diff --git a/doc/index.rst b/doc/index.rst
index 430f9d96cd19a1dada6f5f637c9631f1dd983d0e..6cbf8db699b232d05665dacfb47a796fbb5bd8dd 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -54,12 +54,6 @@ Some key features include:
 .. toctree::
    :caption: Python:
 
-   py_overview
-   py_common
-   py_recipe
-   py_domdec
-   py_simulation
-
 .. toctree::
    :caption: C++ API:
 
@@ -75,9 +69,7 @@ Some key features include:
    library
    simd_api
    profiler
-   py_profiler
    sampling_api
    cpp_distributed_context
    cpp_dry_run
-   py_unittest
 
diff --git a/doc/install.rst b/doc/install.rst
index 0408ba455b14e74226165bb72dd83e7fde26a2fb..c091f5ab083b45e24b7509f17810a63d0138eb81 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -3,7 +3,7 @@
 Installing Arbor
 ################
 
-Installation of Arbor is done by obtaining the source code and compiling it on
+Arbor is installed by obtaining the source code and compiling it on
 the target system.
 
 This guide starts with an overview of the building process, and the various options
@@ -28,7 +28,7 @@ with very few tools.
   =========== ============================================
   Tool        Notes
   =========== ============================================
   Git         To check out the code, minimum version 2.0.
-  CMake       To set up the build, minimum version 3.8 (3.9 for MPI).
+  CMake       To set up the build, minimum version 3.9.
   compiler    A C++14 compiler. See `compilers <compilers_>`_.
   =========== ============================================
 
@@ -46,9 +46,8 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised.
   Compiler    Min version  Notes
   =========== ============ ============================================
   GCC         6.1.0
-  Clang       4.0          Needs GCC 6 or later for standard library.
+  Clang       4.0          Needs GCC 6 or later for standard library.
   Apple Clang 9            Apple LLVM version 9.0.0 (clang-900.0.39.2)
-  Intel       17.0.1       Needs GCC 5 or later for standard library.
   =========== ============ ============================================
 
 .. _note_CC:
 
@@ -93,12 +92,12 @@ We recommend using GCC or Clang, for which Arbor has been tested and optimised.
     `NMODL <https://www.neuron.yale.edu/neuron/static/docs/help/neuron/nmodl/nmodl.html>`_.
     The generated code is explicitly vectorised, obviating the need for vendor compilers,
     and we can take advantage of their benefits of GCC and Clang:
-    faster compilation times; fewer compiler bugs; and support for recent C++ standards.
+    faster compilation times; fewer compiler bugs; and better support for C++ standards.
 
 .. Note::
-    The IBM XL C/C++ compiler for Linux up to version 14 is not supported, owing to unresolved
+    The IBM XL C++ compiler and Intel C++ compiler are not supported, owing to unresolved
     compiler issues. We strongly recommend building with GCC or Clang instead on PowerPC
-    platforms.
+    and Intel platforms.
 
 Optional Requirements
 ---------------------
 
 GPU Support
 ~~~~~~~~~~~
 
-Arbor has full support for NVIDIA GPUs, for which the NVIDIA CUDA toolkit version 8 is required.
+Arbor has full support for NVIDIA GPUs, for which the NVIDIA CUDA toolkit version 9 is required.
 
 Distributed
 ~~~~~~~~~~~
 
@@ -118,9 +117,10 @@ More information on building with MPI is in the `HPC cluster section <cluster_>`
 Python
 ~~~~~~
 
-Arbor has a python front end, for which Python 3.6 is required.
-In order to use MPI in combination with the python frontend `mpi4py <https://mpi4py.readthedocs.io/en/stable/install.html#>`_ is required as a site-package of python.
-
+Arbor has a Python front end, for which Python 3.6 is required.
+In order to use MPI in combination with the Python front end, the
+`mpi4py <https://mpi4py.readthedocs.io/en/stable/install.html#>`_
+Python package is also recommended.
 
 Documentation
 ~~~~~~~~~~~~~~
@@ -134,11 +134,11 @@ Getting the Code
 ================
 
 The easiest way to acquire the latest version of Arbor is to check the code out from
-the `Github repository <https://github.com/eth-cscs/arbor>`_:
+the `Github repository <https://github.com/arbor-sim/arbor>`_:
 
 .. code-block:: bash
 
-    git clone https://github.com/eth-cscs/arbor.git --recurse-submodules
+    git clone https://github.com/arbor-sim/arbor.git --recurse-submodules
 
 We recommend using a recursive checkout, because Arbor uses Git submodules for some
 of its library dependencies.
 
@@ -155,7 +155,7 @@ recursive checkout:
     git submodule update --init --recursive
 
 You can also point your browser to Arbor's
-`Github page <https://github.com/eth-cscs/arbor>`_ and download a zip file.
+`Github page <https://github.com/arbor-sim/arbor>`_ and download a zip file.
 If you use the zip file, then don't forget to run Git submodule update manually.
 
 .. _building:
 
@@ -163,7 +163,7 @@ If you use the zip file, then don't forget to run Git submodule update manually.
 Building and Installing Arbor
 =============================
 
-Once the Arbor code has been checked out, it can be built by first running CMake to configure the build, then running make.
+Once the Arbor code has been checked out, first run CMake to configure the build, then run make.
Below is a simple workflow for: **1)** getting the source; **2)** configuring the build; **3)** building; **4)** running tests; **5)** install. @@ -173,7 +173,7 @@ For more detailed build configuration options, see the `quick start <quickstart_ .. code-block:: bash # 1) Clone. - git clone https://github.com/eth-cscs/arbor.git --recurse-submodules + git clone https://github.com/arbor-sim/arbor.git --recurse-submodules cd arbor # Make a path for building @@ -185,8 +185,10 @@ For more detailed build configuration options, see the `quick start <quickstart_ # Release mode should be used for installing and benchmarking Arbor. cmake .. - # 3) Build Arbor. + # 3.1) Build Arbor library. make -j 4 + # 3.2) Build Arbor unit tests. + make -j 4 tests # 4) Run tests. ./bin/unit @@ -211,14 +213,14 @@ CMake parameters and flags, follow links to the more detailed descriptions below .. code-block:: bash - cmake .. -DARB_WITH_ASSERTIONS=ON -DCMAKE_BUILD_TYPE=debug + cmake -DARB_WITH_ASSERTIONS=ON -DCMAKE_BUILD_TYPE=debug .. topic:: `Release <buildtarget_>`_ mode (compiler optimizations enabled) with the default compiler, optimized for the local `system architecture <architecture_>`_. .. code-block:: bash - cmake .. -DARB_ARCH=native + cmake -DARB_ARCH=native .. topic:: `Release <buildtarget_>`_ mode with `Clang <compilers_>`_. @@ -226,27 +228,27 @@ CMake parameters and flags, follow links to the more detailed descriptions below export CC=`which clang` export CXX=`which clang++` - cmake .. + cmake .. topic:: `Release <buildtarget_>`_ mode for the `Haswell architecture <architecture_>`_ and `explicit vectorization <vectorize_>`_ of kernels. .. code-block:: bash - cmake .. -DARB_VECTORIZE=ON -DARB_ARCH=haswell + cmake -DARB_VECTORIZE=ON -DARB_ARCH=haswell -.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <vectorize_>`_, targeting the `Broadwell architecture <vectorize_>`_, with support for `P100 GPUs <gpu_>`_, and building with `GCC 5 <compilers_>`_. +.. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <vectorize_>`_, targeting the `Broadwell architecture <vectorize_>`_, with support for `P100 GPUs <gpu_>`_, and building with `GCC 6 <compilers_>`_. .. code-block:: bash - export CC=gcc-5 - export CXX=g++-5 - cmake .. -DARB_VECTORIZE=ON -DARB_ARCH=broadwell -DARB_WITH_GPU=ON + export CC=gcc-6 + export CXX=g++-6 + cmake -DARB_VECTORIZE=ON -DARB_ARCH=broadwell -DARB_WITH_GPU=ON .. topic:: `Release <buildtarget_>`_ mode with `explicit vectorization <vectorize_>`_, optimized for the `local system architecture <architecture_>`_ and `install <install_>`_ in ``/opt/arbor`` .. code-block:: bash - cmake .. -DARB_VECTORIZE=ON -DARB_ARCH=native -DCMAKE_INSTALL_PREFIX=/opt/arbor + cmake -DARB_VECTORIZE=ON -DARB_ARCH=native -DCMAKE_INSTALL_PREFIX=/opt/arbor .. _buildtarget: @@ -279,8 +281,8 @@ is to set ``ARB_ARCH`` to ``native``: cmake -DARB_ARCH=native -When deploying on a different machine, one should, for an optimized library, specify -the specific architecture of that machine. The valid values correspond to those given +When deploying on a different machine (cross-compiling) specify +the specific architecture of the target machine. The valid values correspond to those given to the ``-mcpu`` or ``-march`` options for GCC and Clang; the build system will translate these names to corresponding values for other supported compilers. 
@@ -290,6 +292,19 @@ for example GCC `x86 options <https://gcc.gnu.org/onlinedocs/gcc/x86-Options.htm
 `PowerPC options <https://gcc.gnu.org/onlinedocs/gcc/RS_002f6000-and-PowerPC-Options.html#RS_002f6000-and-PowerPC-Options>`_,
 and `ARM options <https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html>`_.
 
+.. code-block:: bash
+
+    # Intel architectures
+    cmake -DARB_ARCH=broadwell        # broadwell with avx2
+    cmake -DARB_ARCH=skylake-avx512   # skylake with avx512 (Xeon server)
+    cmake -DARB_ARCH=knl              # Xeon Phi KNL
+
+    # IBM Power8
+    cmake -DARB_ARCH=power8
+
+    # ARMv8-A
+    cmake -DARB_ARCH=armv8-a
+
 .. _vectorize:
 
 Vectorization
@@ -342,20 +357,21 @@ example:
 Python Front End
 ----------------
 
-Arbor can be used with a python front end which is enabled by setting the
+Arbor can be used with a python front end which is enabled by toggling the
 CMake ``ARB_WITH_PYTHON`` option:
 
 .. code-block:: bash
 
-    cmake .. -ARB_WITH_PYTHON=ON
+    cmake -DARB_WITH_PYTHON=ON
 
 By default ``ARB_WITH_PYTHON=OFF``. When this option is turned on, a python module called :py:mod:`arbor` is built.
 
-Depending on the configuration of the system where Arbor is being built, the
-C++ compiler may not be able to find ``mpi4py`` when Arbor is configured with both, python ``-ARB_WITH_PYTHON=ON`` and MPI ``-DARB_WITH_MPI=ON``.
-The easiest workaround is to add the path to the include directory containing the header to the
-``CPATH`` environment variable before configuring and building Arbor, for
-example:
+The Arbor Python wrapper has optional support for the ``mpi4py`` Python module
+for MPI. CMake will attempt to automatically detect ``mpi4py`` if configured
+with both ``-DARB_WITH_PYTHON=ON`` and MPI ``-DARB_WITH_MPI=ON``.
+If CMake fails to find ``mpi4py`` when it should, the easiest workaround is to
+add the path to the include directory for ``mpi4py`` to the ``CPATH`` environment
+variable before configuring and building Arbor:
 
 .. code-block:: bash
 
     # set CPATH and run cmake
     export CPATH="/path/to/python3/site-packages/mpi4py/include/:$CPATH"
-    cmake .. -ARB_WITH_PYTHON=ON -DARB_WITH_MPI=ON
+    cmake -DARB_WITH_PYTHON=ON -DARB_WITH_MPI=ON
 
 .. _install:
 
 Installation
 
 Arbor can be installed with ``make install`` after configuration. The
 installation comprises:
 
-- The static library ``libarbor.a``.
+- The static libraries ``libarbor.a`` and ``libarborenv.a``.
 - Public header files.
+- The ``lmorpho`` l-system morphology generation utility.
 - The ``modcc`` NMODL compiler if built.
+- The python module if built.
 - The HTML documentation if built.
 
-The default install path (``/usr/local``) can be overridden with the standard
+The default install path (``/usr/local``) can be overridden with the
 ``CMAKE_INSTALL_PREFIX`` configuration option.
 
 Provided that Sphinx is available, HTML documentation for Arbor can be built
 
@@ -404,7 +422,7 @@ HPC Clusters
 
 HPC clusters offer their own unique challenges when compiling and running
 software, so we cover some common issues in this section. If you have problems
 on your target system that are not covered here, please make an issue on the
-Arbor `Github issues <https://github.com/eth-cscs/arbor/issues>`_ page.
+Arbor `Github issues <https://github.com/arbor-sim/arbor/issues>`_ page.
 We will do our best to help you directly, and update this guide to help other users.
 
 MPI
 ~~~
 
 An example of building a 'release' (optimized) version of Arbor with MPI is:
 
     export CXX=`which mpicxx`
 
     # configure with mpi
     cmake .. 
-DARB_WITH_MPI=ON + cmake -DARB_WITH_MPI=ON # run MPI-specific unit tests on 2 MPI ranks mpirun -n 2 ./bin/unit-mpi @@ -440,8 +458,8 @@ using the supplied MPI compiler wrappers in preference. The wrapper forwards the compilation to a compiler, like GCC, and you have to ensure that this compiler is able to compile Arbor. For wrappers - that call GCC, Intel or Clang compilers, you can pass the ``--version`` flag - to the wrapper. For example, on a Cray system where the C++ wrapper is called ``CC``: + that call GCC or Clang compilers, pass the ``--version`` flag + to the wrapper. For example, on a Cray system, where the C++ wrapper is called ``CC``: .. code-block:: bash @@ -500,7 +518,7 @@ Putting it all together, a typical workflow to build Arbor on a Cray system is: module swap PrgEnv-cray PrgEnv-gnu moudle swap gcc/7.1.0 export CC=`which cc`; export CXX=`which CC`; - cmake .. -DARB_WITH_MPI=ON # MPI support + cmake -DARB_WITH_MPI=ON # MPI support .. Note:: If ``CRAYPE_LINK_TYPE`` isn't set, there will be warnings like the following when linking: @@ -553,7 +571,7 @@ example: .. code-block:: bash - cmake .. -DARB_MODCC=path-to-local-modcc + cmake -DARB_MODCC=path-to-local-modcc Here we will use the example of compiling for Intel KNL on a Cray system, which has Intel Sandy Bridge CPUs on login nodes that don't support the AVX512 @@ -669,7 +687,7 @@ and have to be turned on by setting the ``ARB_WITH_ASSERTIONS`` CMake option: library, caused either by a logic error in Arbor, or incorrectly checked user input. If this occurs, it is highly recommended that you attach the output to the - `bug report <https://github.com/eth-cscs/arbor/issues>`_ you send to the Arbor developers! + `bug report <https://github.com/arbor-sim/arbor/issues>`_ you send to the Arbor developers! CMake Git Submodule Warnings @@ -685,23 +703,5 @@ need to be `updated <downloading_>`_. git submodule init git submodule update Or download submodules recursively when checking out: - git clone --recurse-submodules https://github.com/eth-cscs/arbor.git - - -Wrong Headers for Intel Compiler ------------------------------------- - -The Intel C++ compiler does not provide its own copy of the C++ standard library, -instead it uses the implementation from GCC. You can see what the default version of -GCC is by ``g++ --version``. - -If the Intel compiler uses an old version of the standard library, -errors like the following occur: - -.. code-block:: none - - /users/bcumming/arbor_knl/src/util/meta.hpp(127): error: namespace "std" has no member "is_trivially_copyable" - enable_if_t<std::is_trivially_copyable<T>::value>; + git clone --recurse-submodules https://github.com/arbor-sim/arbor.git -On clusters, a GCC module with a full C++11 implementation of the standard library, -i.e. version 5.1 or later, can be loaded to fix the issue. diff --git a/doc/model_common.rst b/doc/model_common.rst index 2b6e1c2cb3acd8a0847433a2b7d10fedebd98a23..6d431bd35b87d28948a3677da960c7f449a10ece 100644 --- a/doc/model_common.rst +++ b/doc/model_common.rst @@ -5,7 +5,7 @@ Common Types The basic unit of abstraction in an Arbor model is a cell. A cell represents the smallest model that can be simulated. -Cells interact with each other only via spike exchange. +Cells interact with each other via spike exchange and gap junctions. Cells can be of various types, admitting different representations and implementations. A *cell group* represents a collection of cells of the same type together with an implementation of their simulation. 
 Arbor currently supports specialized leaky integrate and fire cells and cells representing
 artificial spike sources in addition to multi-compartment neurons.
 
@@ -17,28 +17,23 @@ Since the neuron model and the associated workflow are formulated from a cell-ce
 
   ======================== ====================== ===========================================================
   Identifyer/ Index        Type                   Description
   ======================== ====================== ===========================================================
-  gid                      integer                The global identifier of the cell associated with the item.
-  index                    unsigned integer       The index of the item in a cell-local collection.
+  gid                      integral               The global identifier of the cell associated with the item.
+  index                    integral               The index of the item in a cell-local collection.
   cell member              tuple (gid, index)     The global identification of a cell-local item associated
                                                   with a unique cell, identified by the member `gid`,
                                                   and identifying an item within a cell-local collection
                                                   by the member `index`.
-  cell size                unsigned integer       Counting collections of cells.
-  cell local size          unsigned integer       Counting cell-local data.
-  cell kind                enumerator             The identification of the cell type/ kind,
-                                                  used by the model to group equal kinds in the same cell group:
-
-                                                  * Cell with morphology described by branching 1D cable segments,
-                                                  * Leaky-integrate and fire neuron,
-                                                  * Regular spiking source,
+  cell size                integral               Counting collections of cells.
+  cell local size          integral               Counting cell-local data.
+  cell kind                enum                   Identifies the cell type, including:
+
+                                                  * Cell with morphology described by branching 1D cable segments.
+                                                  * Leaky-integrate and fire neuron.
+                                                  * Regular spiking source.
                                                   * Spike source from values inserted via description.
   ======================== ====================== ===========================================================
 
 Example
-    An example of the cell member identifyer is uniquely identifying a synapse in the model.
+    A `cell member` identifier is used to uniquely identify synapses.
     Each synapse has a post-synaptic cell (`gid`), and an `index` into the set of synapses on the post-synaptic cell.
 
-Further, to interact with the model probes are specified whereby the item or value that is subjected to a probe will be specific to a particular cell type.
-Probes are specified in the recipe that is used to initialize a model with cell `gid` and index of the probe.
-The probe's adress is a cell-type specific location info, specific to the cell kind of `gid`.
-
-C++ specific common types are explained in detail in :ref:`cppcommon` and in :ref:`pycommon` for Arbor's python front end.
+C++ specific common types are explained in detail in :ref:`cppcommon`.
diff --git a/doc/model_domdec.rst b/doc/model_domdec.rst
index 95287d4355944e7a133c96b5d402f7076b566779..254f23364724943d03551c55c7e22c16ee36fa8e 100644
--- a/doc/model_domdec.rst
+++ b/doc/model_domdec.rst
@@ -31,4 +31,4 @@ Execution Context
 An *execution context* contains the local thread pool, and optionally the GPU state
 and MPI communicator, if available. Users of the library configure contexts, which are passed to
 Arbor methods and types.
-Detailed documentations can be found in :ref:`cppdomdec` for C++ and in :ref:`pydomdec` for python.
+See :ref:`cppdomdec` for documentation of the C++ interface for domain decomposition.
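+
+A minimal sketch of configuring an execution context in C++. The
+:cpp:any:`proc_allocation` and :cpp:any:`make_context` names below are those
+described in :ref:`cppdomdec`, and the thread count is an arbitrary example value:
+
+.. container:: example-code
+
+    .. code-block:: cpp
+
+        #include <arbor/context.hpp>
+
+        // Describe the local resources: 4 threads and no GPU (gpu_id -1 selects no GPU).
+        arb::proc_allocation resources;
+        resources.num_threads = 4;
+        resources.gpu_id = -1;
+
+        // Contexts are passed to Arbor methods and types,
+        // e.g. load balancing and simulation construction.
+        auto context = arb::make_context(resources);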
diff --git a/doc/model_intro.rst b/doc/model_intro.rst
index aac5e2a36868a2ab3705b4335e00cfb5578040f9..4280fe9b0473fb93de94424072366abde41a98fe 100644
--- a/doc/model_intro.rst
+++ b/doc/model_intro.rst
@@ -2,17 +2,17 @@
 Overview
 =========
 
-Arbor's design model was created to enable scalability through abtraction.
+Arbor's design aims to enable scalability through abstraction.
 
-Thereby, Arbor makes a distinction between the **description** of a model, and the
+To achieve this, Arbor makes a distinction between the **description** of a model, and the
 **execution** of a model:
 a *recipe* describes a model, and a *simulation* is an executable instatiation of a model.
 
 To be able to simulate a model, three basic steps need to be considered:
 
 * first, describe the neuron model by defining a recipe;
-* then, get the local computational resources, the execution context, and partition the load balance;
-* finally, execute the model by initiating and running the simulation.
+* then, define the computational resources available to execute the model;
+* finally, initiate and execute a simulation of the recipe on the chosen hardware resources.
 
 .. topic:: Concepts
 
diff --git a/doc/model_recipe.rst b/doc/model_recipe.rst
index ccbfb14d1863cfad5d3bb63a55b2ff08304722ee..d0579c480494ce77796960856a53186f4127f223 100644
--- a/doc/model_recipe.rst
+++ b/doc/model_recipe.rst
@@ -4,18 +4,52 @@ Recipes
 ===============
 
 An Arbor *recipe* is a description of a model. The recipe is queried during the model
-building phase to provide cell information, such as:
+building phase to provide information about cells in the model, such as:
 
 * the number of cells in the model;
 * the type of a cell;
 * a description of a cell, e.g. with soma, synapses, detectors, stimuli;
-
-and optionally, e.g.:
-
 * the number of spike targets;
 * the number of spike sources;
 * incoming network connections from other cells terminating on a cell.
 
+Why Recipes?
+--------------
+
+The interface and design of Arbor recipes were motivated by the following aims:
+
+    * Building a simulation from a recipe description must be possible in a
+      distributed system efficiently with minimal communication.
+    * To minimise the amount of memory used in model building, to make it
+      possible to build and run simulations in one run.
+
+Recipe descriptions are cell-oriented, in order that the building phase can
+be efficiently distributed and that the model can be built independently of any
+runtime execution environment.
+
+During model building, the recipe is queried first by a load balancer,
+then later when building the low-level cell groups and communication network.
+The cell-centered recipe interface, whereby cell and network properties are
+specified "per-cell", facilitates this.
+
+The steps of building a simulation from a recipe are:
+
+.. topic:: 1. Load balancing
+
+    First, the cells are partitioned over MPI ranks, and each rank parses
+    the cells assigned to it to build a cost model.
+    The ranks then coordinate to redistribute cells over MPI ranks so that
+    each rank has a balanced workload. Finally, each rank groups its local
+    cells into :cpp:type:`cell_group` s that balance the work over threads (and
+    GPU accelerators if available).
+
+.. topic:: 2. Model building
+
+    The model building phase takes the cells assigned to the local rank, and builds the
+    local cell groups and the part of the communication network by querying the recipe
+    for more information about the cells assigned to it.
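+
+A hedged sketch of these two steps in C++, assuming ``recipe`` and ``context``
+objects have already been created, and using example values for the run
+duration and time step:
+
+.. container:: example-code
+
+    .. code-block:: cpp
+
+        // 1. The load balancer queries the recipe and produces a domain decomposition.
+        auto decomp = arb::partition_load_balance(recipe, context);
+
+        // 2. The simulation builds cell groups and the communication network
+        //    by querying the recipe for the cells assigned to this rank.
+        arb::simulation sim(recipe, decomp, context);
+
+        sim.run(100, 0.025);    // simulate 100 ms with 0.025 ms time steps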
+
+
 General Best Practices
 ----------------------
 
@@ -26,6 +60,19 @@ General Best Practices
     please contact the developers, because we would like to develop tools that help
     make this simpler.
 
+.. _recipe_lazy:
+
+.. topic:: Be lazy
+
+    A recipe does not have to contain a complete description of the model in
+    memory; precompute as little as possible, and use
+    `lazy evaluation <https://en.wikipedia.org/wiki/Lazy_evaluation>`_ to generate
+    information only when requested.
+    This has multiple benefits, including:
+
+    * thread safety;
+    * minimising the memory footprint of the recipe.
+
 .. topic:: Be reproducible
 
     Arbor is designed to give reproduceable results when the same model is run on a
@@ -33,7 +80,9 @@ General Best Practices
     This only holds when a recipe provides a reproducible model description, which
     can be a challenge when a description uses random numbers, e.g. to pick incoming
     connections to a cell from a random subset of a cell population.
-    To get a reproduceable model, use the cell global identifyer `gid` to seed random number generators.
+    To get a reproducible model, use the cell `gid` (or a hash based on the `gid`)
+    to seed random number generators, including those for :cpp:type:`event_generator` s.
+
 
 Mechanisms
 ----------------------
@@ -45,4 +94,4 @@ subset of NEURONs mechanism specification language NMODL.
 Examples
     Common examples are the *passive/ leaky integrate-and-fire* model, the *Hodgkin-Huxley*
     mechanism, the *(double-) exponential synapse* model, or the *Natrium current* model for an axon.
 
-The detailed documentations and specific best practices for C++ recipes can be found in :ref:`cpprecipe` and in :ref:`pyrecipe` covering python recipes.
+Detailed documentation and best practices for C++ recipes can be found in :ref:`cpprecipe`.
diff --git a/doc/model_simulation.rst b/doc/model_simulation.rst
index 159b87c4fac0f280956990222d98874e8291d413..429a59017806ba640b7d1dc4f0fd9e9a85444a8b 100644
--- a/doc/model_simulation.rst
+++ b/doc/model_simulation.rst
@@ -25,5 +25,4 @@ Simulations provide an interface for executing and interacting with the model:
 
 * The model state can be *reset* to its initial state before the simulation was started.
 * *Sampling* of the simulation state can be performed during execution with samplers and probes
   (e.g. compartment voltage and current) and spike output with the total number of spikes generated
   since either construction or reset.
-
-Detailed documentation can be found in C++ API :ref:`cppsimulation` and :ref:`pysimulation` for Arbor's python frontend.
+See :ref:`cppsimulation` for documentation of the C++ simulation API.
diff --git a/doc/py_common.rst b/doc/py_common.rst
deleted file mode 100644
index 676d9d7f9f04cb0b1d834ccda9c055a036c7a2e5..0000000000000000000000000000000000000000
--- a/doc/py_common.rst
+++ /dev/null
@@ -1,91 +0,0 @@
-.. _pycommon:
-
-Common Types
-=====================
-
-Cell Identifiers and Indexes
-----------------------------
-The types defined below are used as identifiers for cells and members of cell-local collections.
-
-.. module:: arbor
-
-.. class:: cell_member
-
-    .. function:: cell_member()
-
-        Construct a cell member with default values :attr:`gid = 0` and :attr:`index = 0`.
-
-    .. function:: cell_member(gid, index)
-
-        Construct a cell member with parameters :attr:`gid` and :attr:`index` for global identification of an item of a cell-local item. 
- - Items of type :class:`cell_member` must: - - * be associated with a unique cell, identified by the member :attr:`gid`; - * identify an item within a cell-local collection by the member :attr:`index`. - - An example is uniquely identifying a synapse in the model. - Each synapse has a post-synaptic cell (with :attr:`gid`), and an :attr:`index` into the set of synapses on the post-synaptic cell. - - Lexographically ordered by :attr:`gid`, then :attr:`index`. - - .. attribute:: gid - - The global identifier of the cell. - - .. attribute:: index - - The cell-local index of the item. - Local indices for items within a particular cell-local collection should be zero-based and numbered contiguously. - - An example of a cell member construction reads as follows: - - .. container:: example-code - - .. code-block:: python - - import arbor - - # construct - cmem1 = arbor.cell_member() - cmem2 = arbor.cell_member(0, 0) - - # set gid and index - cmem1.gid = 1 - cmem1.index = 1 - - -.. class:: cell_kind - - Identify the cell type/ kind used by the model to group equal kinds in the same cell group (enumerator). - - .. attribute:: cable1d - - A cell with morphology described by branching 1D cable segments. - - .. attribute:: lif - - A leaky-integrate and fire neuron. - - .. attribute:: spike_source - - A cell that generates spikes at a user-supplied sequence of time points. - - .. attribute:: benchmark - - A proxy cell used for benchmarking. - -An example of a cell construction of :class:`cell_kind.cable1d` reads as follows: - - .. container:: example-code - - .. code-block:: python - - import arbor - - kind = arbor.cell_kind.cable1d - -Probes ------- - -Yet to be implemented. diff --git a/doc/py_domdec.rst b/doc/py_domdec.rst deleted file mode 100644 index d1bc518c49bbf9d82f996c51656e4799ffc68c28..0000000000000000000000000000000000000000 --- a/doc/py_domdec.rst +++ /dev/null @@ -1,282 +0,0 @@ -.. _pydomdec: - -Domain Decomposition -==================== - -Decomposition -------------- -As defined in :ref:`modeldomdec` a domain decomposition is a description of the distribution of the model over the available computational resources. -Therefore, the following data structures are used to describe domain decompositions. - -.. currentmodule:: arbor - -.. class:: backend_kind - - Indicate which hardware backend to use for running a :class:`cell_group` (enumeration). - - .. attribute:: multicore - - Use the multicore backend. - - .. attribute:: gpu - - Use the GPU back end. - - .. Note:: - Setting the GPU back end is only meaningful if the - :class:`cell_group` type supports the GPU backend. - -.. class:: domain_decomposition - - Describe a domain decomposition. The class is soley responsible for describing the - distribution of cells across cell groups and domains. - It holds cell group descriptions (:attr:`groups`) for cells assigned to - the local domain, and a helper function (:func:`gid_domain`) used to - look up which domain a cell has been assigned to. - The :class:`domain_decomposition` object also has meta-data about the - number of cells in the global model, and the number of domains over which - the model is destributed. - - .. Note:: - The domain decomposition represents a division of **all** of the cells in - the model into non-overlapping sets, with one set of cells assigned to - each domain. 
- A domain decomposition is generated either by a load balancer or is - directly specified by the user, and it is a requirement that the - decomposition is correct: - - * Every cell in the model appears once in one and only one cell :attr:`groups` on one and only one local :class:`domain_decomposition` object. - * :attr:`num_local_cells` is the sum of the number of cells in each of the :attr:`groups`. - * The sum of :attr:`num_local_cells` over all domains matches :attr:`num_global_cells`. - - .. function:: gid_domain(gid) - - Query the domain id that a cell is assigned to (using global identifier :attr:`arbor.cell_member.gid`). - - .. attribute:: num_domains - - The number of domains that the model is distributed over. - - .. attribute:: domain_id - - The index of the local domain. - Always 0 for non-distributed models, and corresponds to the MPI rank - for distributed runs. - - .. attribute:: num_local_cells - - The total number of cells in the local domain. - - .. attribute:: num_global_cells - - The total number of cells in the global model - (sum of :attr:`num_local_cells` over all domains). - - .. attribute:: groups - - The description of the cell groups on the local domain. - See :class:`group_description`. - -.. class:: group_description - - Return the indexes of a set of cells of the same kind that are grouped together in a cell group in an :class:`arbor.simulation`. - - .. function:: group_description(kind, gids, backend) - - Construct a group description with parameters :attr:`kind`, :attr:`gids` and :attr:`backend`. - - .. attribute:: kind - - The kind of cell in the group. - - .. attribute:: gids - - The (list of) gids of the cells in the cell group, **sorted in ascending order**. - - .. attribute:: backend - - The back end on which the cell group is to run. - - -Load Balancers --------------- - -Load balancing generates a :class:`domain_decomposition` given an :class:`arbor.recipe` -and a description of the hardware on which the model will run. Currently Arbor provides -one load balancer, :func:`partition_load_balance`, and more will be added over time. - -If the model is distributed with MPI, the partitioning algorithm for cells is -distributed with MPI communication. The returned :class:`domain_decomposition` -describes the cell groups on the local MPI rank. - -.. Note:: - The :class:`domain_decomposition` type is simple and - independent of any load balancing algorithm, so users can supply their - own domain decomposition without using one of the built-in load balancers. - This is useful for cases where the provided load balancers are inadequate, - and when the user has specific insight into running their model on the - target computer. - -.. function:: partition_load_balance(recipe, context) - - Construct a :class:`domain_decomposition` that distributes the cells - in the model described by an :class:`arbor.recipe` over the distributed and local hardware - resources described by a :class:`context`. - - The algorithm counts the number of each cell type in the global model, then - partitions the cells of each type equally over the available nodes. - If a GPU is available, and if the cell type can be run on the GPU, the - cells on each node are put into one large group to maximise the amount of fine - grained parallelism in the cell group. - Otherwise, cells are grouped into small groups that fit in cache, and can be - distributed over the available cores. - - .. 
Note:: - The partitioning assumes that all cells of the same kind have equal - computational cost, hence it may not produce a balanced partition for - models with cells that have a large variance in computational costs. - -Hardware --------- - -.. class:: proc_allocation - - Enumerate the computational resources to be used for a simulation, typically a - subset of the resources available on a physical hardware node. - - .. container:: example-code - - .. code-block:: python - - # Default construction uses all detected cores/threads, and the first GPU, if available. - import arbor - alloc = arbor.proc_allocation() - - # Remove any GPU from the resource description. - alloc.gpu_id = -1 - - - .. function:: proc_allocation() - - Construct an allocation by setting the number of threads to the number available locally for execution, and - chooses either the first available GPU, or no GPU if none are available. - - .. function:: proc_allocation(threads, gpu_id) - - Construct an allocation by setting the number of threads to :attr:`threads` and selecting the GPU with :attr:`gpu_id`. - - .. attribute:: threads - - The number of CPU threads available locally for execution. - - .. attribute:: gpu_id - - The identifier of the GPU to use. - The :attr:`gpu_id` corresponds to the ``int device`` parameter used by CUDA API calls - to identify gpu devices. - Set to -1 to indicate that no GPU device is to be used. - See ``cudaSetDevice`` and ``cudaDeviceGetAttribute`` provided by the - `CUDA API <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html>`_. - - .. cpp:function:: has_gpu() - - Query (with True/ False) whether a GPU is selected (i.e. whether :attr:`gpu_id` is ``-1``). - -Execution Context ------------------ - -The :class:`proc_allocation` class enumerates the hardware resources on the local hardware -to use for a simulation. - -.. class:: context - - A :class:`context` is a handle for the interfaces to the hardware resources used in a simulation. - It contains the local thread pool, and optionally the GPU state - and MPI communicator, if available. Users of the library do not directly use the functionality - provided by :class:`context`, instead they configure contexts, which are passed to - Arbor methods and types. - - .. function:: context() - - Construct the (default) local context that uses all detected threads and a GPU if any are available. - - .. function:: context(proc_allocation) - - Construct a local context that uses the local resources described by :class:`proc_allocation`. - - .. function:: context(proc_allocation, mpi_comm) - - Construct a context that uses the local resources described by :class:`proc_allocation`, and - uses an MPI communicator (see e.g. :class:`arbor.mpi_comm` to be documented) for distributed calculation. - - .. function:: context(threads, gpu) - - Construct a context that uses a set number of :attr:`threads` and gpu id :attr:`gpu`. - - .. attribute:: threads - - The number of threads available locally for execution (default: 1). - - .. attribute:: gpu - - The index of the GPU to use (default: none for no GPU). - - .. function:: context(threads, gpu, mpi) - - Construct a context that uses a set number of :attr:`threads` and gpu id :attr:`gpu` and MPI communicator :attr:`mpi`. - - .. attribute:: threads - - The number of threads available locally for execution (default: 1). - - .. attribute:: gpu - - The index of the GPU to use (default: none for no GPU). - - .. attribute:: mpi - - An MPI communicator (see e.g. 
:class:`arbor.mpi_comm` to be documented, default: none for no MPI). - - .. attribute:: has_mpi - - Query whether the context uses MPI for distributed communication. - - .. attribute:: has_gpu - - Query whether the context has a GPU. - - .. attribute:: threads - - The number of threads available locally for execution. - - .. attribute:: ranks - - The number of distributed domains (equivalent to the number of MPI ranks). - - .. attribute:: rank - - The numeric id of the local domain (equivalent to MPI rank). - -Here are some examples of how to create a :class:`context`: - - .. container:: example-code - - .. code-block:: python - - import arbor - - # Construct a non-distributed context that uses all detected available resources. - context = arbor.context() - - # Construct a context that: - # * does not use a GPU, reguardless of whether one is available; - # * uses 8 threads in its thread pool. - alloc = arbor.proc_allocation(8, -1) - context = arbor.context(alloc) - - # Construct a context that: - # * uses all available local hardware resources; - # * uses the standard MPI communicator MPI_COMM_WORLD for distributed computation. - alloc = arbor.proc_allocation() # defaults to all detected local resources - comm = arbor.mpi_comm() - context = arbor.context(alloc, comm); diff --git a/doc/py_overview.rst b/doc/py_overview.rst deleted file mode 100644 index aed5321f358c276c5515fc040b6e98584875893a..0000000000000000000000000000000000000000 --- a/doc/py_overview.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. _pyoverview: - -Overview -========= -This section gives insights to the usage of Arbor's python front end :py:mod:`arbor` with examples and detailed descriptions of features. -The python front end is the main interface through which Arbor is used. - -.. _prerequisites: - -Prerequisites -~~~~~~~~~~~~~ - -Once Arbor is built in the folder ``path/to/arbor/build`` (and/ or installed to ``path/to/arbor/install``, see the :ref:`installarbor` documentation) python needs to be set up by setting - -.. code-block:: bash - - export PYTHONPATH="path/to/arbor/build/lib:$PYTHONPATH" - -or, in case of installation - -.. code-block:: bash - - export PYTHONPATH="path/to/arbor/install/lib/python3/site-packages:$PYTHONPATH" - -With this setup, Arbor's python module :py:mod:`arbor` can be imported with python3 via - - >>> import arbor - -.. _simsteps: - -Simulation steps -~~~~~~~~~~~~~~~~ - -Then, according to the :ref:`modelsimulation` description Arbor's python module :py:mod:`arbor` can be utilized to - -* first, **describe** the neuron model by defining a recipe; -* then, get the local **resources**, the **execution context**, and partition the **load balance**; -* finally, **execute** the model by initiating and running the simulation. - -In order to visualise the result a **spike recorder** can be used and to analyse Arbor's performance a **meter manager** is available. - -These steps are described and examples are given in the next subsections :ref:`pycommon`, :ref:`pyrecipe`, :ref:`pydomdec` and :ref:`pysimulation`. - -.. note:: - - Detailed information on Arbor's python features can be obtained with the ``help`` function, e.g. 
- - >>> help(arbor.recipe) diff --git a/doc/py_profiler.rst b/doc/py_profiler.rst deleted file mode 100644 index 2110ccb198b970ca1d181e01617600758edcf12b..0000000000000000000000000000000000000000 --- a/doc/py_profiler.rst +++ /dev/null @@ -1,91 +0,0 @@ -Python Profiler -=============== - -Arbor's python module :py:mod:`arbor` has a profiler for fine-grained timings and memory consumptions of regions of interest in the code. - -Instrumenting Code ------------------- - -Developers manually instrument the regions to profile. -This allows the developer to only profile the parts of the code that are of interest, and choose the appropriate granularity for profiling different regions. - -Once a region of code is marked for the profiler, the application will track the total time spent in the region, and how much memory (and if available energy) is consumed. - -Marking Regions -~~~~~~~~~~~~~~~ - -For measuring time, memory (and energy) consumption Arbor's meter manager in python can be used. -First the meter manager needs to be initiated, then the metering started and checkpoints set, wherever the manager should report the meters. -The measurement starts from the start to the first checkpoint and then in between checkpoints. -Checkpoints are defined by a string describing the process to be measured. - -Running the Profiler -~~~~~~~~~~~~~~~~~~~~~ - -The profiler does not need to be started or stopped by the user. -It needs to be initialized before entering any profiling region. -It is initialized using the information provided by the execution context. -At any point a summary of profiler region times and consumptions can be obtained. - -For example, the following will record and summarize the total time and memory spent: - -.. container:: example-code - - .. code-block:: python - - import arbor - - context = arbor.context() - meter_manager = arbor.meter_manager() - meter_manager.start(context) - - n_cells = 100 - recipe = my_recipe(n_cells) - - meter_manager.checkpoint('recipe create', context) - - decomp = arbor.partition_load_balance(recipe, context) - - meter_manager.checkpoint('load balance', context) - - sim = arbor.simulation(recipe, decomp, context) - - meter_manager.checkpoint('simulation init', context) - - tSim = 2000 - dt = 0.025 - sim.run(tSim, dt) - - meter_manager.checkpoint('simulation run', context) - - print(arbor.make_meter_report(meter_manager, context)) - -Profiler Output ------------------- - -The ``meter_report`` holds a summary of the accumulated recorders. -Calling ``make_meter_report`` will generate a profile summary, which can be printed using ``print``. - -Take the example output above: - ->>> ---- meters ------------------------------------------------------------------------------- ->>> meter time(s) memory(MB) ->>> ------------------------------------------------------------------------------------------- ->>> recipe create 0.000 0.001 ->>> load balance 0.000 0.009 ->>> simulation init 0.005 0.707 ->>> simulation run 3.357 0.028 - -For each region there are up to three values reported: - -.. table:: - :widths: 20,50 - - ============= ========================================================================= - Value Definition - ============= ========================================================================= - time (s) The total accumulated time (in seconds) spent in the region. - memory (MB) The total memory consumption (in mega bytes) in the region. - energy (kJ) The total energy consumption (in kilo joule) in the region (if available). 
diff --git a/doc/py_recipe.rst b/doc/py_recipe.rst
deleted file mode 100644
index 0ed843c5a1af042b53a214b4f12bd262bde42bce..0000000000000000000000000000000000000000
--- a/doc/py_recipe.rst
+++ /dev/null
@@ -1,232 +0,0 @@
-.. _pyrecipe:
-
-Recipes
-=================
-
-A recipe describes neuron models in a cell-oriented manner and supplies methods to provide cell information. Details on why Arbor uses recipes and general best practices can be found in :ref:`modelrecipe`.
-
-.. currentmodule:: arbor
-
-.. class:: recipe
-
-   Describe a model by describing the cells and network, without any information about how the model is to be represented or executed.
-
-   All recipes derive from this abstract base class.
-
-   Recipes provide a cell-centric interface for describing a model. This means that
-   model properties, such as connections, are queried using the global identifier
-   (:attr:`arbor.cell_member.gid`) of a cell. In the description below, the term :attr:`gid` is used as shorthand
-   for the cell with that global identifier.
-
-   **Required Member Functions**
-
-   The following member functions (besides a constructor) must be implemented by every recipe:
-
-   .. function:: num_cells()
-
-      The number of cells in the model.
-
-   .. function:: cell_kind( gid )
-
-      The cell kind of the cell with global identifier :attr:`gid` (return type: :class:`arbor.cell_kind`).
-
-   .. function:: cell_description( gid )
-
-      A high-level description of the cell with global identifier :attr:`gid`,
-      for example the morphology, synapses and ion channels required to build a multi-compartment neuron.
-      The type used to describe a cell depends on the kind of the cell.
-      The interfaces for querying the kind and the description of a cell are separate,
-      to allow the cell kind to be provided without building a full cell description,
-      which can be very expensive.
-
-   **Optional Member Functions**
-
-   .. function:: num_sources( gid )
-
-      The number of spike sources on :attr:`gid`.
-
-   .. function:: num_targets( gid )
-
-      The number of event targets on :attr:`gid` (e.g. synapses).
-
-   .. function:: connections_on( gid )
-
-      A list of all the incoming connections for :attr:`gid`.
-      Each connection should have a post-synaptic target :attr:`connection.destination` that matches the argument :attr:`gid`, and a valid synapse id :attr:`arbor.cell_member.index` on :attr:`gid`.
-      See :class:`connection`.
-
-      By default returns an empty list.
-
-   .. function:: event_generator(index, weight, schedule)
-
-      A list of all the event generators attached to :attr:`gid`, each with a cell-local :attr:`index`, a weight and a schedule (:class:`regular_schedule`, :class:`explicit_schedule` or :class:`poisson_schedule`).
-
-      By default returns an empty list.
-
-
-.. class:: connection
-
-   Describe a connection between two cells: a pre-synaptic source and a post-synaptic destination.
-   The source is typically a threshold detector on a cell or a spike source.
-   The destination is a synapse on the post-synaptic cell.
-
-   .. function:: connection(source, destination, weight, delay)
-
-      Construct a connection between the :attr:`source` and the :attr:`destination` with a :attr:`weight` and time :attr:`delay`.
-
-   .. attribute:: source
-
-      The source of the connection (type: :class:`arbor.cell_member`).
-
-   .. attribute:: destination
-
-      The destination of the connection (type: :class:`arbor.cell_member`).
-
-   .. attribute:: weight
-
-      The weight of the connection (S⋅cm⁻²).
-
-   .. attribute:: delay
-
-      The delay time of the connection (ms).
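To make the split between required and optional members concrete, a minimal sketch of a recipe implementing only the three required member functions might look as follows; it uses the soma cell helper documented below and is illustrative rather than a complete model:

.. container:: example-code

   .. code-block:: python

      import arbor

      class minimal_recipe(arbor.recipe):
          def __init__(self, n):
              # Call the base class constructor first, so the C++ side is initialized.
              arbor.recipe.__init__(self)
              self.n = n

          # Required: the total number of cells in the model.
          def num_cells(self):
              return self.n

          # Required: the kind of each cell; must agree with cell_description.
          def cell_kind(self, gid):
              return arbor.cell_kind.cable1d

          # Required: the full description of each cell.
          def cell_description(self, gid):
              return arbor.make_soma_cell()

All optional members keep their defaults, so such a model has no connections, targets, sources or event generators.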
-
-.. class:: regular_schedule
-
-   .. function:: regular_schedule()
-
-      Construct a default regular schedule with an empty time range and zero time step size.
-
-   .. function:: regular_schedule(tstart, tstop, dt)
-
-      Construct a regular schedule as a list of times from :attr:`tstart` to :attr:`tstop` in :attr:`dt` time steps.
-
-   .. attribute:: tstart
-
-      The start time (ms).
-
-   .. attribute:: tstop
-
-      The end time (ms).
-
-   .. attribute:: dt
-
-      The time step size (ms).
-
-
-.. class:: explicit_schedule
-
-   .. function:: explicit_schedule()
-
-      Construct a default explicit schedule with an empty list.
-
-   .. attribute:: times
-
-      Set the list of times in the schedule (ms).
-
-.. class:: poisson_schedule
-
-   To be implemented.
-
-
-Cells
-------
-A multicompartmental cell in Arbor's python front end can be created by making a soma and adding synapses at specific segment locations.
-
-.. class:: make_soma_cell
-
-   Construct a single compartment cell with properties:
-
-   - diameter 18.8 µm;
-   - Hodgkin-Huxley (HH) mechanisms (with default parameters as described below);
-   - bulk resistivity 100 Ω·cm (default);
-   - capacitance 0.01 F⋅m⁻² (default).
-
-   The default parameters of HH mechanisms are:
-
-   - Na-conductance 0.12 S⋅m⁻²,
-   - K-conductance 0.036 S⋅m⁻²,
-   - passive conductance 0.0003 S⋅m⁻² and
-   - passive potential -54.3 mV
-
-   .. class:: segment_location( segment, position )
-
-      Set the location to a cell-local segment and a position along it.
-
-      .. attribute:: segment
-
-         The segment as cell-local index.
-
-      .. attribute:: position
-
-         The relative position along the segment, between 0 and 1.
-
-.. class:: mccell
-
-   .. function:: add_synapse( location )
-
-      Add an exponential synapse at the segment location.
-
-   .. function:: add_stimulus( location, t0, duration, weight )
-
-      Add a stimulus to the cell at a specific location, with start time ``t0`` (ms), ``duration`` (ms) and ``weight`` (nA).
-
-   .. function:: add_detector( location, threshold )
-
-      Add a spike detector to the cell at a specific location, with a ``threshold`` (mV).
-
-
-An example recipe constructing a ring network of multicompartmental cells reads as follows:
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      import arbor
-
-      # A recipe, that describes the cells and network of a model, can be defined
-      # in python by implementing the arbor.recipe interface.
-
-      class ring_recipe(arbor.recipe):
-
-          def __init__(self, n=4):
-              # The base C++ class constructor must be called first, to ensure that
-              # all memory in the C++ class is initialized correctly.
-              arbor.recipe.__init__(self)
-              self.ncells = n
-
-          # The num_cells method that returns the total number of cells in the model
-          # must be implemented.
-          def num_cells(self):
-              return self.ncells
-
-          # The cell_description method returns the description of a cell.
-          def cell_description(self, gid):
-              # Make a soma cell
-              cell = arbor.make_soma_cell()
-
-              # Add a synapse at segment 0 at location 0.5
-              loc = arbor.segment_location(0, 0.5)
-              cell.add_synapse(loc)
-
-              # Add a stimulus to the first cell (gid 0) at t0 = 0 ms,
-              # for a duration of 20 ms, with weight 0.01 nA
-              if gid==0:
-                  cell.add_stimulus(loc, 0, 20, 0.01)
-              return cell
-
-          def num_targets(self, gid):
-              return 1
-
-          def num_sources(self, gid):
-              return 1
-
-          # The cell_kind method returns the type of cell with gid.
-          # Note: this must agree with the type returned by cell_description.
-          def cell_kind(self, gid):
-              return arbor.cell_kind.cable1d
-
-          # Make a ring network
-          def connections_on(self, gid):
-              # Define the source of the cell with gid as the previous cell (gid-1),
-              # taking care to close the ring at gid 0.
-              src = self.num_cells()-1 if gid==0 else gid-1
-              return [arbor.connection(arbor.cell_member(src,0), arbor.cell_member(gid,0), 0.1, 10)]
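Note that ``ring_recipe`` attaches a synapse and a stimulus, but no spike detector. As a sketch, ``cell_description`` could additionally place a threshold detector using ``add_detector`` described above, so that each cell provides the spike source its outgoing connection refers to (the -10 mV threshold is an illustrative value):

.. container:: example-code

   .. code-block:: python

      # Sketch: a cell description with a synapse, a stimulus and a spike detector.
      def cell_description(self, gid):
          cell = arbor.make_soma_cell()
          loc = arbor.segment_location(0, 0.5)
          cell.add_synapse(loc)
          cell.add_detector(loc, -10)    # spike detector with -10 mV threshold
          if gid==0:
              cell.add_stimulus(loc, 0, 20, 0.01)
          return cell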
diff --git a/doc/py_simulation.rst b/doc/py_simulation.rst
deleted file mode 100644
index 93c32d02149dc49968b77d7fdb9b3984984c4c89..0000000000000000000000000000000000000000
--- a/doc/py_simulation.rst
+++ /dev/null
@@ -1,214 +0,0 @@
-.. _pysimulation:
-
-Simulations
-===========
-
-A simulation is the executable form of a model.
-
-From recipe to simulation
--------------------------
-
-To build a simulation the following concepts are needed:
-
-   * an :class:`arbor.recipe` that describes the cells and connections in the model;
-   * an :class:`arbor.context` used to execute the simulation.
-
-The workflow to build a simulation is to first generate an
-:class:`arbor.domain_decomposition` based on the :class:`arbor.recipe` and :class:`arbor.context`, describing the distribution of the model
-over the local and distributed hardware resources (see :ref:`pydomdec`). Then, the simulation is built using the :class:`arbor.domain_decomposition`.
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      import arbor
-
-      # Get hardware resources, create a context
-      resources = arbor.proc_allocation()
-      context = arbor.context(resources)
-
-      # Initialize a recipe of user-defined type my_recipe with 100 cells.
-      n_cells = 100
-      recipe = my_recipe(n_cells)
-
-      # Get a description of how the model is partitioned over the cores
-      # (and GPU, if available) on the node.
-      decomp = arbor.partition_load_balance(recipe, context)
-
-      # Instantiate the simulation.
-      sim = arbor.simulation(recipe, decomp, context)
-
-      # Run the simulation for 2000 ms with a time step of 0.025 ms
-      tSim = 2000
-      dt = 0.025
-      sim.run(tSim, dt)
-
-.. currentmodule:: arbor
-
-.. class:: simulation
-
-   A simulation is constructed from a recipe, and then used to update and monitor the model state.
-
-   Simulations take the following inputs:
-
-      * an :class:`arbor.recipe` that describes the model;
-      * an :class:`arbor.domain_decomposition` that describes how the cells in the model are assigned to hardware resources;
-      * an :class:`arbor.context` which is used to execute the simulation.
-
-   Simulations provide an interface for executing and interacting with the model:
-
-      * **Advance the model state** from one time to another, and reset the model to its initial state as it was before the simulation started.
-      * Sample the simulation state during the execution (e.g. compartment voltage and current) and generate spike output by using an **I/O interface**.
-
-   **Constructor:**
-
-   .. function:: simulation(recipe, dom_dec, context)
-
-      Initialize the model described by a :attr:`recipe`, with cells and network distributed according to :attr:`dom_dec`, and computation resources described by :attr:`context`.
-
-   .. attribute:: recipe
-
-      An :class:`arbor.recipe`.
-
-   .. attribute:: dom_dec
-
-      An :class:`arbor.domain_decomposition`.
-
-   .. attribute:: context
-
-      An :class:`arbor.context`.
-
-   **Updating Model State:**
-
-   .. function:: reset()
-
-      Reset the state of the simulation to its initial state, to rerun the simulation.
-
-   .. function:: run(tfinal, dt)
-
-      Run the simulation from the current simulation time to :attr:`tfinal`,
-      with a maximum time step size :attr:`dt`.
-
-   .. attribute:: tfinal
-
-      The final simulation time (ms).
-
-   .. attribute:: dt
-
-      The time step size (ms).
-
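Because ``run`` advances the model from the current simulation time to :attr:`tfinal`, a simulation can be advanced in chunks, for example to inspect or post-process state between chunks, and ``reset`` rewinds it for a rerun. A minimal sketch, assuming ``sim`` has been constructed as above:

.. container:: example-code

   .. code-block:: python

      # Advance the simulation in 250 ms chunks up to 2000 ms.
      tfinal = 2000
      dt = 0.025
      t = 0
      while t < tfinal:
          t = min(t + 250, tfinal)
          sim.run(t, dt)    # advances from the current time to t

      # Rewind to the initial state, e.g. to rerun the model.
      sim.reset()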
-Recording spikes
-----------------
-In order to analyze the simulation output, spikes can be recorded.
-
-**Types**:
-
-.. class:: spike
-
-   .. function:: spike()
-
-      Construct a spike with default :attr:`arbor.cell_member.gid = 0` and :attr:`arbor.cell_member.index = 0`.
-
-   .. attribute:: source
-
-      The spike source (of type: :class:`arbor.cell_member` with :attr:`arbor.cell_member.gid` and :attr:`arbor.cell_member.index`).
-
-   .. attribute:: time
-
-      The spike time (ms, default: -1 ms).
-
-.. class:: sprec
-
-   .. function:: sprec()
-
-      Initialize the spike recorder.
-
-   .. attribute:: spikes
-
-      The recorded spikes (a list of :class:`spike`).
-
-**I/O interface**:
-
-.. function:: make_spike_recorder(simulation)
-
-   Record all spikes generated over all domains during a simulation (return type: :class:`sprec`).
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      import arbor
-
-      # Instantiate the simulation.
-      sim = arbor.simulation(recipe, decomp, context)
-
-      # Build the spike recorder
-      recorder = arbor.make_spike_recorder(sim)
-
-      # Run the simulation for 2000 ms with a time step of 0.025 ms
-      tSim = 2000
-      dt = 0.025
-      sim.run(tSim, dt)
-
-      # Get the recorder's spikes
-      spikes = recorder.spikes
-
-      # Print the spikes and the corresponding spike times
-      for i in range(len(spikes)):
-          spike = spikes[i]
-          print(' cell %2d at %8.3f ms'%(spike.source.gid, spike.time))
-
->>> SPIKES:
->>>  cell  0 at    5.375 ms
->>>  cell  1 at   15.700 ms
->>>  cell  2 at   26.025 ms
->>>  cell  3 at   36.350 ms
->>>  cell  4 at   46.675 ms
->>>  cell  5 at   57.000 ms
->>>  cell  6 at   67.325 ms
->>>  cell  7 at   77.650 ms
->>>  cell  8 at   87.975 ms
->>>  cell  9 at   98.300 ms
-
-The spikes recorded for the neurons (identified by :attr:`gid`) can then, for instance, be visualized in a raster plot over the spike times.
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      import numpy as np
-      import math
-      import matplotlib.pyplot as plt
-
-      # Use a raster plot to visualize spiking activity.
-      tVec = np.arange(0, tSim, dt)
-      SpikeMat_rows = n_cells               # number of cells
-      SpikeMat_cols = math.floor(tSim/dt)   # number of time steps
-      SpikeMat = np.zeros((SpikeMat_rows, SpikeMat_cols))
-
-      # Save the spike trains in a matrix:
-      # (if cell n spikes in time step k, then SpikeMat[n,k]=1, else 0)
-      for i in range(len(spikes)):
-          spike = spikes[i]
-          tCur = math.floor(spike.time/dt)
-          SpikeMat[spike.source.gid][tCur] = 1
-
-      for i in range(SpikeMat_rows):
-          for j in range(SpikeMat_cols):
-              if SpikeMat[i,j] == 1:
-                  x1 = [i, i+0.5]
-                  x2 = [j, j]
-                  plt.plot(x2, x1, color='black')
-
-      plt.title('Spike raster plot')
-      plt.xlabel('Spike time (ms)')
-      tick = range(0, SpikeMat_cols+10000, 10000)
-      label = range(0, tSim+250, 250)
-      plt.xticks(tick, label)
-      plt.ylabel('Neuron (gid)')
-      plt.show()
-
-
-.. figure:: Rasterplot
-
-   An example spike raster plot.
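Building a dense spike matrix is straightforward, but costly for long simulations and small ``dt``. As a sketch of a leaner alternative, the same raster plot can be drawn directly from the spike times with matplotlib's ``eventplot``:

.. container:: example-code

   .. code-block:: python

      import matplotlib.pyplot as plt

      # Group the spike times by cell instead of building a dense matrix.
      times_per_cell = [[] for _ in range(n_cells)]
      for spike in spikes:
          times_per_cell[spike.source.gid].append(spike.time)

      plt.eventplot(times_per_cell, colors='black')
      plt.title('Spike raster plot')
      plt.xlabel('Spike time (ms)')
      plt.ylabel('Neuron (gid)')
      plt.show()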
diff --git a/doc/py_unittest.rst b/doc/py_unittest.rst
deleted file mode 100644
index 5cbc79fa965ad46e22cf95a72413e10c87b7a72e..0000000000000000000000000000000000000000
--- a/doc/py_unittest.rst
+++ /dev/null
@@ -1,209 +0,0 @@
-.. _pyunittest:
-
-Python Unit Testing
-====================
-
-To test individual units of Arbor's python module :py:mod:`arbor` during development with `pybind11 <https://pybind11.readthedocs.io/en/stable/intro.html>`_, python's `unittest <https://docs.python.org/3/library/unittest.html>`_ framework is used.
-
-Directory Structure
--------------------
-
-The predefined directory structure in the ``python/test`` folder
-
-* ``test/``
-   * ``options.py``
-   * ``unit/``
-      * ``runner.py``
-   * ``unit_distributed/``
-      * ``runner.py``
-
-is the entry point for organizing and running the unit tests.
-
-``python/test`` contains an ``options.py`` file that handles (global) command line options, such as ``-v{0,1,2}`` to set the verbosity.
-Further, ``python/test/unit`` holds all serial unit tests ``test_xxxs.py``, each defining unittest classes with test methods and its own test suite, whereas ``python/test/unit_distributed`` holds all distributed/parallel unit tests.
-
-Each of the subfolders ``python/test/unit`` and ``python/test/unit_distributed`` defines a ``runner.py`` module that runs all tests included in the test suite of the respective subfolder.
-
-Testing
---------
-
-The unit tests are started in the respective subfolder: ``python/test/unit`` for serial tests, and ``python/test/unit_distributed`` for tests related to distributed execution.
-
-In the respective folder the module ``runner`` is used to start the tests.
-
-
-.. container:: example-code
-
-   .. code-block:: bash
-
-      $ python -m runner
-
-      ...
-      ----------------------------------------------------------------------
-      Ran 3 tests in 0.001s
-
-      OK
-
-The command line option ``-v{0,1,2}`` defined in ``options.py`` increases or decreases the verbosity, e.g.
-
-.. container:: example-code
-
-   .. code-block:: bash
-
-      $ python -m runner -v2
-
-      test_context (test.unit.test_contexts.Contexts) ... ok
-      test_default (test.unit.test_contexts.Contexts) ... ok
-      test_resources (test.unit.test_contexts.Contexts) ... ok
-
-      ----------------------------------------------------------------------
-      Ran 3 tests in 0.001s
-
-      OK
-
-To run a specific test in the subfolder, the test module ``test_xxxs`` needs to be executed, e.g.
-
-.. container:: example-code
-
-   .. code-block:: bash
-
-      python -m test_contexts
-
-From any folder other than the respective test folder, the python file needs to be executed, e.g.
-
-.. container:: example-code
-
-   .. code-block:: bash
-
-      python path/to/test_xxxs.py
-
-Adding New Tests
------------------
-During development of Arbor's python front end (via wrapper functions using `pybind11 <https://pybind11.readthedocs.io/en/stable/intro.html>`_) new unit tests are continually added.
-Three basic steps are performed, sketched in the template after this list:
-
-1) Create a ``test_xxxs.py`` file in the appropriate subfolder: ``python/test/unit`` (serial cases) or ``python/test/unit_distributed`` (parallel cases).
-
-2) In this file ``test_xxxs.py``
-
-   a) import all necessary modules, e.g. ``import unittest``, ``import arbor``, ``import options``;
-   b) define a unit test ``class Xxxs(unittest.TestCase)`` with test methods ``test_yyy`` using ``assert`` functions;
-   c) add a ``suite()`` function consisting of all desired tests (either as a tuple or all starting with ``test``) and returning a unittest suite, e.g. ``unittest.makeSuite(Xxxs, ('test'))``;
-   d) add a ``run()`` function with a ``runner = unittest.TextTestRunner()`` that runs the suite via ``runner.run(suite())``;
-   e) finally, in ``if __name__ == "__main__":`` call ``run()``.
-
-3) In the ``runner.py`` file
-
-   a) ``import test_xxxs`` (and ``from test.subfolder import test_xxxs``);
-   b) add the new test module ``test_xxxs`` to the ``test_modules`` list.
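Steps 2a)-2e) map onto a compact module skeleton; ``Xxxs`` and ``test_yyy`` are placeholders for the unit under test, and a full, concrete example follows below:

.. container:: example-code

   .. code-block:: python

      # test_xxxs.py -- skeleton following steps 2a)-2e); names are placeholders.

      import unittest
      import arbor

      try:
          import options
      except ModuleNotFoundError:
          from test import options

      class Xxxs(unittest.TestCase):          # b) one test class per module
          def test_yyy(self):                 #    test methods start with 'test'
              self.assertTrue(True)           #    replace with real assertions

      def suite():                            # c) collect all 'test*' methods
          return unittest.makeSuite(Xxxs, ('test'))

      def run():                              # d) run the suite
          v = options.parse_arguments().verbosity
          runner = unittest.TextTestRunner(verbosity = v)
          runner.run(suite())

      if __name__ == "__main__":              # e) entry point
          run()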
-
-**Naming Convention**
-
-   * Modules: ``test_xxxs.py`` all lower case, ending with ``s`` since a module can consist of multiple classes;
-   * Class(es): ``Xxxs`` first letter upper case, ending with ``s`` since a class can consist of multiple test methods;
-   * Methods: ``test_yyy`` all lower case, always starting with ``test`` since the suite is built from all methods starting with ``test``.
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      # test_contexts.py
-
-      import unittest
-
-      import arbor
-
-      # to be able to run the .py file from a child directory
-      import sys, os
-      sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
-
-      try:
-          import options
-      except ModuleNotFoundError:
-          from test import options
-
-      class Contexts(unittest.TestCase):
-
-          def test_context(self):
-              alloc = arbor.proc_allocation()
-
-              ctx1 = arbor.context()
-
-              self.assertEqual(ctx1.threads, alloc.threads)
-              self.assertEqual(ctx1.has_gpu, alloc.has_gpu)
-
-              # default construction does not use a GPU or MPI
-              self.assertEqual(ctx1.threads, 1)
-              self.assertFalse(ctx1.has_gpu)
-              self.assertFalse(ctx1.has_mpi)
-              self.assertEqual(ctx1.ranks, 1)
-              self.assertEqual(ctx1.rank, 0)
-
-              # change the allocation
-              alloc.threads = 23
-              self.assertEqual(alloc.threads, 23)
-              alloc.gpu_id = -1
-              self.assertEqual(alloc.gpu_id, -1)
-
-              # test context construction with proc_allocation()
-              ctx2 = arbor.context(alloc)
-              self.assertEqual(ctx2.threads, alloc.threads)
-              self.assertEqual(ctx2.has_gpu, alloc.has_gpu)
-              self.assertEqual(ctx2.ranks, 1)
-              self.assertEqual(ctx2.rank, 0)
-
-
-      def suite():
-          # specify the class and test functions in a tuple
-          # (here: all tests starting with 'test' from class Contexts)
-          suite = unittest.makeSuite(Contexts, ('test'))
-          return suite
-
-      def run():
-          v = options.parse_arguments().verbosity
-          runner = unittest.TextTestRunner(verbosity = v)
-          runner.run(suite())
-
-      if __name__ == "__main__":
-          run()
-
-.. container:: example-code
-
-   .. code-block:: python
-
-      # runner.py
-
-      import unittest
-
-      # to be able to run the .py file from a child directory
-      import sys, os
-      sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
-
-      try:
-          import options
-          import test_contexts
-          # add more if needed
-      except ModuleNotFoundError:
-          from test import options
-          from test.unit import test_contexts
-          # add more if needed
-
-      test_modules = [
-          test_contexts
-      ] # add more if needed
-
-      def suite():
-          loader = unittest.TestLoader()
-
-          suites = []
-          for test_module in test_modules:
-              test_module_suite = test_module.suite()
-              suites.append(test_module_suite)
-
-          suite = unittest.TestSuite(suites)
-
-          return suite
-
-      if __name__ == "__main__":
-          v = options.parse_arguments().verbosity
-          runner = unittest.TextTestRunner(verbosity = v)
-          runner.run(suite())