diff --git a/.travis.yml b/.travis.yml index 3cb473553e0b9b7a5cc3fb34c7863144e2cc763d..3a6abccecca892af460b2023250c504e4e79c343 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ env: - BUILD_NAME=tbb WITH_THREAD=tbb WITH_DISTRIBUTED=serial # test mpi - BUILD_NAME=mpi WITH_THREAD=cthread WITH_DISTRIBUTED=mpi - - BUILD_NAME=mpitbb WITH_THREAD=tbb WITH_DISTRIBUTED=mpi + - BUILD_NAME=mpitbb WITH_THREAD=tbb WITH_DISTRIBUTED=mpi before_install: - CC=gcc-5 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0566fffa4bca13258376d3bec36b3a0763fd1755..3c05fdcf4a7927ca221050222ab25596631146db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,8 +11,13 @@ if ("${CMAKE_VERSION}" MATCHES "^3.[0-9].") cmake_policy(SET CMP0023 OLD) endif() -# save incoming CXX flags for forwarding to modcc external project -set(SAVED_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +# Set release as the default build type. +# Otherwise, CMake leaves the build type empty and applies no optimization flags. +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE release CACHE STRING "Choose the type of build." 
FORCE) + # Set the possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "debug" "release") +endif() # compilation flags set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -205,7 +210,7 @@ endif() #---------------------------------------------------------- # vectorization target #---------------------------------------------------------- -set(ARB_VECTORIZE_TARGET "none" CACHE STRING "CPU target for vectorization {KNL,AVX2,AVX512}") +set(ARB_VECTORIZE_TARGET "none" CACHE STRING "CPU target for vectorization {none,KNL,AVX2,AVX512}") set_property(CACHE ARB_VECTORIZE_TARGET PROPERTY STRINGS none KNL AVX2 AVX512) # Note: this option conflates modcc code generation options and diff --git a/README.md b/README.md index 728641df9e46a5cf3e5c19e1f113524bc6f284ca..1b6fc6ee4635292180bb38ae2dd8daba87030fb5 100644 --- a/README.md +++ b/README.md @@ -1,224 +1,9 @@ -# Arbor Library [](https://travis-ci.org/eth-cscs/arbor) +# Arbor Library -This is the repository for the Arbor library. Unfortunately we do not have thorough documentation of how-to guides. -Below are some guides for how to build the project and run the miniapp. -Contact us or submit a ticket if you have any questions or want help. -https://github.com/eth-cscs/arbor +[](https://travis-ci.org/eth-cscs/arbor) -1. Basic installation -2. MPI -3. TBB -4. TBB on Cray systems -5. Targeting KNL -6. Examples of environment configuration - - Julia - -## Basic installation -```bash -# clone repository -git clone git@github.com:eth-cscs/arbor.git -cd arbor/ +Arbor is a library for implementing performance portable network simulations of multi-compartment neuron models. -# setup environment -# on a desktop system this is probably not required -# on a cluster this is usually required to make sure that an appropriate -# compiler is chosen. 
-module load gcc -module load cmake -export CC=`which gcc` -export CXX=`which g++` - -# build main project (out-of-tree) -mkdir build -cd build -cmake <path to CMakeLists.txt> -make -j - -# test -cd tests -./test.exe -``` - -## MPI - -Set the `ARB_WITH_MPI` option either via the ccmake interface, or via the command line as shown below. -To ensure that CMake detects MPI correctly, you should specify the MPI wrapper for the compiler by setting the `CXX` and `CC` environment variables. - -``` -export CXX=mpicxx -export CC=mpicc -cmake <path to CMakeLists.txt> -DARB_WITH_MPI=ON -``` - -## TBB - -Support for multi-threading requires Intel Threading Building Blocks (TBB). -When TBB is installed, it comes with some scripts that can be run to set up the user environment. -The scripts set the `TBB_ROOT` environment variable, which is used by the CMake configuration to find TBB. - -``` -cmake <path to CMakeLists.txt> -DARB_THREADING_MODEL=tbb -``` - -### TBB on Cray systems - -TBB requires dynamic linking, which is not enabled by default in the Cray programming environment. -CMake is quite brittle, so take care to follow these step closely. -TBB provides a CMake package that will attempt to automatically download and compile TBB from within CMake. -Set the environment variable `CRAYPE_LINK_TYPE=dynamic`, to instruct the Cray PE linker to enable dynamic linking. -CMake (at least since CMake 3.6) will automatically detect the Cray programming environment, and will by default use static linking, unless the `CRAYPE_LINK_TYPE` environment variable has been set to `dynamic`. -Note, the CMake package provided by TBB is very fragile, and won't work if CMake is forced to use the `CrayLinuxEnvironment` as shown in the code below. Instead, let Cmake automatically detect the programming environment. 
- -``` -export CRAYPE_LINK_TYPE=dynamic -cmake <path-to-arbor-source> -DARB_THREADING_MODEL=tbb - -# NOTE: specifying CMAKE_SYSTEM_NAME won't work, instead let CMake automatically -# detect the build environment as above. -cmake <path-to-arbor-source> -DARB_THREADING_MODEL=tbb -DCMAKE_SYSTEM_NAME=CrayLinuxEnvironment -``` - -``` -export CRAYPE_LINK_TYPE=dynamic -``` - -## targeting KNL - -#### build modparser without KNL environment - -The source to source compiler "modparser" that generates the C++/CUDA kernels for the ion channels and synapses is in a separate repository. -By default it will be built with the same compiler and flags that are used to build the miniapp and tests. - -This can cause problems if we are cross compiling, e.g. for KNL, because the modparser compiler might not be runnable on the compilation node. -You are probably best of building the software twice: Once without KNL support to create the modcc parser and next the KNL version using -the now compiled executable - -Modparser requires a C++11 compiler, and has been tested on GCC, Intel, and Clang compilers - - if the default compiler on your is some ancient version of gcc you might need to load a module/set the CC and CXX environment variables. - -CMake will look for the source to source compiler executable, `modcc`, in the `PATH` environment variable, and will use the version if finds instead of building its own. 
-So add the g++ compiled modcc to your path -e.g: - -```bash -# First build a 'normal' non KNL version of the software - -# Load your environment (see section 6 for detailed example) -export CC=`which gcc`; export CXX=`which g++` - -# Make directory , do the configuration and build -mkdir build -cd build -cmake <path to CMakeLists.txt> -DCMAKE_BUILD_TYPE=release -make -j8 - -# set path and test that you can see modcc -export PATH=`pwd`/modcc:$PATH -which modcc -``` - -#### set up environment - -- source the intel compilers -- source the TBB vars -- I have only tested with the latest stable version from on-line, not the version that comes installed sometimes with the Intel compilers. - -#### build miniapp - -```bash -# clone the repository and set up the submodules -git clone https://github.com/eth-cscs/arbor.git -cd arbor - -# make a path for out of source build -mkdir build_knl -cd build_knl - -# run cmake with all the magic flags -export CC=`which icc` -export CXX=`which icpc` -cmake <path to CMakeLists.txt> -DCMAKE_BUILD_TYPE=release -DARB_THREADING_MODEL=tbb -DARB_WITH_PROFILING=ON -DARB_VECTORIZE_TARGET=KNL -make -j -``` - -The flags passed into cmake are described: - - `-DCMAKE_BUILD_TYPE=release` : build in release mode with `-O3`. - - `-DARB_THREADING_MODEL=tbb` : use TBB for threading on multi-core - - `-DARB_WITH_PROFILING=ON` : use internal profilers that print profiling report at end - - `-DARB_VECTORIZE_TARGET=KNL` : generate AVX512 instructions, alternatively you can use: - - `AVX2` for Haswell & Broadwell - - `AVX` for Sandy Bridge and Ivy Bridge - -Currently, the Intel compiler is required when you specify a vectorize target. - -#### run tests - -Run some unit tests -```bash -cd tests -./test.exe -cd .. -``` - -## run miniapp - -The miniapp is the target for benchmarking. -First, we can run a small problem to check the build. 
-For the small test run, the parameters have the following meaning - - `-n 1000` : 1000 cells - - `-s 200` : 200 synapses per cell - - `-t 20` : simulated for 20ms - - `-p 0` : no file output of voltage traces - -The number of cells is the number of discrete tasks that are distributed to the threads in each large time integration period. -The number of synapses per cell is the amount of computational work per cell/task. -Realistic cells have anywhere in the range of 1,000-10,000 synapses per cell. - -```bash -cd miniapp - -# a small run to check that everything works -./miniapp.exe -n 1000 -s 200 -t 20 -p 0 - -# a larger run for generating meaningful benchmarks -./miniapp.exe -n 2000 -s 2000 -t 100 -p 0 -``` - -This generates the following profiler output (some reformatting to make the table work): - -``` - --------------------------------------- - | small | large | --------------|-------------------|-------------------| -total | 0.791 100.0 | 38.593 100.0 | - stepping | 0.738 93.3 | 36.978 95.8 | - matrix | 0.406 51.3 | 6.034 15.6 | - solve | 0.308 38.9 | 4.534 11.7 | - setup | 0.082 10.4 | 1.260 3.3 | - other | 0.016 2.0 | 0.240 0.6 | - state | 0.194 24.5 | 23.235 60.2 | - expsyn | 0.158 20.0 | 22.679 58.8 | - hh | 0.014 1.7 | 0.215 0.6 | - pas | 0.003 0.4 | 0.053 0.1 | - other | 0.019 2.4 | 0.287 0.7 | - current | 0.107 13.5 | 7.106 18.4 | - expsyn | 0.047 5.9 | 6.118 15.9 | - pas | 0.028 3.5 | 0.476 1.2 | - hh | 0.006 0.7 | 0.096 0.2 | - other | 0.026 3.3 | 0.415 1.1 | - events | 0.005 0.6 | 0.125 0.3 | - sampling | 0.003 0.4 | 0.051 0.1 | - other | 0.024 3.0 | 0.428 1.1 | - other | 0.053 6.7 | 1.614 4.2 | ------------------------------------------------------ -``` - -## Examples of environment configuration -### Julia (HBP PCP system) -``` bash -module load cmake -module load intel-ics -module load openmpi_ics/2.0.0 -module load gcc/6.1.0 -``` +An installation guide and library documentation are available online at [Read the 
Docs](http://arbor.readthedocs.io/en/latest/). +[Submit a ticket](https://github.com/eth-cscs/arbor/issues) if you have any questions or want help. diff --git a/doc/index.rst b/doc/index.rst index 661a67946e69f057bb4f0f40ebd7f540e1e4d59c..bbd64a5f67e52ea3a8fb8e933f32835350dcb843 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,12 +1,41 @@ Arbor ===== -Arbor is a high-performance library for computationa neurscience simulations. +.. image:: https://travis-ci.org/eth-cscs/arbor.svg?branch=master + :target: https://travis-ci.org/eth-cscs/arbor -.. toctree:: - :maxdepth: 1 +What is Arbor? +-------------- + +Arbor is a high-performance library for computational neuroscience simulations. + +The development team is from high-performance computing (HPC) centers: + + * Swiss National Supercomputing Center (CSCS), Jülich and BSC in work package 7.5.4 of the HBP. + * Aim to prepare neuroscience users for new HPC architectures; + +Arbor is designed from the ground up for **many core** architectures: + + * Written in C++11 and CUDA; + * Distributed parallelism using MPI; + * Multithreading with TBB and C++11 threads; + * **Open source** and **open development**; + * Sound development practices: **unit testing**, **continuous integration**, + and **validation**. + +Features +-------- + +We are actively developing Arbor, improving performance and adding features. +Some key features include: - introduction + * Optimized back ends for CUDA, KNL and AVX2 intrinsics. + * Asynchronous spike exchange that overlaps compute and communication. + * Efficient sampling of voltage and current on all back ends. + * Efficient implementation of all features on GPU. + * Reporting of memory and energy consumption (when available on platform). + * An API for addition of new cell types, e.g. LIF and Poisson spike generators. + * Validation tests against numeric/analytic models and NEURON. .. 
toctree:: :caption: Getting Stared: diff --git a/doc/install.rst b/doc/install.rst index cf860e8ad230364f48a1739040c3aaaac6897c0c..7ddce436cf8f625f90160f452b97034ba86efd3c 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -1,58 +1,681 @@ -Installing -############## +Installing Arbor +################ -Installation guide. +Installation of Arbor is done by obtaining the source code and compiling it on +the target system. + +This guide starts with an overview of the building process, and the various options +available to customize the build. +The guide then covers installation and running on `HPC clusters <cluster_>`_, followed by a +`troubleshooting guide <troubleshooting_>`_ for common build problems. .. _install_requirements: -Before starting -=============== +Requirements +============ + +Minimum Requirements +-------------------- + +The non distributed (i.e. no MPI) version of Arbor can be compiled on Linux or OS X systems +with very few tools. + +.. table:: Required Tools + + =========== ============================================ + Tool Notes + =========== ============================================ + Git To check out the code, min version 2.0. + CMake To set up the build, min version 3.0. + compiler A C++11 compiler. See `compilers <compilers_>`_. + =========== ============================================ + +.. _compilers: + +Compilers +~~~~~~~~~ + +Arbor requires a C++ compiler that fully supports C++11 (we have plans to move +to C++14 soon). +We recommend using GCC or Clang, for which Arbor has been tested and optimised. + +.. table:: Supported Compilers + + =========== ============ ============================================ + Compiler Min version Notes + =========== ============ ============================================ + GCC 5.2.0 5.1 probably works, 5.0 doesn't. + Clang 4.0 Clang 3.8 and later probably work. + Apple Clang 9 + Intel 17.0.1 Needs GCC 5 or later for standard library. 
+ =========== ============ ============================================ + +.. _note_CC: + +.. Note:: + The ``CC`` and ``CXX`` environment variables specify which compiler executable + CMake should use. If these are not set, CMake will attempt to automatically choose a compiler, + which may be too old to compile Arbor. + For example, the default compiler chosen below by CMake was GCC 4.8.5 at ``/usr/bin/c++``, + so the ``CC`` and ``CXX`` variables were used to specify GCC 5.2.0 before calling ``cmake``. -We will require: + .. code-block:: bash - * git - * cmake 3.0 - * C++11 compliant compiler + # on this system CMake chooses the following compiler by default + $ c++ --version + c++ (GCC) 4.8.5 20150623 (Red Hat 4.8.5-16) -For GPU support: + # check which version of GCC is available + $ g++ --version + g++ (GCC) 5.2.0 + Copyright (C) 2015 Free Software Foundation, Inc. - * NVIDIA CUDA toolkit 8.0 + # set environment variables for compilers + $ export CC=`which gcc`; export CXX=`which g++`; -To make these docs you also need: + # launch CMake + # the compiler version and path are given in the CMake output + $ cmake .. + -- The C compiler identification is GNU 5.2.0 + -- The CXX compiler identification is GNU 5.2.0 + -- Check for working C compiler: /cm/local/apps/gcc/5.2.0/bin/gcc + -- Check for working C compiler: /cm/local/apps/gcc/5.2.0/bin/gcc -- works + ... - * Sphinx +.. Note:: + It is commonly assumed that to get the best performance one should use a vendor-specific + compiler (e.g. the Intel, Cray or IBM compilers). These compilers are often better at + auto-vectorizing loops, however for everything else GCC and Clang nearly always generate + more efficient code. + + The main computational loops in Arbor are generated from + `NMODL <https://www.neuron.yale.edu/neuron/static/docs/help/neuron/nmodl/nmodl.html>`_. 
+ The generated code is explicitly vectorised, obviating the need for vendor compilers, + and we can take advantage of the benefits of GCC and Clang: + faster compilation times; fewer compiler bugs; and support for recent C++ standards. + +.. Note:: + The IBM xlc compiler versions 13.1.4 and 13.1.6 have been tested for compiling on + IBM Power 8. Arbor contains some patches to work around xlc compiler bugs, + however we do not recommend using xlc because GCC produces faster code, + with faster compilation times. + +Optional Requirements +--------------------- + +GPU Support +~~~~~~~~~~~ + +Arbor has full support for NVIDIA GPUs, for which the NVIDIA CUDA toolkit version 8 is required. + +Distributed +~~~~~~~~~~~ + +Arbor uses MPI to run on HPC cluster systems. +Arbor has been tested on MVAPICH2, OpenMPI, Cray MPI, and IBM MPI. +More information on building with MPI is in the `HPC cluster section <cluster_>`_. + +Documentation +~~~~~~~~~~~~~~ + +To build a local copy of the html documentation that you are reading now, you will need to +install `Sphinx <http://www.sphinx-doc.org/en/master/>`_. .. _downloading: -Downloading -====================================== +Getting the Code +================ -The easiest way to acquire the latest version of Arbor is to check the code out from our GitHub repository: +The easiest way to acquire the latest version of Arbor is to check the code out from +the `Github repository <https://github.com/eth-cscs/arbor>`_: .. code-block:: bash - git clone https://github.com/eth-cscs/arbor.git + git clone https://github.com/eth-cscs/arbor.git --recurse-submodules + +We recommend using a recursive checkout, because Arbor uses Git submodules for some +of its library dependencies. +The CMake configuration attempts to detect if a required submodule is available, and +will print a helpful warning +or error message if not, but it is up to the user to ensure that all required +submodules are downloaded. 
+ +The Git submodules can be updated, or initialized in a project that didn't use a +recursive checkout: + +.. code-block:: bash -You can also point your browser to our `Github page <https://github.com/eth-cscs/arbor>`_ and download a zip file. + git submodule update --init --recursive -.. _install_desktop: +You can also point your browser to Arbor's +`Github page <https://github.com/eth-cscs/arbor>`_ and download a zip file. +If you use the zip file, then don't forget to run Git submodule update manually. -Basic Installation -====================================== +.. _building: -Before building an optimzed version for your target system, it is a good idea to build a debug version: +Building Arbor +============== + +Once the Arbor code has been checked out, it can be built by first running CMake to configure the build, then running make. + +Below is a simple workflow for: **1)** getting the source; **2)** configuring the build; +**3)** building; then **4)** running tests. +For more detailed build configuration options, see the `quick start <quickstart_>`_ guide. .. code-block:: bash - # make a path for building + # 1) Clone. + git clone https://github.com/eth-cscs/arbor.git --recurse-submodules + cd arbor + + # Make a path for building mkdir build cd build - # configure and build + # 2) Use CMake to configure the build. + # By default Arbor builds in release mode, i.e. with optimizations on. + # Release mode should be used for installing and benchmarking Arbor. cmake .. + + # 3) Build Arbor. make -j 4 - # run tests + # 4) Run tests. ./test/test.exe ./test/global_communication.exe +This will build Arbor in release mode with the `default C++ compiler <note_CC_>`_. + +.. _quickstart: + +Quick Start: Examples +--------------------- + +Below are some examples of CMake configurations for Arbor. For more detail on individual +CMake parameters and flags, follow links to the more detailed descriptions below. + +.. 
topic:: `Debug <buildtarget_>`_ mode with `assertions <debugging_>`_, + `single threaded <threading_>`_. + + If you encounter problems building or running Arbor, compile with these options + for testing and debugging. + + .. code-block:: bash + + cmake .. -DARB_THREADING_MODEL=serial \ + -DARB_WITH_ASSERTIONS=ON \ + -DCMAKE_BUILD_TYPE=debug + +.. topic:: `Release <buildtarget_>`_ mode (i.e. build with optimization flags) + with `Clang <compilers_>`_ + + .. code-block:: bash + + export CC=`which clang` + export CXX=`which clang++` + cmake .. + +.. topic:: `Release <buildtarget_>`_ mode on `Haswell <vectorize_>`_ with `cthread threading <threading_>`_ + + .. code-block:: bash + + cmake .. -DARB_THREADING_MODEL=cthread -DARB_VECTORIZE_TARGET=AVX2 + +.. topic:: `Release <buildtarget_>`_ mode on `KNL <vectorize_>`_ with `TBB threading <threading_>`_ + + .. code-block:: bash + + cmake .. -DARB_THREADING_MODEL=tbb -DARB_VECTORIZE_TARGET=KNL + +.. topic:: `Release <buildtarget_>`_ mode with `CUDA <gpu_>`_ and `AVX2 <vectorize_>`_ and `GCC 5 <compilers_>`_ + + .. code-block:: bash + + export CC=gcc-5 + export CXX=g++-5 + cmake .. -DARB_VECTORIZE_TARGET=AVX2 -DARB_WITH_CUDA=ON + +.. _buildtarget: + +Build Target +------------ + +By default, Arbor is built in release mode, which should be used when installing +or benchmarking Arbor. To compile in debug mode (which in practical terms means +with ``-g -O0`` flags), use the ``CMAKE_BUILD_TYPE`` CMake parameter. + +.. code-block:: bash + + cmake -DCMAKE_BUILD_TYPE={debug,release} + +.. _vectorize: + +Vectorization +------------- + +Explicit vectorization of key computational kernels can be enabled in Arbor by setting the +``ARB_VECTORIZE_TARGET`` CMake parameter: + +.. code-block:: bash + + cmake -DARB_VECTORIZE_TARGET={none,KNL,AVX2,AVX512} + +By default the ``none`` target is selected, which relies on compiler auto-vectorization. + +.. Warning:: + The vectorization target must be supported by the target architecture. 
+ A sure sign that an unsupported vectorization was chosen is an ``Illegal instruction`` + error at runtime. In the example below, the unit tests for an ``ARB_VECTORIZE_TARGET=AVX2`` + build are run on an Ivy Bridge CPU, which does not support AVX2 vector instructions: + + .. code-block:: none + + $ ./tests/test.exe + [==========] Running 581 tests from 105 test cases. + [----------] Global test environment set-up. + [----------] 15 tests from algorithms + [ RUN ] algorithms.parallel_sort + Illegal instruction + + See the hints on `cross compiling <crosscompiling_>`_ if you get illegal instruction + errors when trying to compile on HPC systems. + +.. Note:: + The vectorization selection will change soon, to an interface with two parameters. The first + will toggle vectorization, and the second will specify a specific architecture to target. + For example, to generate optimized code for Intel Broadwell (i.e. AVX2 intrinsics): + + .. code-block:: bash + + cmake -DCMAKE_BUILD_TYPE=release \ + -DARB_ARCH=broadwell \ + -DARB_VECTORIZE=ON + + +.. _threading: + +Multithreading +-------------- + +Arbor provides three possible multithreading implementations. The implementation +is selected at compile time by setting the ``ARB_THREADING_MODEL`` CMake option: + +.. code-block:: bash + + cmake -DARB_THREADING_MODEL={serial,cthread,tbb} + +By default Arbor is built with multithreading enabled with the **cthread** backend, +which is implemented in the Arbor source code. + + +.. table:: Threading Models. + + =========== ============== ================================================= + Model Source Description + =========== ============== ================================================= + **cthread** Arbor Default. Multithreaded, based on C++11 ``std::thread``. + **serial** Arbor Single threaded. + **tbb** Git submodule `Intel TBB <https://www.threadingbuildingblocks.org/>`_. + Recommended when using many threads. 
+ =========== ============== ================================================= + +.. Note:: + The default ``cthread`` threading is suitable for most applications. + However there are some situations when the overheads of the threading runtime + become significant. This is often the case for: + + * simulations with many small/light cells (e.g. LIF cells); + * running with many threads, such as on IBM Power 8 (80 threads/socket) or Intel + KNL (64-256 threads/socket). + + The TBB threading back end is highly optimized, and well suited to these cases. + + +.. Note:: + If the TBB back end is selected, Arbor's CMake uses a Git submodule of the TBB + repository to build and link a static version of the TBB library. If you get + an error stating that the TBB submodule is not available, you must update the Git + submodules: + + .. code-block:: bash + + git submodule update --init --recursive + +.. Note:: + The TBB back end can be used on IBM Power 8 systems. + +.. _gpu: + +GPU Backend +----------- + +Arbor supports NVIDIA GPUs using CUDA. The CUDA back end is enabled by setting the CMake ``ARB_WITH_CUDA`` option. + +.. code-block:: bash + + cmake .. -DARB_WITH_CUDA=ON + +.. Note:: + Arbor requires: + * CUDA version >= 8 + * P100 or more recent GPU (``-arch=sm_60``) + +.. _cluster: + +HPC Clusters +============ + +HPC clusters offer their own unique challenges when compiling and running +software, so we cover some common issues in this section. If you have problems +on your target system that are not covered here, please make an issue on the +Arbor `Github issues <https://github.com/eth-cscs/arbor/issues>`_ page. +We will do our best to help you directly, and update this guide to help other users. + +MPI +--- + +Arbor uses MPI for distributed systems. By default it is built without MPI support, which +can be enabled by setting the ``ARB_DISTRIBUTED_MODEL`` CMake parameter. +An example of building Arbor with MPI, high-performance threading and optimizations enabled +is: + +.. 
code-block:: bash + + # set the compiler wrappers + export CC=`which mpicc` + export CXX=`which mpicxx` + + # configure with mpi, tbb threading and compiled with optimizations + cmake .. -DARB_DISTRIBUTED_MODEL=mpi \ # Use MPI + -DCMAKE_BUILD_TYPE=release \ # Optimizations on + -DARB_THREADING_MODEL=tbb \ # TBB threading library + + # run unit tests for global communication on 2 MPI ranks + mpirun -n 2 ./tests/global_communication.exe + +The example above sets the ``CC`` and ``CXX`` environment variables to use compiler +wrappers provided by the MPI implementation. It is recommended to use compiler +wrappers for MPI, unless you know what you are doing and have a specific use +case or issue to work around. + +.. Note:: + MPI distributions provide **compiler wrappers** for compiling MPI applications. + + In the example above the compiler wrappers for C and C++ are called + ``mpicc`` and ``mpicxx`` respectively. The name of the compiler wrapper + is dependent on the MPI distribution. + + The wrapper forwards the compilation to a compiler, like GCC, and + you have to ensure that this compiler is able to compile Arbor. For wrappers + that call GCC, Intel or Clang compilers, you can pass the ``--version`` flag + to the wrapper. For example, on a Cray system where the C++ wrapper is called ``CC``: + + .. code-block:: bash + + $ CC --version + g++ (GCC) 6.2.0 20160822 (Cray Inc.) + +Cray Systems +------------ + +The compiler used by the MPI wrappers is set using a "programming environment" module. +The first thing to do is change this module, which by default is set to the Cray +programming environment. +For example, to use the GCC compilers, select the GNU programming environment: + +.. code-block:: bash + + module swap PrgEnv-cray PrgEnv-gnu + +The version of GCC can then be set by choosing an appropriate gcc module. +In the example below we use ``module avail`` to see which versions of GCC are available, +then choose GCC 7.1.0. + +.. 
code-block:: bash + + $ module avail gcc # see all available gcc versions + + ------------------------- /opt/modulefiles --------------------------- + gcc/4.9.3 gcc/6.1.0 gcc/7.1.0 gcc/5.3.0(default) gcc/6.2.0 + + $ module swap gcc/7.1.0 # swap gcc 5.3.0 for 7.1.0 + + $ CC --version # test that the wrapper uses gcc 7.1.0 + g++ (GCC) 7.1.0 20170502 (Cray Inc.) + + # set compiler wrappers + $ export CC=`which cc` + $ export CXX=`which CC` + +Note that the C and C++ compiler wrappers are called ``cc`` and ``CC`` +respectively on Cray systems. + +CMake detects that it is being run in the Cray programming environment, which makes +our lives a little bit more difficult (CMake sometimes tries a bit too hard to help). +To get CMake to correctly link our code, we need to set the ``CRAYPE_LINK_TYPE`` +environment variable to ``dynamic``. + +.. code-block:: bash + + export CRAYPE_LINK_TYPE=dynamic + +Putting it all together, a typical workflow to configure the environment and CMake, +then build Arbor is: + +.. code-block:: bash + + export CRAYPE_LINK_TYPE=dynamic + module swap PrgEnv-cray PrgEnv-gnu + module swap gcc/7.1.0 + export CC=`which cc`; export CXX=`which CC`; + cmake .. -DARB_DISTRIBUTED_MODEL=mpi \ # MPI support + -DCMAKE_BUILD_TYPE=release \ # optimized + -DARB_THREADING_MODEL=tbb \ # tbb threading + -DARB_SYSTEM_TYPE=Cray # turn on Cray specific options + +.. Note:: + If ``CRAYPE_LINK_TYPE`` isn't set, there will be warnings like the following when linking: + + .. code-block:: none + + warning: Using 'dlopen' in statically linked applications requires at runtime + the shared libraries from the glibc version used for linking + + Often the library or executable will work, however if a different glibc is loaded, + Arbor will crash at runtime with obscure errors that are very difficult to debug. + + +.. _troubleshooting: + +Troubleshooting +=============== + +.. 
_crosscompiling: + +Cross Compiling NMODL +--------------------- + +Care must be taken when Arbor is compiled on a system with a different architecture to the target system where Arbor will run. +This occurs quite frequently on HPC systems, for example when building on a login/service node that has a different architecture to the compute nodes. + +.. Note:: + If building Arbor on a laptop or desktop system, i.e. on the same computer that + you will run Arbor on, cross compilation is not an issue. + +.. Warning:: + ``Illegal instruction`` errors are a sure sign that + Arbor is running on a system that does not support the architecture it was compiled for. + +When cross compiling, we have to take care that the *modcc* compiler, which is used to convert NMODL to C++/CUDA code, is able to run on the compilation node. + +By default, CMake looks for the *modcc* executable, ``modcc``, in paths specified by the ``PATH`` environment variable, and will use this executable if it finds it. +Otherwise, the CMake script will build *modcc* from source. +To ensure that cross compilation works, a copy of modcc that is compiled for the build system should be in ``PATH``. + +Here we will use the example of compiling for Intel KNL on a Cray system, which has Intel Sandy Bridge CPUs on login nodes that don't support the AVX512 instructions used by KNL. + + +.. code-block:: bash + + # + # Step 1: Build modcc. + # + + module swap PrgEnv-cray PrgEnv-gnu + # Important: use GNU compilers directly, not the compiler wrappers, + # which generate code for KNL, not the login nodes. + export CC=`which gcc`; export CXX=`which g++`; + export CRAYPE_LINK_TYPE=dynamic + + # make a path for the modcc build + mkdir build_modcc + cd build_modcc + + # configure and make modcc + cmake .. + make -j modcc + + # set PATH to find modcc + cd .. + export PATH=`pwd`/build_modcc/modcc:$PATH + + # + # Step 2: Build Arbor. 
+ # + + mkdir build; cd build; + # use the compiler wrappers to build Arbor + export CC=`which cc`; export CXX=`which CC`; + cmake .. -DARB_DISTRIBUTED_MODEL=mpi \ + -DCMAKE_BUILD_TYPE=release \ + -DARB_THREADING_MODEL=tbb \ + -DARB_SYSTEM_TYPE=Cray \ + -DARB_VECTORIZE_TARGET=KNL + + +.. Note:: + Cross compilation issues can occur when there are minor differences between login and compute nodes, e.g. + when the login node has Intel Haswell, and the compute nodes have Intel Broadwell. + + Other systems, such as IBM BGQ, have very different architectures for login and compute nodes. + + If the *modcc* compiler was not compiled for the login node, illegal instruction errors will + occur when building, e.g. + + .. code-block:: none + + $ make + ... + [ 40%] modcc generating: /users/bcumming/arbor_knl/mechanisms/multicore/pas_cpu.hpp + /bin/sh: line 1: 12735 Illegal instruction (core dumped) /users/bcumming/arbor_knl/build_modcc/modcc/modcc -t cpu -s\ avx512 -o /users/bcumming/arbor_knl/mechanisms/multicore/pas /users/bcumming/arbor_knl/mechanisms/mod/pas.mod + mechanisms/CMakeFiles/build_all_mods.dir/build.make:69: recipe for target '../mechanisms/multicore/pas_cpu.hpp' failed + + If you have errors when running the tests or a miniapp, then either the wrong + ``ARB_VECTORIZE_TARGET`` was selected; or you might have forgot to launch on the + compute node. e.g.: + + .. code-block:: none + + $ ./tests/test.exe + Illegal instruction (core dumped) + + On the Cray KNL system, ``srun`` is used to launch (it might be ``mpirun`` + or similar on your system): + + .. code-block:: none + + $ srun -n1 -c1 ./tests/test.exe + [==========] Running 609 tests from 108 test cases. + [----------] Global test environment set-up. + [----------] 15 tests from algorithms + [ RUN ] algorithms.parallel_sort + [ OK ] algorithms.parallel_sort (15 ms) + [ RUN ] algorithms.sum + [ OK ] algorithms.sum (0 ms) + ... + + +.. 
_debugging: + +Debugging +--------- + +Sometimes things go wrong: tests fail, simulations give strange results, segmentation +faults occur and exceptions are thrown. + +A good first step when things go wrong is to turn on additional assertions that can +catch errors. These are turned off by default (because they slow things down a lot), +and have to be turned on by setting the ``ARB_WITH_ASSERTIONS`` CMake option: + +.. code-block:: bash + + cmake -DARB_WITH_ASSERTIONS=ON + +.. Note:: + These assertions are in the form of ``EXPECTS`` statements inside the code, + for example: + + .. code-block:: cpp + + void decrement_min_remaining() { + EXPECTS(min_remaining_steps_>0); + if (!--min_remaining_steps_) { + compute_min_remaining(); + } + } + + A failing ``EXPECTS`` statement indicates an error inside the Arbor + library, caused either by a logic error in Arbor, or incorrectly checked user input. + + If this occurs, it is highly recommended that you attach the output to the + `bug report <https://github.com/eth-cscs/arbor/issues>`_ you send to the Arbor developers! + + +CMake CMP0023 Warning +--------------------- + +On version 3.9 or greater CMake generates the following warning: + +.. code-block:: none + + CMake Deprecation Warning at CMakeLists.txt:11 (cmake_policy): + The OLD behavior for policy CMP0023 will be removed from a future version + of CMake. + +This occurs because we have to work around conflicting modules in CMake, and +isn't a problem. It will be fixed when we start using the built-in support for +CUDA introduced in CMake 3.9. + +CMake Git Submodule Warnings +---------------------------- + +When running CMake, warnings like the following indicate that the Git submodules +need to be `updated <downloading_>`_. + +.. code-block:: none + + The Git submodule for rtdtheme is not available. 
+ To check out all submodules use the following commands: + git submodule init + git submodule update + Or download submodules recursively when checking out: + git clone --recurse-submodules https://github.com/eth-cscs/arbor.git + + +Wrong Headers for Intel Compiler +------------------------------------ + +The Intel C++ compiler does not provide its own copy of the C++ standard library; +instead it uses the implementation from GCC. You can see what the default version of +GCC is by running ``g++ --version``. + +If the Intel compiler uses an old version of the standard library, +errors like the following occur: + +.. code-block:: none + + /users/bcumming/arbor_knl/src/util/meta.hpp(127): error: namespace "std" has no member "is_trivially_copyable" + enable_if_t<std::is_trivially_copyable<T>::value>; + +On clusters, a GCC module with a full C++11 implementation of the standard library, +i.e. version 5.1 or later, can be loaded to fix the issue. diff --git a/doc/introduction.rst b/doc/introduction.rst deleted file mode 100644 index ff1b0eee3b3549f00f109e8830c8129911f6b155..0000000000000000000000000000000000000000 --- a/doc/introduction.rst +++ /dev/null @@ -1,5 +0,0 @@ -Why Arbor? -########## - -The diverse ecosystem of emerging HPC computing architectures promises exciting opportunities for larger, more detailed simulations run over longer time periods. -Arbor is a library developed by the HPC community to help computational neuroscientists take advantage of such systems.