diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a520546120d5fe0a8dfb4396713c372eca93a38c..6e6c5b9659a82c7a2f53a70cb884c9f28d65c30e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,65 +1,77 @@ +include: + - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.cscs.yml' + stages: - build + - allocate - test - -.kubernetes: - variables: - GIT_SUBMODULE_STRATEGY: recursive - DOCKER_DRIVER: overlay2 - DOCKER_HOST: tcp://localhost:2375 - DOCKER_TLS_CERTDIR: "" - DOCKER_BUILDKIT: 1 - BUILDKIT_PROGRESS: plain - image: docker:stable - only: - - master - - staging - - trying - tags: - - kubernetes - services: - - docker:19.03.1-dind # Important to keep the patch version here! + - cleanup # Builds a docker image on kubernetes build: - extends: .kubernetes + extends: .dind stage: build + only: ['master', 'staging', 'trying'] + variables: + GIT_SUBMODULE_STRATEGY: recursive + BUILD_DOCKERFILE: docker/build-env/Dockerfile + BUILD_IMAGE: $CI_REGISTRY_IMAGE/build-env:latest + DEPLOY_DOCKERFILE: docker/deploy/Dockerfile + DEPLOY_IMAGE: $CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA before_script: - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY script: - - docker build -f docker/build-env/Dockerfile --network=host --cache-from $CI_REGISTRY_IMAGE/build-env:latest --build-arg BUILDKIT_INLINE_CACHE=1 -t $CI_REGISTRY_IMAGE/build-env:latest . - - docker push $CI_REGISTRY_IMAGE/build-env:latest - - docker build -f docker/deploy/Dockerfile --network=host --build-arg BUILD_ENV=$CI_REGISTRY_IMAGE/build-env:latest -t $CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA . - - docker push $CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA + - docker build -f $BUILD_DOCKERFILE --network=host --cache-from $BUILD_IMAGE --build-arg BUILDKIT_INLINE_CACHE=1 -t $BUILD_IMAGE . + - docker push $BUILD_IMAGE + - docker build -f $DEPLOY_DOCKERFILE --network=host --build-arg BUILD_ENV=$BUILD_IMAGE -t $DEPLOY_IMAGE . + - docker push $DEPLOY_IMAGE # Executes the docker image on Daint via Sarus -.daint-common: +image: $CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA + +# Some variables used for running on daint +variables: + CRAY_CUDA_MPS: 1 + USE_MPI: 'YES' + DISABLE_AFTER_SCRIPT: 'YES' + PULL_IMAGE: 'NO' + ALLOCATION_NAME: arbor-ci-$CI_PIPELINE_ID + SLURM_CONSTRAINT: gpu + SLURM_JOB_NUM_NODES: 2 + SLURM_PARTITION: normal + +allocate: + stage: allocate + only: ['master', 'staging', 'trying'] + extends: .daint_alloc variables: - CRAY_CUDA_MPS: 1 - only: - - master - - staging - - trying - tags: - - daint + PULL_IMAGE: 'YES' -test: +single node: + extends: .daint + only: ['master', 'staging', 'trying'] stage: test - extends: .daint-common - before_script: - - export IMAGE=$CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA - - module load sarus daint-gpu - - sarus pull $IMAGE - - salloc --no-shell --job-name=arbor-ci-$CI_JOB_ID -N 2 -n 2 -C gpu -p normal - - export JOBID=$(squeue -h --name=arbor-ci-$CI_JOB_ID --format=%A) + resource_group: daint-job script: - - srun --jobid=$JOBID -N 1 -n 1 -J arbor-ci-$CI_JOB_ID-unit sarus run --mpi --mount=type=bind,source=$PWD,destination=/arbor $IMAGE unit - - srun --jobid=$JOBID -N 2 -n 2 -J arbor-ci-$CI_JOB_ID-unit-mpi sarus run --mpi --mount=type=bind,source=$PWD,destination=/arbor $IMAGE unit-mpi - - srun --jobid=$JOBID -N 1 -n 1 -J arbor-ci-$CI_JOB_ID-unit-local sarus run --mpi --mount=type=bind,source=$PWD,destination=/arbor $IMAGE unit-local - - srun --jobid=$JOBID -N 1 -n 1 -J arbor-ci-$CI_JOB_ID-unit-modcc sarus run --mpi --mount=type=bind,source=$PWD,destination=/arbor $IMAGE unit-modcc - after_script: - - export IMAGE=$CI_REGISTRY_IMAGE/deploy:$CI_COMMIT_SHA - - export JOBID=$(squeue -h --name=arbor-ci-$CI_JOB_ID --format=%A) - - scancel $JOBID - - module load sarus - - sarus rmi $IMAGE + - unit + - unit-local + - unit-modcc + variables: + SLURM_JOB_NUM_NODES: 1 + SLURM_NTASKS: 1 + +multi node: + extends: .daint + only: ['master', 'staging', 'trying'] + stage: test + resource_group: daint-job + script: + - unit-mpi + variables: + SLURM_JOB_NUM_NODES: 2 + SLURM_NTASKS: 2 + +deallocate: + only: ['master', 'staging', 'trying'] + stage: cleanup + extends: .daint_dealloc \ No newline at end of file diff --git a/docker/build-env/Dockerfile b/docker/build-env/Dockerfile index 873f17686ea287f0217041dbf41a8b8a4b345d50..70b1ba5f772c704ce475760afe232bf500312401 100644 --- a/docker/build-env/Dockerfile +++ b/docker/build-env/Dockerfile @@ -27,6 +27,7 @@ RUN wget -q https://www.mpich.org/static/downloads/${MPICH_VERSION}/mpich-${MPIC rm -rf mpich-${MPICH_VERSION}.tar.gz mpich-${MPICH_VERSION} # Install bundle tooling for creating small Docker images -RUN wget -q https://github.com/haampie/libtree/releases/download/v1.0.3/libtree_x86_64.tar.gz && \ +RUN wget -q https://github.com/haampie/libtree/releases/download/v1.1.3/libtree_x86_64.tar.gz && \ tar -xzf libtree_x86_64.tar.gz && \ - rm libtree_x86_64.tar.gz + rm libtree_x86_64.tar.gz && \ + ln -s /root/libtree/libtree /usr/local/bin/libtree diff --git a/docker/deploy/Dockerfile b/docker/deploy/Dockerfile index 5749cdbffc71a26b76456166006adeffc3edff29..443427df2bd93984fc9ec40d127c06f9487d65cb 100644 --- a/docker/deploy/Dockerfile +++ b/docker/deploy/Dockerfile @@ -5,14 +5,22 @@ ARG BUILD_ENV +ARG SOURCE_DIR=/arbor +ARG BUILD_DIR=/arbor-build +ARG BUNDLE_DIR=/root/arbor.bundle + FROM $BUILD_ENV as builder +ARG SOURCE_DIR +ARG BUILD_DIR +ARG BUNDLE_DIR + # Build arbor COPY . /arbor # Build and bundle binaries -RUN mkdir /arbor/build && cd /arbor/build && \ - CC=mpicc CXX=mpicxx cmake .. \ +RUN mkdir ${BUILD_DIR} && cd ${BUILD_DIR} && \ + CC=mpicc CXX=mpicxx cmake ${SOURCE_DIR} \ -DARB_VECTORIZE=ON \ -DARB_ARCH=broadwell \ -DARB_WITH_PYTHON=OFF \ @@ -21,28 +29,35 @@ RUN mkdir /arbor/build && cd /arbor/build && \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=/usr && \ make -j$(nproc) tests && \ - /root/libtree/libtree --chrpath --strip \ - -d /root/arbor.bundle \ - /arbor/build/bin/modcc \ - /arbor/build/bin/unit \ - /arbor/build/bin/unit-local \ - /arbor/build/bin/unit-modcc \ - /arbor/build/bin/unit-mpi && \ - rm -rf /arbor + libtree --chrpath --strip \ + -d ${BUNDLE_DIR} \ + ${BUILD_DIR}/bin/modcc \ + ${BUILD_DIR}/bin/unit \ + ${BUILD_DIR}/bin/unit-local \ + ${BUILD_DIR}/bin/unit-modcc \ + ${BUILD_DIR}/bin/unit-mpi && \ + rm -rf ${BUILD_DIR} + +# Only keep the sources for tests, not the git history +RUN rm -rf ${SOURCE_DIR}/.git FROM ubuntu:18.04 +ARG SOURCE_DIR +ARG BUNDLE_DIR + # This is the only thing necessary really from nvidia/cuda's ubuntu18.04 runtime image ENV NVIDIA_VISIBLE_DEVICES all ENV NVIDIA_DRIVER_CAPABILITIES compute,utility ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=396,driver<397 brand=tesla,driver>=410,driver<411" -COPY --from=builder /root/arbor.bundle /root/arbor.bundle +COPY --from=builder ${BUNDLE_DIR} ${BUNDLE_DIR} +COPY --from=builder ${SOURCE_DIR} ${SOURCE_DIR} # Make it easy to call our binaries. -ENV PATH="/root/arbor.bundle/usr/bin:$PATH" +ENV PATH="${BUNDLE_DIR}/usr/bin:$PATH" -RUN echo "/root/arbor.bundle/usr/lib/" > /etc/ld.so.conf.d/arbor.conf && ldconfig +RUN echo "${BUNDLE_DIR}/usr/lib/" > /etc/ld.so.conf.d/arbor.conf && ldconfig -WORKDIR /root/arbor.bundle/usr/bin +WORKDIR ${BUNDLE_DIR}/usr/bin