From 7b2b5209bfa4d3231356d462c88af77adce2e35f Mon Sep 17 00:00:00 2001
From: Alexander Peyser <apeyser@users.noreply.github.com>
Date: Tue, 7 Feb 2017 18:57:05 +0100
Subject: [PATCH] Extra profiling (#148)

Add finer-grained profiling to track exactly how much time is spent
inside MPI calls.
---
 src/communication/mpi.cpp |  8 ++++++++
 src/communication/mpi.hpp | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/communication/mpi.cpp b/src/communication/mpi.cpp
index 0481ec15..9d08fb49 100644
--- a/src/communication/mpi.cpp
+++ b/src/communication/mpi.cpp
@@ -16,15 +16,21 @@ void init(int *argc, char ***argv) {
     int provided;
 
     // initialize with thread serialized level of thread safety
+    PE("MPI", "Init");
     MPI_Init_thread(argc, argv, MPI_THREAD_SERIALIZED, &provided);
     assert(provided>=MPI_THREAD_SERIALIZED);
+    PL(2);
 
+    PE("rank-size");
     MPI_Comm_rank(MPI_COMM_WORLD, &state::rank);
     MPI_Comm_size(MPI_COMM_WORLD, &state::size);
+    PL();
 }
 
 void finalize() {
+    PE("MPI", "Finalize");
     MPI_Finalize();
+    PL(2);
 }
 
 bool is_root() {
@@ -49,7 +55,9 @@ bool ballot(bool vote) {
     char result;
     char value = vote ? 1 : 0;
 
+    PE("MPI", "Allreduce-ballot");
     MPI_Allreduce(&value, &result, 1, traits::mpi_type(), MPI_LAND, MPI_COMM_WORLD);
+    PL(2);
 
     return result;
 }
diff --git a/src/communication/mpi.hpp b/src/communication/mpi.hpp
index 472c6403..34b08864 100644
--- a/src/communication/mpi.hpp
+++ b/src/communication/mpi.hpp
@@ -12,6 +12,8 @@
 #include <algorithms.hpp>
 #include <communication/gathered_vector.hpp>
 #include <util/debug.hpp>
+#include <profiling/profiler.hpp>
+
 
 namespace nest {
 namespace mc {
@@ -71,9 +73,11 @@ namespace mpi {
         auto buffer_size = (rank()==root) ? size() : 0;
         std::vector<T> buffer(buffer_size);
 
+        PE("MPI", "Gather");
         MPI_Gather( &value,        traits::count(), traits::mpi_type(), // send buffer
                     buffer.data(), traits::count(), traits::mpi_type(), // receive buffer
                     root, MPI_COMM_WORLD);
+        PL(2);
 
         return buffer;
     }
@@ -90,9 +94,11 @@ namespace mpi {
         using traits = mpi_traits<T>;
         std::vector<T> buffer(size());
 
+        PE("MPI", "Allgather");
         MPI_Allgather( &value,        traits::count(), traits::mpi_type(), // send buffer
                        buffer.data(), traits::count(), traits::mpi_type(), // receive buffer
                        MPI_COMM_WORLD);
+        PL(2);
 
         return buffer;
     }
@@ -112,6 +118,7 @@ namespace mpi {
 
         std::vector<T> buffer(displs.back()/traits::count());
 
+        PE("MPI", "Allgatherv");
         MPI_Allgatherv(
             // send buffer
             values.data(), counts[rank()], traits::mpi_type(),
@@ -119,6 +126,7 @@ namespace mpi {
             buffer.data(), counts.data(), displs.data(), traits::mpi_type(),
             MPI_COMM_WORLD
         );
+        PL(2);
 
         return buffer;
     }
@@ -142,6 +150,7 @@ namespace mpi {
 
         std::vector<T> buffer(displs.back()/traits::count());
 
+        PE("MPI", "Allgatherv-partition");
         MPI_Allgatherv(
             // send buffer
             values.data(), counts[rank()], traits::mpi_type(),
@@ -149,6 +158,7 @@ namespace mpi {
             buffer.data(), counts.data(), displs.data(), traits::mpi_type(),
             MPI_COMM_WORLD
         );
+        PL(2);
 
         for (auto& d : displs) {
             d /= traits::count();
@@ -169,7 +179,9 @@ namespace mpi {
 
         T result;
 
+        PE("MPI", "Reduce");
         MPI_Reduce(&value, &result, 1, traits::mpi_type(), op, root, MPI_COMM_WORLD);
+        PL(2);
 
         return result;
     }
@@ -183,7 +195,9 @@ namespace mpi {
 
         T result;
 
+        PE("MPI", "Allreduce");
         MPI_Allreduce(&value, &result, 1, traits::mpi_type(), op, MPI_COMM_WORLD);
+        PL(2);
 
         return result;
     }
@@ -206,7 +220,9 @@ namespace mpi {
 
         using traits = mpi_traits<T>;
 
+        PE("MPI", "Bcast");
         MPI_Bcast(&value, traits::count(), traits::mpi_type(), root, MPI_COMM_WORLD);
+        PL(2);
 
         return value;
     }
@@ -220,7 +236,9 @@ namespace mpi {
         using traits = mpi_traits<T>;
         T value;
 
+        PE("MPI", "Bcast-void");
         MPI_Bcast(&value, traits::count(), traits::mpi_type(), root, MPI_COMM_WORLD);
+        PL(2);
 
         return value;
     }
-- 
GitLab
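Note on the PE/PL macros used throughout this patch: they come from the
<profiling/profiler.hpp> header added above. From the pairings in the diff,
PE(...) appears to enter one nested profiler region per argument (so
PE("MPI", "Gather") enters "MPI", then "Gather") and PL(n) leaves n levels,
with PL() leaving one. Below is a minimal sketch of instrumenting a further
wrapper in the same style; scan_sum and the choice of MPI_Scan with MPI_SUM
are hypothetical and not part of this patch, while mpi_traits is the helper
already used in mpi.hpp.

    // Hypothetical wrapper, written in the style of the wrappers above:
    // an inclusive prefix sum across ranks, timed as "MPI" -> "Scan".
    template <typename T>
    T scan_sum(T value) {
        using traits = mpi_traits<T>;

        T result;

        PE("MPI", "Scan");  // enter nested regions "MPI", then "Scan"
        MPI_Scan(&value, &result, 1, traits::mpi_type(), MPI_SUM, MPI_COMM_WORLD);
        PL(2);              // leave both regions before any further work

        return result;
    }

Keeping the PE/PL pair tight around the collective call leaves buffer
allocation and post-processing outside the measured interval, which matches
where the calls are placed in each wrapper in the patch.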