diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..4e99f1d65578237bcb3cafdbe1fdb861510a6a83 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tests/ubench/google-benchmark"] + path = tests/ubench/google-benchmark + url = https://github.com/google/benchmark diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 97638a34f782bed57cd394e38fbec014363363a6..d5a9353348015c2bfd098ac28db038e194114ec2 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -13,6 +13,10 @@ add_subdirectory(global_communication) # Tests for performance: This could include stand alone tests. These do not necessarily be run automatically add_subdirectory(performance) +# Microbenchmarks: excluded from the default build; built on demand via the `ubenches` target. +add_subdirectory(ubench) + + # modcc tests if(NOT use_external_modcc) add_subdirectory(modcc) diff --git a/tests/ubench/CMakeLists.txt b/tests/ubench/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ccbe839945944c276f3fab4c580887c4fc5a248 --- /dev/null +++ b/tests/ubench/CMakeLists.txt @@ -0,0 +1,51 @@ +include(ExternalProject) + +# List of micro benchmarks to build: one stand-alone source file per benchmark executable. + +set(bench_sources + accumulate_functor_values.cpp) + +# Set up google benchmark as an external project: sources come from the git submodule directory, the install goes under the build tree. + +set(gbench_src_dir "${CMAKE_CURRENT_SOURCE_DIR}/google-benchmark") +set(gbench_install_dir "${PROJECT_BINARY_DIR}/gbench") + +set(gbench_cmake_args + "-DCMAKE_BUILD_TYPE=release" + "-DCMAKE_INSTALL_PREFIX=${gbench_install_dir}" + "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" + "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}") + + +# Attempt to initialise the google-benchmark git submodule if it has not been checked out yet. 
+find_package(Git) +if(NOT EXISTS "${gbench_src_dir}/.git") + if(GIT_FOUND) + execute_process(COMMAND "${GIT_EXECUTABLE}" submodule update --init google-benchmark + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") + else() + message(WARNING "Unable to update the google-benchmark submodule: git not found.") + endif() +endif() + +ExternalProject_Add(gbench + SOURCE_DIR "${gbench_src_dir}" + CMAKE_ARGS "${gbench_cmake_args}" + INSTALL_DIR "${gbench_install_dir}" +) +set_target_properties(gbench PROPERTIES EXCLUDE_FROM_ALL TRUE) + +# Build benches: one executable per source file, named after the file minus its extension; all are excluded from the default build. + +foreach(bench_src ${bench_sources}) + string(REGEX REPLACE "\\.[^.]*$" "" bench_exe "${bench_src}") + add_executable("${bench_exe}" EXCLUDE_FROM_ALL "${bench_src}") + add_dependencies("${bench_exe}" gbench) + target_include_directories("${bench_exe}" PRIVATE "${gbench_install_dir}/include") + target_link_libraries("${bench_exe}" "${gbench_install_dir}/lib/libbenchmark.a") + + list(APPEND bench_exe_list ${bench_exe}) +endforeach() + +add_custom_target(ubenches DEPENDS ${bench_exe_list}) + diff --git a/tests/ubench/README.md b/tests/ubench/README.md new file mode 100644 index 0000000000000000000000000000000000000000..06e6d5afbb6a819414f70b4bc96f84898125a4c2 --- /dev/null +++ b/tests/ubench/README.md @@ -0,0 +1,65 @@ +# Library microbenchmarks + +The benchmarks here are intended to: +* answer questions regarding choices of implementation in the library where performance is a concern; +* track the performance behaviour of isolated bits of library functionality across different platforms. + + +## Building and running + +The micro-benchmarks are not built by default. After configuring CMake, they can be built with +`make ubenches`. Each benchmark is provided by a stand-alone C++ source file in `tests/ubench`; +the resulting executables are found in `tests/ubench` relative to the build directory. + +[Google benchmark](https://github.com/google/benchmark) is used as a harness. 
 It is included +in the repository via a git submodule, and the provided CMake scripts will attempt to +run `git submodule update --init` on the submodule if it appears not to have been initialized. + + +## Adding new benchmarks + +New benchmarks are added by placing the corresponding implementation as a stand-alone +`.cpp` file in `tests/ubench` and adding the name of this file to the list `bench_sources` +in `tests/ubench/CMakeLists.txt`. + +Each new benchmark should also have a corresponding entry in this `README.md`, describing +the motivation for the test and summarising at least one benchmark result. + +Results in this file are destined to become out of date; we should consider some form +of semi-automated registration of results in a database should the number of benchmarks +become otherwise unwieldy. + + +## Benchmarks + +### `accumulate_functor_values` + +#### Motivation + +The problem arises when constructing the partition of an integral range where the size of each +sub-interval is given by a function of the index. This requires the computation of the partial sums +> d<sub><i>i</i></sub> = Σ<sub><i>j</i>&lt;<i>i</i></sub> <i>f</i>(<i>j</i>). + +One approach using the provided range utilities is to use `std::partial_sum` with +`util::transform_view` and `util::span`; the other is to simply write a loop that +performs the accumulation directly. What is the extra cost, if any, of the +transform-based approach? + +The micro-benchmark compares the two implementations, where the function is a simple +integer square operation, called either via a function pointer or a function object. + +#### Results + +Results here are presented only for vector size _n_ equal to 1024. + +Platform: +* Xeon E3-1220 v2 with base clock 3.1 GHz and max clock 3.5 GHz. 
+* Linux 4.4.34 +* gcc version 6.2.0 +* clang version 3.8.1 + +| Compiler | direct/function | transform/function | direct/object | transform/object | +|:------------|----------------:|-------------------:|--------------:|-----------------:| +| g++ -O3 | 907 ns | 2090 ns | 907 ns | 614 ns | +| clang++ -O3 | 1063 ns | 533 ns | 1051 ns | 532 ns | + diff --git a/tests/ubench/accumulate_functor_values.cpp b/tests/ubench/accumulate_functor_values.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19fa94a61ccb34fc9b768b55a961946d2b958512 --- /dev/null +++ b/tests/ubench/accumulate_functor_values.cpp @@ -0,0 +1,87 @@ +// Micro-benchmark: compare a hand-written loop against std::partial_sum over a transform view +// for computing the partial sums of f(i), i=1..n, where f is a simple square function. + +// Explicitly undef NDEBUG so that the result-validation asserts in bench_generic stay active regardless of build flags. +#undef NDEBUG + +#include <cassert> +#include <numeric> +#include <vector> + +#include <benchmark/benchmark.h> + +#include <util/span.hpp> +#include <util/transform.hpp> + +#define NOINLINE __attribute__((noinline)) // NOTE(review): not referenced anywhere in this file as shown -- confirm whether it is still needed + +using namespace nest::mc; + +inline long long square_function(long long x) { return x*x; } // free-function form of f; the benchmarks pass it by name, i.e. as a function pointer + +struct square_object { // function-object form of f + long long operator()(long long x) const { return x*x; } +}; + +using result_vec = std::vector<long long>; + +template <typename Func> +void partial_sums_direct(Func f, int upto, result_vec& psum) { // direct loop: psum[i-1] = f(1)+...+f(i) for i=1..upto; psum is not resized here, so it must already hold at least upto elements + long long sum = 0; + for (int i=1; i<=upto; ++i) { + sum += f(i); + psum[i-1] = sum; + } +} + +template <typename Func> +void partial_sums_transform(Func f, int upto, result_vec& psum) { // same output via std::partial_sum over f applied to util::span(1, upto+1) -- presumably the half-open range [1, upto+1); confirm in util/span.hpp + auto nums = util::span<long long>(1, upto+1); + auto values = util::transform_view(nums, f); + std::partial_sum(values.begin(), values.end(), psum.begin()); +} + +template <typename Impl> +void bench_generic(benchmark::State& state, const Impl& impl) { // times impl(upto, psum), with upto taken from the benchmark's range argument, then checks psum against the closed form + int upto = state.range(0); + result_vec psum(upto); + + while (state.KeepRunning()) { + impl(upto, psum); + benchmark::ClobberMemory(); // intended to stop the compiler from eliding the stores to psum between iterations + } + + // validate result against the closed form 1^2+...+x^2 = (2x^3+3x^2+x)/6 + auto sum_squares_to = [](long long x) {return 
 (2*x*x*x+3*x*x+x)/6; }; + for (int i = 0; i<upto; ++i) { + assert(sum_squares_to(i+1)==psum[i]); + } +} + +void accum_direct_function(benchmark::State& state) { + bench_generic(state, + [](int upto, result_vec& psum) { partial_sums_direct(square_function, upto, psum); }); +} + +void accum_direct_object(benchmark::State& state) { + bench_generic(state, + [](int upto, result_vec& psum) { partial_sums_direct(square_object{}, upto, psum); }); +} + +void accum_transform_function(benchmark::State& state) { + bench_generic(state, + [](int upto, result_vec& psum) { partial_sums_transform(square_function, upto, psum); }); +} + +void accum_transform_object(benchmark::State& state) { + bench_generic(state, + [](int upto, result_vec& psum) { partial_sums_transform(square_object{}, upto, psum); }); +} + +BENCHMARK(accum_direct_function)->Range(64, 1024); +BENCHMARK(accum_transform_function)->Range(64, 1024); +BENCHMARK(accum_direct_object)->Range(64, 1024); +BENCHMARK(accum_transform_object)->Range(64, 1024); + +BENCHMARK_MAIN(); + diff --git a/tests/ubench/google-benchmark b/tests/ubench/google-benchmark new file mode 160000 index 0000000000000000000000000000000000000000..9a5072d1bf9187b32ce9a88842dffa31ef416442 --- /dev/null +++ b/tests/ubench/google-benchmark @@ -0,0 +1 @@ +Subproject commit 9a5072d1bf9187b32ce9a88842dffa31ef416442