From 120316d0e46e5fed18cd472433239a893953e5e0 Mon Sep 17 00:00:00 2001
From: Sam Yates <yates@cscs.ch>
Date: Wed, 22 Aug 2018 10:59:40 +0200
Subject: [PATCH] Work-around for gcc version < 8.2 versus std::fma (#572)

Use a compat::fma wrapper for std::fma to avoid a bug in the tree optimizer in GCC version < 8.2.

See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87046

Fixes #568.
---
 include/arbor/math.hpp          |  4 +++-
 include/arbor/simd/implbase.hpp |  4 +++-
 include/arbor/util/compat.hpp   | 10 ++++++++++
 test/unit/test_simd.cpp         |  3 ++-
 test/validation/interpolate.hpp |  4 +++-
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/arbor/math.hpp b/include/arbor/math.hpp
index d0b24600..f8fba17e 100644
--- a/include/arbor/math.hpp
+++ b/include/arbor/math.hpp
@@ -5,6 +5,8 @@
 #include <type_traits>
 #include <utility>
 
+#include <arbor/util/compat.hpp>
+
 namespace arb {
 namespace math {
 
@@ -58,7 +60,7 @@ T constexpr area_sphere(T r) {
 // Linear interpolation by u in interval [a,b]: (1-u)*a + u*b.
 template <typename T, typename U>
 T constexpr lerp(T a, T b, U u) {
-    return std::fma(u, b, std::fma(-u, a, a));
+    return compat::fma(T(u), b, compat::fma(T(-u), a, a));
 }
 
 // Return -1, 0 or 1 according to sign of parameter.
diff --git a/include/arbor/simd/implbase.hpp b/include/arbor/simd/implbase.hpp
index ebdf6552..31046021 100644
--- a/include/arbor/simd/implbase.hpp
+++ b/include/arbor/simd/implbase.hpp
@@ -33,6 +33,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include <arbor/util/compat.hpp>
+
 // Derived class I must at minimum provide:
 //
 // * specialization of simd_traits.
@@ -240,7 +242,7 @@ struct implbase {
         I::copy_to(w, c);
 
         for (unsigned i = 0; i<width; ++i) {
-            r[i] = std::fma(a[i], b[i], c[i]);
+            r[i] = compat::fma(a[i], b[i], c[i]);
         }
         return I::copy_from(r);
     }
diff --git a/include/arbor/util/compat.hpp b/include/arbor/util/compat.hpp
index 026041ba..5ddf7e38 100644
--- a/include/arbor/util/compat.hpp
+++ b/include/arbor/util/compat.hpp
@@ -37,4 +37,14 @@ inline void compiler_barrier_if_icc_leq(unsigned ver) {
 #endif
 }
 
+// Work-around for bad vectorization of fma in gcc version < 8.2
+
+template <typename T>
+#if defined(__GNUC__) && (100*__GNUC__ + __GNUC_MINOR__ < 802)
+__attribute((optimize("no-tree-vectorize")))
+#endif
+inline auto fma(T a, T b, T c) {
+    return std::fma(a, b, c);
+}
+
 } // namespace compat
diff --git a/test/unit/test_simd.cpp b/test/unit/test_simd.cpp
index 0aea5359..8b37db65 100644
--- a/test/unit/test_simd.cpp
+++ b/test/unit/test_simd.cpp
@@ -7,6 +7,7 @@
 
 #include <arbor/simd/simd.hpp>
 #include <arbor/simd/avx.hpp>
+#include <arbor/util/compat.hpp>
 
 #include "common.hpp"
 
@@ -266,7 +267,7 @@ TYPED_TEST_P(simd_value, arithmetic) {
     for (unsigned i = 0; i<N; ++i) u_divide_v[i] = u[i]/v[i];
 
     scalar fma_u_v_w[N];
-    for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = std::fma(u[i],v[i],w[i]);
+    for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = compat::fma(u[i],v[i],w[i]);
 
     simd us(u), vs(v), ws(w);
 
diff --git a/test/validation/interpolate.hpp b/test/validation/interpolate.hpp
index 54d5da0a..bb6ec242 100644
--- a/test/validation/interpolate.hpp
+++ b/test/validation/interpolate.hpp
@@ -2,9 +2,11 @@
 
 #include <cmath>
 
+#include <arbor/util/compat.hpp>
+
 template <typename T, typename U>
 inline T lerp(T a, T b, U u) {
-    return std::fma(u, b, std::fma(-u, a, a));
+    return compat::fma(T(u), b, compat::fma(T(-u), a, a));
 }
 
 // Piece-wise linear interpolation across a sequence of points (u_i, x_i),
-- 
GitLab