diff --git a/include/arbor/math.hpp b/include/arbor/math.hpp index d0b2460065de85fca09c43b0f79a3bd51f0a9970..f8fba17e39868ade737002f55ffa4a8b2d0f1c98 100644 --- a/include/arbor/math.hpp +++ b/include/arbor/math.hpp @@ -5,6 +5,8 @@ #include <type_traits> #include <utility> +#include <arbor/util/compat.hpp> + namespace arb { namespace math { @@ -58,7 +60,7 @@ T constexpr area_sphere(T r) { // Linear interpolation by u in interval [a,b]: (1-u)*a + u*b. template <typename T, typename U> T constexpr lerp(T a, T b, U u) { - return std::fma(u, b, std::fma(-u, a, a)); + return compat::fma(T(u), b, compat::fma(T(-u), a, a)); } // Return -1, 0 or 1 according to sign of parameter. diff --git a/include/arbor/simd/implbase.hpp b/include/arbor/simd/implbase.hpp index ebdf6552d5477197054c053998ad8f5bd05000e3..31046021f8a2f54b2ca7bd0ee48191515306204b 100644 --- a/include/arbor/simd/implbase.hpp +++ b/include/arbor/simd/implbase.hpp @@ -33,6 +33,8 @@ #include <iterator> #include <type_traits> +#include <arbor/util/compat.hpp> + // Derived class I must at minimum provide: // // * specialization of simd_traits. @@ -240,7 +242,7 @@ struct implbase { I::copy_to(w, c); for (unsigned i = 0; i<width; ++i) { - r[i] = std::fma(a[i], b[i], c[i]); + r[i] = compat::fma(a[i], b[i], c[i]); } return I::copy_from(r); } diff --git a/include/arbor/util/compat.hpp b/include/arbor/util/compat.hpp index 026041ba8b1d3065034f72f9eb4d907f55a5886e..5ddf7e3814476cf9935c0a7c9840c3e0720900af 100644 --- a/include/arbor/util/compat.hpp +++ b/include/arbor/util/compat.hpp @@ -37,4 +37,14 @@ inline void compiler_barrier_if_icc_leq(unsigned ver) { #endif } +// Work-around for bad vectorization of fma in gcc version < 8.2 + +template <typename T> +#if defined(__GNUC__) && (100*__GNUC__ + __GNUC_MINOR__ < 802) +__attribute((optimize("no-tree-vectorize"))) +#endif +inline auto fma(T a, T b, T c) { + return std::fma(a, b, c); +} + } // namespace compat diff --git a/test/unit/test_simd.cpp b/test/unit/test_simd.cpp index 0aea5359b5f607ff6f6ce8225b8198a32e6ed067..8b37db6503e5e4b9dc71ee3384a7a27d69b3f9c2 100644 --- a/test/unit/test_simd.cpp +++ b/test/unit/test_simd.cpp @@ -7,6 +7,7 @@ #include <arbor/simd/simd.hpp> #include <arbor/simd/avx.hpp> +#include <arbor/util/compat.hpp> #include "common.hpp" @@ -266,7 +267,7 @@ TYPED_TEST_P(simd_value, arithmetic) { for (unsigned i = 0; i<N; ++i) u_divide_v[i] = u[i]/v[i]; scalar fma_u_v_w[N]; - for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = std::fma(u[i],v[i],w[i]); + for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = compat::fma(u[i],v[i],w[i]); simd us(u), vs(v), ws(w); diff --git a/test/validation/interpolate.hpp b/test/validation/interpolate.hpp index 54d5da0a7c7e30822b682dca2a2d5b5dbdb6381c..bb6ec2423099e39737c175ed945cb42452507431 100644 --- a/test/validation/interpolate.hpp +++ b/test/validation/interpolate.hpp @@ -2,9 +2,11 @@ #include <cmath> +#include <arbor/util/compat.hpp> + template <typename T, typename U> inline T lerp(T a, T b, U u) { - return std::fma(u, b, std::fma(-u, a, a)); + return compat::fma(T(u), b, compat::fma(T(-u), a, a)); } // Piece-wise linear interpolation across a sequence of points (u_i, x_i),