From 120316d0e46e5fed18cd472433239a893953e5e0 Mon Sep 17 00:00:00 2001
From: Sam Yates <yates@cscs.ch>
Date: Wed, 22 Aug 2018 10:59:40 +0200
Subject: [PATCH] Work-around for gcc version < 8.2 versus std::fma (#572)

Use a compat::fma wrapper for std::fma to avoid a bug in the tree optimizer in GCC version < 8.2.

See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87046
Fixes #568.
---
 include/arbor/math.hpp          |  4 +++-
 include/arbor/simd/implbase.hpp |  4 +++-
 include/arbor/util/compat.hpp   | 10 ++++++++++
 test/unit/test_simd.cpp         |  3 ++-
 test/validation/interpolate.hpp |  4 +++-
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/arbor/math.hpp b/include/arbor/math.hpp
index d0b24600..f8fba17e 100644
--- a/include/arbor/math.hpp
+++ b/include/arbor/math.hpp
@@ -5,6 +5,8 @@
 #include <type_traits>
 #include <utility>
 
+#include <arbor/util/compat.hpp>
+
 namespace arb {
 namespace math {
 
@@ -58,7 +60,7 @@ T constexpr area_sphere(T r) {
 // Linear interpolation by u in interval [a,b]: (1-u)*a + u*b.
 template <typename T, typename U>
 T constexpr lerp(T a, T b, U u) {
-    return std::fma(u, b, std::fma(-u, a, a));
+    return compat::fma(T(u), b, compat::fma(T(-u), a, a));
 }
 
 // Return -1, 0 or 1 according to sign of parameter.
diff --git a/include/arbor/simd/implbase.hpp b/include/arbor/simd/implbase.hpp
index ebdf6552..31046021 100644
--- a/include/arbor/simd/implbase.hpp
+++ b/include/arbor/simd/implbase.hpp
@@ -33,6 +33,8 @@
 #include <iterator>
 #include <type_traits>
 
+#include <arbor/util/compat.hpp>
+
 // Derived class I must at minimum provide:
 //
 // * specialization of simd_traits.
@@ -240,7 +242,7 @@ struct implbase {
         I::copy_to(w, c);
 
         for (unsigned i = 0; i<width; ++i) {
-            r[i] = std::fma(a[i], b[i], c[i]);
+            r[i] = compat::fma(a[i], b[i], c[i]);
         }
         return I::copy_from(r);
     }
diff --git a/include/arbor/util/compat.hpp b/include/arbor/util/compat.hpp
index 026041ba..5ddf7e38 100644
--- a/include/arbor/util/compat.hpp
+++ b/include/arbor/util/compat.hpp
@@ -37,4 +37,14 @@ inline void compiler_barrier_if_icc_leq(unsigned ver) {
 #endif
 }
 
+// compat::fma(a, b, c): forwards to std::fma. Real GCC < 8.2 mis-vectorizes fma (GCC bug 87046),
+// so tree vectorization is disabled for it; clang and icc also define __GNUC__ and are excluded.
+template <typename T>
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (100*__GNUC__ + __GNUC_MINOR__ < 802)
+__attribute((optimize("no-tree-vectorize")))
+#endif
+inline auto fma(T a, T b, T c) {
+    return std::fma(a, b, c);
+}
+
 } // namespace compat
diff --git a/test/unit/test_simd.cpp b/test/unit/test_simd.cpp
index 0aea5359..8b37db65 100644
--- a/test/unit/test_simd.cpp
+++ b/test/unit/test_simd.cpp
@@ -7,6 +7,7 @@
 
 #include <arbor/simd/simd.hpp>
 #include <arbor/simd/avx.hpp>
+#include <arbor/util/compat.hpp>
 
 #include "common.hpp"
 
@@ -266,7 +267,7 @@ TYPED_TEST_P(simd_value, arithmetic) {
         for (unsigned i = 0; i<N; ++i) u_divide_v[i] = u[i]/v[i];
 
         scalar fma_u_v_w[N];
-        for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = std::fma(u[i],v[i],w[i]);
+        for (unsigned i = 0; i<N; ++i) fma_u_v_w[i] = compat::fma(u[i],v[i],w[i]);
 
         simd us(u), vs(v), ws(w);
 
diff --git a/test/validation/interpolate.hpp b/test/validation/interpolate.hpp
index 54d5da0a..bb6ec242 100644
--- a/test/validation/interpolate.hpp
+++ b/test/validation/interpolate.hpp
@@ -2,9 +2,11 @@
 
 #include <cmath>
 
+#include <arbor/util/compat.hpp>
+
 template <typename T, typename U>
 inline T lerp(T a, T b, U u) {
-    return std::fma(u, b, std::fma(-u, a, a));
+    return compat::fma(T(u), b, compat::fma(T(-u), a, a));
 }
 
 // Piece-wise linear interpolation across a sequence of points (u_i, x_i),
-- 
GitLab