diff --git a/src/simd/avx.hpp b/src/simd/avx.hpp
index 80c2a26b2ae1797c06b95d66c6ea7cdd3a85f96a..dfef7c0a6f0b2a2aa9826eaaeda29723edc5bd1c 100644
--- a/src/simd/avx.hpp
+++ b/src/simd/avx.hpp
@@ -454,7 +454,10 @@ struct avx_double4: implbase<avx_double4> {
         auto odd = mul(g, horner(gg, P0exp, P1exp, P2exp));
         auto even = horner(gg, Q0exp, Q1exp, Q2exp, Q3exp);
 
-        auto expgm1 = mul(two, div(odd, sub(even, odd)));
+        // Note: multiply by two, *then* divide: avoids a subnormal
+        // intermediate that will get truncated to zero with default
+        // icpc options.
+        auto expgm1 = div(mul(two, odd), sub(even, odd));
 
         // For small x (n zero), bypass scaling step to avoid underflow.
         // Otherwise, compute result 2^n * expgm1 + (2^n-1) by:
@@ -772,7 +775,7 @@ struct avx2_double4: avx_double4 {
         auto odd = mul(g, horner(gg, P0exp, P1exp, P2exp));
         auto even = horner(gg, Q0exp, Q1exp, Q2exp, Q3exp);
 
-        auto expgm1 = mul(two, div(odd, sub(even, odd)));
+        auto expgm1 = div(mul(two, odd), sub(even, odd));
 
         auto nm1 = _mm256_cvtpd_epi32(sub(n, one));
         auto scaled = mul(add(sub(exp2int(nm1), half), ldexp_normal(expgm1, nm1)), two);
diff --git a/src/simd/avx512.hpp b/src/simd/avx512.hpp
index f956bb00e9e96dfae90a259eb78cf0d487cf3664..4b1a877eee82d3053e96bbd044d8d403ab3d0208 100644
--- a/src/simd/avx512.hpp
+++ b/src/simd/avx512.hpp
@@ -614,7 +614,7 @@ struct avx512_double8: implbase<avx512_double8> {
 
         // Compute R(g)/R(-g) -1 = 2*g*P(g^2) / (Q(g^2)-g*P(g^2))
 
-        auto expgm1 = mul(broadcast(2), div(odd, sub(even, odd)));
+        auto expgm1 = div(mul(broadcast(2), odd), sub(even, odd));
 
         // For small x (n zero), bypass scaling step to avoid underflow.
         // Otherwise, compute result 2^n * expgm1 + (2^n-1) by: