From 5574e05fdc95986d3c32a71988bb2d9a4d38767d Mon Sep 17 00:00:00 2001
From: noraabiakar <nora.abiakar@gmail.com>
Date: Mon, 14 Jan 2019 09:05:36 +0100
Subject: [PATCH] fix avx512 gather specialization (#670)

* Fix incorrect specialization of AVX512 gather in SIMD library.

Related to #637.
Improves avx512 performance on Intel Xeon Gold 6130.
---
 include/arbor/simd/avx512.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/arbor/simd/avx512.hpp b/include/arbor/simd/avx512.hpp
index 0f062d11..e00871d4 100644
--- a/include/arbor/simd/avx512.hpp
+++ b/include/arbor/simd/avx512.hpp
@@ -381,19 +381,19 @@ struct avx512_int8: implbase<avx512_int8> {

     // Specialized 8-wide gather and scatter for avx512_int8 implementation.

-    static __m512i gather(avx512_int8, const int32* p, const __m512i& index) {
+    static __m512i gather(tag<avx512_int8>, const int32* p, const __m512i& index) {
         return _mm512_mask_i32gather_epi32(_mm512_setzero_epi32(), lo(), index, p, 4);
     }

-    static __m512i gather(avx512_int8, __m512i a, const int32* p, const __m512i& index, const __mmask8& mask) {
+    static __m512i gather(tag<avx512_int8>, __m512i a, const int32* p, const __m512i& index, const __mmask8& mask) {
         return _mm512_mask_i32gather_epi32(a, mask, index, p, 4);
     }

-    static void scatter(avx512_int8, const __m512i& s, int32* p, const __m512i& index) {
+    static void scatter(tag<avx512_int8>, const __m512i& s, int32* p, const __m512i& index) {
         _mm512_mask_i32scatter_epi32(p, lo(), index, s, 4);
     }

-    static void scatter(avx512_int8, const __m512i& s, int32* p, const __m512i& index, const __mmask8& mask) {
+    static void scatter(tag<avx512_int8>, const __m512i& s, int32* p, const __m512i& index, const __mmask8& mask) {
         _mm512_mask_i32scatter_epi32(p, mask, index, s, 4);
     }
 };
@@ -561,19 +561,19 @@ struct avx512_double8: implbase<avx512_double8> {

     // Specialized 8-wide gather and scatter for avx512_int8 implementation.

-    static __m512d gather(avx512_int8, const double* p, const __m512i& index) {
+    static __m512d gather(tag<avx512_int8>, const double* p, const __m512i& index) {
         return _mm512_i32gather_pd(_mm512_castsi512_si256(index), p, 8);
     }

-    static __m512d gather(avx512_int8, __m512d a, const double* p, const __m512i& index, const __mmask8& mask) {
+    static __m512d gather(tag<avx512_int8>, __m512d a, const double* p, const __m512i& index, const __mmask8& mask) {
         return _mm512_mask_i32gather_pd(a, mask, _mm512_castsi512_si256(index), p, 8);
     }

-    static void scatter(avx512_int8, const __m512d& s, double* p, const __m512i& index) {
+    static void scatter(tag<avx512_int8>, const __m512d& s, double* p, const __m512i& index) {
         _mm512_i32scatter_pd(p, _mm512_castsi512_si256(index), s, 8);
     }

-    static void scatter(avx512_int8, const __m512d& s, double* p, const __m512i& index, const __mmask8& mask) {
+    static void scatter(tag<avx512_int8>, const __m512d& s, double* p, const __m512i& index, const __mmask8& mask) {
         _mm512_mask_i32scatter_pd(p, mask, _mm512_castsi512_si256(index), s, 8);
     }

-- GitLab