aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/AVX512/Complex.h
diff options
context:
space:
mode:
authorGravatar Mark D Ryan <mark.d.ryan@intel.com>2019-01-03 14:33:04 +0100
committerGravatar Mark D Ryan <mark.d.ryan@intel.com>2019-01-03 14:33:04 +0100
commitbc5dd4cafd5c4e29b6bf1cd3bf532bac407248bb (patch)
tree130a5f59935381e00a2b7c270304859496681212 /Eigen/src/Core/arch/AVX512/Complex.h
parent697fba3bb0f09ecef77a23703df68956880ec7dd (diff)
PR560: Fix the AVX512f-only builds
Commit c53eececb0415834b961cb61cd466907261b4b2f introduced AVX512 support for complex numbers but required avx512dq to build. Commit 1d683ae2f5a340a6e2681c8cd0782f4db6b807ea fixed some, but it would seem not all, of the hard avx512dq dependencies. Build failures are still evident in Eigen and TensorFlow when compiling with just avx512f and no avx512dq using gcc 7.3. Looking at the code, there does indeed seem to be a problem: commit c53eececb0415834b961cb61cd466907261b4b2f calls avx512dq intrinsics directly, e.g., _mm512_extractf32x8_ps and _mm512_and_ps. This commit fixes the issue by replacing the direct intrinsic calls with the various wrapper functions that are safe to use on avx512f-only builds.
Diffstat (limited to 'Eigen/src/Core/arch/AVX512/Complex.h')
-rw-r--r--Eigen/src/Core/arch/AVX512/Complex.h32
1 files changed, 16 insertions, 16 deletions
diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h
index 247f89860..42cdfcd25 100644
--- a/Eigen/src/Core/arch/AVX512/Complex.h
+++ b/Eigen/src/Core/arch/AVX512/Complex.h
@@ -66,7 +66,7 @@ template<> EIGEN_STRONG_INLINE Packet8cf pconj(const Packet8cf& a)
const __m512 mask = _mm512_castsi512_ps(_mm512_setr_epi32(
0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,
0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
- return Packet8cf(_mm512_xor_ps(a.v,mask));
+ return Packet8cf(pxor(a.v,mask));
}
template<> EIGEN_STRONG_INLINE Packet8cf pmul<Packet8cf>(const Packet8cf& a, const Packet8cf& b)
@@ -75,10 +75,10 @@ template<> EIGEN_STRONG_INLINE Packet8cf pmul<Packet8cf>(const Packet8cf& a, con
return Packet8cf(_mm512_fmaddsub_ps(_mm512_moveldup_ps(a.v), b.v, tmp2));
}
-template<> EIGEN_STRONG_INLINE Packet8cf pand <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_and_ps(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet8cf por <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_or_ps(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet8cf pxor <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_xor_ps(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet8cf pandnot<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(_mm512_andnot_ps(b.v,a.v)); }
+template<> EIGEN_STRONG_INLINE Packet8cf pand <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pand(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet8cf por <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(por(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet8cf pxor <Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pxor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet8cf pandnot<Packet8cf>(const Packet8cf& a, const Packet8cf& b) { return Packet8cf(pandnot(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet8cf pload <Packet8cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet8cf(pload<Packet16f>(&numext::real_ref(*from))); }
template<> EIGEN_STRONG_INLINE Packet8cf ploadu<Packet8cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet8cf(ploadu<Packet16f>(&numext::real_ref(*from))); }
@@ -124,20 +124,20 @@ template<> EIGEN_STRONG_INLINE Packet8cf preverse(const Packet8cf& a) {
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet8cf>(const Packet8cf& a)
{
- return predux(padd(Packet4cf(_mm512_extractf32x8_ps(a.v,0)),
- Packet4cf(_mm512_extractf32x8_ps(a.v,1))));
+ return predux(padd(Packet4cf(extract256<0>(a.v)),
+ Packet4cf(extract256<1>(a.v))));
}
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet8cf>(const Packet8cf& a)
{
- return predux_mul(pmul(Packet4cf(_mm512_extractf32x8_ps(a.v, 0)),
- Packet4cf(_mm512_extractf32x8_ps(a.v, 1))));
+ return predux_mul(pmul(Packet4cf(extract256<0>(a.v)),
+ Packet4cf(extract256<1>(a.v))));
}
template <>
EIGEN_STRONG_INLINE Packet4cf predux_half_dowto4<Packet8cf>(const Packet8cf& a) {
- __m256 lane0 = _mm512_extractf32x8_ps(a.v, 0);
- __m256 lane1 = _mm512_extractf32x8_ps(a.v, 1);
+ __m256 lane0 = extract256<0>(a.v);
+ __m256 lane1 = extract256<1>(a.v);
__m256 res = _mm256_add_ps(lane0, lane1);
return Packet4cf(res);
}
@@ -262,10 +262,10 @@ template<> EIGEN_STRONG_INLINE Packet4cd pmul<Packet4cd>(const Packet4cd& a, con
return Packet4cd(_mm512_fmaddsub_pd(tmp1, b.v, odd));
}
-template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_and_pd(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_or_pd(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_xor_pd(a.v,b.v)); }
-template<> EIGEN_STRONG_INLINE Packet4cd pandnot<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(_mm512_andnot_pd(b.v,a.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cd pand <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pand(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cd por <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(por(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cd pxor <Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pxor(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet4cd pandnot<Packet4cd>(const Packet4cd& a, const Packet4cd& b) { return Packet4cd(pandnot(a.v,b.v)); }
template<> EIGEN_STRONG_INLINE Packet4cd pload <Packet4cd>(const std::complex<double>* from)
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet4cd(pload<Packet8d>((const double*)from)); }
@@ -308,7 +308,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4c
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a)
{
- __m128d low = _mm512_extractf64x2_pd(a.v, 0);
+ __m128d low = extract128<0>(a.v);
EIGEN_ALIGN16 double res[2];
_mm_store_pd(res, low);
return std::complex<double>(res[0],res[1]);