about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2018-11-30 14:32:06 +0100
committer: Gael Guennebaud <g.gael@free.fr> 2018-11-30 14:32:06 +0100
commit: f91500d3035fd34683210eea6064b95a7aad4306 (patch)
tree: 3eb4c20b6857c1ca36471c462f8e6d6c9bbe8068 /Eigen
parent: b477d60bc604dd8970380e252f8ed3a6021bc081 (diff)
Fix pandnot order in AVX512
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/Core 2
-rw-r--r-- Eigen/src/Core/arch/AVX512/PacketMath.h 10
2 files changed, 7 insertions, 5 deletions
diff --git a/Eigen/Core b/Eigen/Core
index 41529bb63..bc6cf8a96 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -154,8 +154,10 @@ using std::ptrdiff_t;
#if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/TypeCasting.h"
+ #include "src/Core/arch/SSE/Complex.h"
#include "src/Core/arch/AVX/PacketMath.h"
#include "src/Core/arch/AVX/TypeCasting.h"
+ #include "src/Core/arch/AVX/Complex.h"
#include "src/Core/arch/AVX512/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h"
#include "src/Core/arch/AVX/MathFunctions.h"
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index 86cefba92..9a053fb1a 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -393,24 +393,24 @@ template <>
EIGEN_STRONG_INLINE Packet16f pandnot<Packet16f>(const Packet16f& a,
const Packet16f& b) {
#ifdef EIGEN_VECTORIZE_AVX512DQ
- return _mm512_andnot_ps(a, b);
+ return _mm512_andnot_ps(b, a);
#else
Packet16f res = _mm512_undefined_ps();
Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0);
Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0);
- res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0);
+ res = _mm512_insertf32x4(res, pandnot(lane0_a, lane0_b), 0);
Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1);
Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1);
- res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1);
+ res = _mm512_insertf32x4(res, pandnot(lane1_a, lane1_b), 1);
Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2);
Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2);
- res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2);
+ res = _mm512_insertf32x4(res, pandnot(lane2_a, lane2_b), 2);
Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3);
Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3);
- res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3);
+ res = _mm512_insertf32x4(res, pandnot(lane3_a, lane3_b), 3);
return res;
#endif