about | summary | refs | log | tree | commit | diff | homepage
path: root/third_party/eigen3
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-04 09:01:52 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-08 13:40:21 -0800
commit: 11c7506bbb351cac785e139a7982fad49f3f1022 (patch)
tree: 068b093276cf1918eee0a8545e9252781fddc81f /third_party/eigen3
parent: 0430539223cd9dd8108a13cbd62ceb6bfe1fa894 (diff)
Fixed the fixed point code to make sure it compiles when AVX2
instructions are enabled
Diffstat (limited to 'third_party/eigen3')
-rw-r--r-- third_party/eigen3/Eigen/Cholesky | 2
-rw-r--r-- third_party/eigen3/Eigen/Core | 2
-rw-r--r-- third_party/eigen3/Eigen/Eigenvalues | 3
-rw-r--r-- third_party/eigen3/Eigen/LU | 2
-rw-r--r-- third_party/eigen3/Eigen/QR | 2
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint | 1
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/Tensor | 3
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 79
8 files changed, 30 insertions, 64 deletions
diff --git a/third_party/eigen3/Eigen/Cholesky b/third_party/eigen3/Eigen/Cholesky
index ef31fc971b..637246f775 100644
--- a/third_party/eigen3/Eigen/Cholesky
+++ b/third_party/eigen3/Eigen/Cholesky
@@ -1 +1 @@
-#include "eigen-eigen-8cd7c2c6e9e1/Eigen/Cholesky"
\ No newline at end of file
+#include "eigen-eigen-fa22401ededc/Eigen/Cholesky"
diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core
index a330b6166f..d200142819 100644
--- a/third_party/eigen3/Eigen/Core
+++ b/third_party/eigen3/Eigen/Core
@@ -1 +1 @@
-#include "eigen-eigen-8cd7c2c6e9e1/Eigen/Core"
+#include "eigen-eigen-fa22401ededc/Eigen/Core"
diff --git a/third_party/eigen3/Eigen/Eigenvalues b/third_party/eigen3/Eigen/Eigenvalues
index 30158ba1ea..681a1c80d1 100644
--- a/third_party/eigen3/Eigen/Eigenvalues
+++ b/third_party/eigen3/Eigen/Eigenvalues
@@ -1 +1,2 @@
-#include "eigen-eigen-8cd7c2c6e9e1/Eigen/Eigenvalues"
+#include "eigen-eigen-fa22401ededc/Eigen/Eigenvalues"
+
diff --git a/third_party/eigen3/Eigen/LU b/third_party/eigen3/Eigen/LU
index 5637771a51..b79d709e47 100644
--- a/third_party/eigen3/Eigen/LU
+++ b/third_party/eigen3/Eigen/LU
@@ -1 +1 @@
-#include "eigen-eigen-8cd7c2c6e9e1/Eigen/LU"
+#include "eigen-eigen-fa22401ededc/Eigen/LU"
diff --git a/third_party/eigen3/Eigen/QR b/third_party/eigen3/Eigen/QR
index 360ba8e5e3..2dba67e4b6 100644
--- a/third_party/eigen3/Eigen/QR
+++ b/third_party/eigen3/Eigen/QR
@@ -1 +1 @@
-#include "eigen-eigen-8cd7c2c6e9e1/Eigen/QR"
+#include "eigen-eigen-fa22401ededc/Eigen/QR"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
index 35b55de46d..9d6b9c3f01 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
@@ -33,7 +33,6 @@
// Use optimized implementations whenever available
#ifdef EIGEN_VECTORIZE_AVX2
#define EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT
-#include "src/Tensor/TensorContractionThreadPool.h"
#include "src/FixedPoint/PacketMathAVX2.h"
#include "src/FixedPoint/MatMatProductAVX2.h"
#include "src/FixedPoint/TypeCastingAVX2.h"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
index eb293afd04..f24c0e0a17 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
@@ -1,2 +1 @@
-
-#include "eigen-eigen-8cd7c2c6e9e1/unsupported/Eigen/CXX11/Tensor"
+#include "eigen-eigen-fa22401ededc/unsupported/Eigen/CXX11/Tensor"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index cae1a0b06d..356bf19758 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -117,19 +117,19 @@ template <>
struct unpacket_traits<Packet32q8i> {
typedef QInt8 type;
typedef Packet16q8i half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet32q8u> {
typedef QUInt8 type;
typedef Packet16q8u half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet8q32i> {
typedef QInt32 type;
typedef Packet4q32i half;
- enum { size = 8 };
+ enum { size = 8, alignment=Aligned32 };
};
// Unaligned load
@@ -342,67 +342,34 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) {
return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1));
}
-// Comparisons
-template <>
-EIGEN_STRONG_INLINE Packet8q32i peq<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpeq_epi32(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i peq<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8u peq<Packet32q8u>(const Packet32q8u& a,
- const Packet32q8u& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
+// Vectorized scaling of Packet8q32i by double.
+template<>
+struct scalar_multiple2_op<QInt32, double> {
+ typedef Packet8q32i Packet1;
+ typedef typename scalar_product_traits<QInt32, double>::ReturnType result_type;
+ typedef typename packet_traits<result_type>::type packet_result_type;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const double& other) : m_other(other) { }
-// Note: There are no instructions in AVX2 for unsigned lt/gt comparison.
-// These are added in AVX-512.
-template <>
-EIGEN_STRONG_INLINE Packet8q32i ple<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- const __m256i gt = _mm256_cmpgt_epi32(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i ple<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- const __m256i gt = _mm256_cmpgt_epi8(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const QInt32& a) const { return a * m_other; }
-template <>
-EIGEN_STRONG_INLINE Packet8q32i plt<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpgt_epi32(b.val, a.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i plt<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpgt_epi8(b.val, a.val);
-}
+ EIGEN_STRONG_INLINE const Packet8q32i packetOp(const Packet8q32i& a) const {
+ __m256d scale = _mm256_set1_pd(m_other);
+ __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
+ __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
+ __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
+ __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);
+ }
+
+ const double m_other;
+};
-// Vectorized scaling of Packet32q8i by float.
template <>
struct functor_traits<scalar_multiple2_op<QInt32, double>> {
enum { Cost = 4 * NumTraits<float>::MulCost, PacketAccess = true };
};
-template <>
-EIGEN_STRONG_INLINE const Packet8q32i
-scalar_multiple2_op<QInt32, double>::packetOp(const Packet8q32i& a) const {
- __m256d scale = _mm256_set1_pd(m_other);
- __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
- __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
- __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
- __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
- return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi,
- 1);
-}
-
} // end namespace internal
} // end namespace Eigen