author    A. Unique TensorFlower <gardener@tensorflow.org>  2016-10-19 09:54:28 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2016-10-19 11:05:53 -0700
commit    75c9eb60e761a76b3c23df44152dfee2b623a3e2 (patch)
tree      7b28222310015c900681f26fc1836550bc28d3bf /third_party/eigen3
parent    418daaaca01d9f50e76d1f912a997a32d207665e (diff)
Internal change.
Change: 136615121
Diffstat (limited to 'third_party/eigen3')
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint                            1
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h      1
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h   57
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h      83
4 files changed, 60 insertions, 82 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
index 35b55de46d..9d6b9c3f01 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
@@ -33,7 +33,6 @@
// Use optimized implementations whenever available
#ifdef EIGEN_VECTORIZE_AVX2
#define EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT
-#include "src/Tensor/TensorContractionThreadPool.h"
#include "src/FixedPoint/PacketMathAVX2.h"
#include "src/FixedPoint/MatMatProductAVX2.h"
#include "src/FixedPoint/TypeCastingAVX2.h"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
index 564729ce48..6b625abc3e 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
@@ -90,6 +90,7 @@ struct QInt32 {
QInt32() {}
QInt32(const int8_t v) : value(v) {}
QInt32(const int32_t v) : value(v) {}
+ QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {}
QInt32(const QInt8 v) : value(v.value) {}
QInt32(const float v) : value(static_cast<int32_t>(lrint(v))) {}
#ifdef EIGEN_MAKING_DOCS
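A minimal standalone sketch of what the new constructor enables, not part of the patch: a trimmed stand-in for QInt32 with only the int32_t overload and the newly added uint32_t overload, showing that an unsigned 32-bit value now binds to an exact-match constructor and is reinterpreted via static_cast, as in the hunk above.

#include <cstdint>

// Trimmed stand-in for Eigen's QInt32 (the real type lives in
// FixedPointTypes.h); only the constructors relevant here are kept.
struct QInt32 {
  QInt32() {}
  QInt32(const int32_t v) : value(v) {}
  QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {}  // new overload
  int32_t value;
};

int main() {
  const uint32_t raw = 0x80000001u;  // an unsigned 32-bit source value
  const QInt32 q(raw);               // resolves to the uint32_t overload
  // The value is carried over through the static_cast, as in the patch.
  return (q.value == static_cast<int32_t>(raw)) ? 0 : 1;
}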
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
index d561b79fbd..6b4b0edcfb 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
@@ -42,39 +42,50 @@ public:
// Specialized blocking for quantized implementations.
// Used by TensorContractionThreadPool, inputs must have dimensions that are
// multiples of 32.
-template<int KcFactor, typename Index>
-struct ComputeGemmByColBlockingSizes<QInt8, QUInt8, KcFactor, Index> {
- void operator()(Index& k, Index& m, Index& n, Index num_threads)
+template<typename Index,
+ typename LeftTensor,
+ typename left_nocontract_t, typename left_contract_t,
+ bool left_inner_dim_contiguous, bool left_inner_dim_reordered, int LeftAlignment,
+ typename RightTensor,
+ typename right_nocontract_t, typename right_contract_t,
+ bool right_inner_dim_contiguous, bool right_inner_dim_reordered, int RightAlignment, int ShardingType>
+class TensorContractionBlocking<TensorContractionInputMapper<QInt8, Index, Lhs, LeftTensor, left_nocontract_t, left_contract_t, 32, left_inner_dim_contiguous, left_inner_dim_reordered, LeftAlignment>, TensorContractionInputMapper<QUInt8, Index, Rhs, RightTensor, right_nocontract_t, right_contract_t, 32, right_inner_dim_contiguous, right_inner_dim_reordered, RightAlignment>, Index, ShardingType> {
+ public:
+
+ typedef QInt8 LhsScalar;
+ typedef QUInt8 RhsScalar;
+
+ TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
+ kc_(k), mc_(m), nc_(n)
{
eigen_assert(m % 32 == 0);
- eigen_assert(n % 32 == 0);
eigen_assert(k % 32 == 0);
if (!k || !m || !n) {
return;
}
- n = (((n / num_threads) + 31) / 32) * 32;
- }
-};
-// Specialized blocking for quantized implementations.
-// Used by TensorContractionThreadPool, inputs must have dimensions that are
-// multiples of 32.
-template<int KcFactor, typename Index>
-struct ComputeGemmByRowBlockingSizes<QInt8, QUInt8, KcFactor, Index> {
- void operator()(Index& k, Index& m, Index& n, Index num_threads)
- {
- eigen_assert(m % 32 == 0);
- eigen_assert(n % 32 == 0 || n == 1);
- eigen_assert(k % 32 == 0);
- if (!k || !m || !n) {
- return;
+ if (ShardingType == ShardByCol) {
+ eigen_assert(n % 32 == 0);
+ nc_ = (((n / num_threads) + 31) / 32) * 32;
}
- // Special case to avoid breaking the unimplemented matrix-vector case
- if (n == 1) {
- n = 32;
+ else {
+ eigen_assert(n % 32 == 0 || n == 1);
+ // Special case to avoid breaking the unimplemented matrix-vector case
+ if (n == 1) {
+ nc_ = 32;
+ }
+ mc_ = (((m / num_threads) + 31) / 32) * 32;
}
- m = (((m / num_threads) + 31) / 32) * 32;
}
+
+ EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
+ EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
+ EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
+
+ private:
+ Index kc_;
+ Index mc_;
+ Index nc_;
};
// Specialized blocking for quantized implementations.
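For reference, and not part of the patch: the heart of the TensorContractionBlocking specialization added above is the rounding rule it applies to the sharded dimension (n when sharding by column, m otherwise). The helper below is a hypothetical standalone version of that arithmetic, kept only to show the rounding behavior.

#include <cassert>
#include <cstdio>

// Hypothetical helper mirroring the blocking arithmetic above: divide a
// dimension across num_threads, then round the per-thread block up to the
// next multiple of 32 (the specialization asserts dim % 32 == 0).
static long block_size(long dim, long num_threads) {
  assert(dim % 32 == 0);
  return (((dim / num_threads) + 31) / 32) * 32;
}

int main() {
  printf("%ld\n", block_size(256, 3));  // 256/3 = 85, rounded up to 96
  printf("%ld\n", block_size(256, 4));  // exact split: 64, already a multiple of 32
  return 0;
}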
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index cae1a0b06d..e71c2d8aea 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -117,19 +117,19 @@ template <>
struct unpacket_traits<Packet32q8i> {
typedef QInt8 type;
typedef Packet16q8i half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet32q8u> {
typedef QUInt8 type;
typedef Packet16q8u half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet8q32i> {
typedef QInt32 type;
typedef Packet4q32i half;
- enum { size = 8 };
+ enum { size = 8, alignment=Aligned32 };
};
// Unaligned load
@@ -342,67 +342,34 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) {
return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1));
}
-// Comparisons
-template <>
-EIGEN_STRONG_INLINE Packet8q32i peq<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpeq_epi32(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i peq<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8u peq<Packet32q8u>(const Packet32q8u& a,
- const Packet32q8u& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
-
-// Note: There are no instructions in AVX2 for unsigned lt/gt comparison.
-// These are added in AVX-512.
-template <>
-EIGEN_STRONG_INLINE Packet8q32i ple<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- const __m256i gt = _mm256_cmpgt_epi32(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i ple<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- const __m256i gt = _mm256_cmpgt_epi8(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
+// Vectorized scaling of Packet32q8i by float.
+template<>
+struct scalar_product_op<QInt32, double> : binary_op_base<QInt32, double> {
+ typedef typename ScalarBinaryOpTraits<QInt32, double>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+#else
+ scalar_product_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const QInt32& a, const double& b) const { return a * b; }
-template <>
-EIGEN_STRONG_INLINE Packet8q32i plt<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpgt_epi32(b.val, a.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i plt<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpgt_epi8(b.val, a.val);
-}
+ EIGEN_STRONG_INLINE const Packet8q32i packetOp(const Packet8q32i& a, const double& b) const {
+ __m256d scale = _mm256_set1_pd(b);
+ __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
+ __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
+ __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
+ __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);
+ }
+};
-// Vectorized scaling of Packet32q8i by float.
template <>
-struct functor_traits<scalar_multiple2_op<QInt32, double>> {
+struct functor_traits<scalar_product_op<QInt32, double>> {
enum { Cost = 4 * NumTraits<float>::MulCost, PacketAccess = true };
};
-template <>
-EIGEN_STRONG_INLINE const Packet8q32i
-scalar_multiple2_op<QInt32, double>::packetOp(const Packet8q32i& a) const {
- __m256d scale = _mm256_set1_pd(m_other);
- __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
- __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
- __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
- __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
- return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi,
- 1);
-}
-
} // end namespace internal
} // end namespace Eigen
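A self-contained sketch of the convert, scale, reconvert pattern used by the packetOp added above; this is not part of the patch and assumes an AVX2 build (e.g. -mavx2). The eight 32-bit lanes are widened to double precision in two 128-bit halves, multiplied by the scale, converted back under the current rounding mode, and reassembled.

#include <immintrin.h>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone version of the same trick as packetOp above: scale eight int32
// lanes by a double, converting each 128-bit half to double and back.
static __m256i scale_epi32_by_double(__m256i a, double b) {
  __m256d scale = _mm256_set1_pd(b);
  __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
  __m128i lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
  __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
  __m128i hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}

int main() {
  alignas(32) int32_t in[8] = {-100, -3, 0, 7, 64, 127, 1000, -1000};
  alignas(32) int32_t out[8];
  _mm256_store_si256(
      reinterpret_cast<__m256i*>(out),
      scale_epi32_by_double(
          _mm256_load_si256(reinterpret_cast<const __m256i*>(in)), 0.25));
  for (int i = 0; i < 8; ++i) {
    // Each lane should match the scalar round-to-nearest result.
    printf("%d -> %d (expect %ld)\n", in[i], out[i], std::lrint(in[i] * 0.25));
  }
  return 0;
}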