author    A. Unique TensorFlower <gardener@tensorflow.org>  2016-10-19 09:54:28 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2016-10-19 11:05:53 -0700
commit    75c9eb60e761a76b3c23df44152dfee2b623a3e2 (patch)
tree      7b28222310015c900681f26fc1836550bc28d3bf /third_party/eigen3
parent    418daaaca01d9f50e76d1f912a997a32d207665e (diff)
Internal change.
Change: 136615121
Diffstat (limited to 'third_party/eigen3')
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint                            1
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h      1
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h   57
-rw-r--r--  third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h      83
4 files changed, 60 insertions, 82 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
index 35b55de46d..9d6b9c3f01 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
@@ -33,7 +33,6 @@
// Use optimized implementations whenever available
#ifdef EIGEN_VECTORIZE_AVX2
#define EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT
-#include "src/Tensor/TensorContractionThreadPool.h"
#include "src/FixedPoint/PacketMathAVX2.h"
#include "src/FixedPoint/MatMatProductAVX2.h"
#include "src/FixedPoint/TypeCastingAVX2.h"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
index 564729ce48..6b625abc3e 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
@@ -90,6 +90,7 @@ struct QInt32 {
QInt32() {}
QInt32(const int8_t v) : value(v) {}
QInt32(const int32_t v) : value(v) {}
+ QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {}
QInt32(const QInt8 v) : value(v.value) {}
QInt32(const float v) : value(static_cast<int32_t>(lrint(v))) {}
#ifdef EIGEN_MAKING_DOCS
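A minimal standalone sketch of what the new constructor enables, not part of the patch: a trimmed stand-in for QInt32 with only the int32_t overload and the newly added uint32_t overload, showing that an unsigned 32-bit value now binds to an exact-match constructor and is reinterpreted via static_cast, as in the hunk above.

#include <cstdint>

// Trimmed stand-in for Eigen's QInt32 (the real type lives in
// FixedPointTypes.h); only the constructors relevant here are kept.
struct QInt32 {
  QInt32() {}
  QInt32(const int32_t v) : value(v) {}
  QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {}  // new overload
  int32_t value;
};

int main() {
  const uint32_t raw = 0x80000001u;  // an unsigned 32-bit source value
  const QInt32 q(raw);               // resolves to the uint32_t overload
  // The value is carried over through the static_cast, as in the patch.
  return (q.value == static_cast<int32_t>(raw)) ? 0 : 1;
}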
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
index d561b79fbd..6b4b0edcfb 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
@@ -42,39 +42,50 @@ public:
// Specialized blocking for quantized implementations.
// Used by TensorContractionThreadPool, inputs must have dimensions that are
// multiples of 32.
-template<int KcFactor, typename Index>
-struct ComputeGemmByColBlockingSizes<QInt8, QUInt8, KcFactor, Index> {
- void operator()(Index& k, Index& m, Index& n, Index num_threads)
+template<typename Index,
+ typename LeftTensor,
+ typename left_nocontract_t, typename left_contract_t,
+ bool left_inner_dim_contiguous, bool left_inner_dim_reordered, int LeftAlignment,
+ typename RightTensor,
+ typename right_nocontract_t, typename right_contract_t,
+ bool right_inner_dim_contiguous, bool right_inner_dim_reordered, int RightAlignment, int ShardingType>
+class TensorContractionBlocking<TensorContractionInputMapper<QInt8, Index, Lhs, LeftTensor, left_nocontract_t, left_contract_t, 32, left_inner_dim_contiguous, left_inner_dim_reordered, LeftAlignment>, TensorContractionInputMapper<QUInt8, Index, Rhs, RightTensor, right_nocontract_t, right_contract_t, 32, right_inner_dim_contiguous, right_inner_dim_reordered, RightAlignment>, Index, ShardingType> {
+ public:
+
+ typedef QInt8 LhsScalar;
+ typedef QUInt8 RhsScalar;
+
+ TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
+ kc_(k), mc_(m), nc_(n)
{
eigen_assert(m % 32 == 0);
- eigen_assert(n % 32 == 0);
eigen_assert(k % 32 == 0);
if (!k || !m || !n) {
return;
}
- n = (((n / num_threads) + 31) / 32) * 32;
- }
-};
-// Specialized blocking for quantized implementations.
-// Used by TensorContractionThreadPool, inputs must have dimensions that are
-// multiples of 32.
-template<int KcFactor, typename Index>
-struct ComputeGemmByRowBlockingSizes<QInt8, QUInt8, KcFactor, Index> {
- void operator()(Index& k, Index& m, Index& n, Index num_threads)
- {
- eigen_assert(m % 32 == 0);
- eigen_assert(n % 32 == 0 || n == 1);
- eigen_assert(k % 32 == 0);
- if (!k || !m || !n) {
- return;
+ if (ShardingType == ShardByCol) {
+ eigen_assert(n % 32 == 0);
+ nc_ = (((n / num_threads) + 31) / 32) * 32;
}
- // Special case to avoid breaking the unimplemented matrix-vector case
- if (n == 1) {
- n = 32;
+ else {
+ eigen_assert(n % 32 == 0 || n == 1);
+ // Special case to avoid breaking the unimplemented matrix-vector case
+ if (n == 1) {
+ nc_ = 32;
+ }
+ mc_ = (((m / num_threads) + 31) / 32) * 32;
}
- m = (((m / num_threads) + 31) / 32) * 32;
}
+
+ EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
+ EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
+ EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
+
+ private:
+ Index kc_;
+ Index mc_;
+ Index nc_;
};
// Specialized blocking for quantized implementations.
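For reference, and not part of the patch: the heart of the TensorContractionBlocking specialization added above is the rounding rule it applies to the sharded dimension (n when sharding by column, m otherwise). The helper below is a hypothetical standalone version of that arithmetic, kept only to show the rounding behavior.

#include <cassert>
#include <cstdio>

// Hypothetical helper mirroring the blocking arithmetic above: divide a
// dimension across num_threads, then round the per-thread block up to the
// next multiple of 32 (the specialization asserts dim % 32 == 0).
static long block_size(long dim, long num_threads) {
  assert(dim % 32 == 0);
  return (((dim / num_threads) + 31) / 32) * 32;
}

int main() {
  printf("%ld\n", block_size(256, 3));  // 256/3 = 85, rounded up to 96
  printf("%ld\n", block_size(256, 4));  // exact split: 64, already a multiple of 32
  return 0;
}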
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index cae1a0b06d..e71c2d8aea 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -117,19 +117,19 @@ template <>
struct unpacket_traits<Packet32q8i> {
typedef QInt8 type;
typedef Packet16q8i half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet32q8u> {
typedef QUInt8 type;
typedef Packet16q8u half;
- enum { size = 32 };
+ enum { size = 32, alignment=Aligned32 };
};
template <>
struct unpacket_traits<Packet8q32i> {
typedef QInt32 type;
typedef Packet4q32i half;
- enum { size = 8 };
+ enum { size = 8, alignment=Aligned32 };
};
// Unaligned load
@@ -342,67 +342,34 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) {
return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1));
}
-// Comparisons
-template <>
-EIGEN_STRONG_INLINE Packet8q32i peq<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpeq_epi32(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i peq<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8u peq<Packet32q8u>(const Packet32q8u& a,
- const Packet32q8u& b) {
- return _mm256_cmpeq_epi8(a.val, b.val);
-}
-
-// Note: There are no instructions in AVX2 for unsigned lt/gt comparison.
-// These are added in AVX-512.
-template <>
-EIGEN_STRONG_INLINE Packet8q32i ple<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- const __m256i gt = _mm256_cmpgt_epi32(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i ple<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- const __m256i gt = _mm256_cmpgt_epi8(a.val, b.val);
- return _mm256_xor_si256(gt, gt);
-}
+// Vectorized scaling of Packet32q8i by float.
+template<>
+struct scalar_product_op<QInt32, double> : binary_op_base<QInt32, double> {
+ typedef typename ScalarBinaryOpTraits<QInt32, double>::ReturnType result_type;
+#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
+#else
+ scalar_product_op() {
+ EIGEN_SCALAR_BINARY_OP_PLUGIN
+ }
+#endif
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const QInt32& a, const double& b) const { return a * b; }
-template <>
-EIGEN_STRONG_INLINE Packet8q32i plt<Packet8q32i>(const Packet8q32i& a,
- const Packet8q32i& b) {
- return _mm256_cmpgt_epi32(b.val, a.val);
-}
-template <>
-EIGEN_STRONG_INLINE Packet32q8i plt<Packet32q8i>(const Packet32q8i& a,
- const Packet32q8i& b) {
- return _mm256_cmpgt_epi8(b.val, a.val);
-}
+ EIGEN_STRONG_INLINE const Packet8q32i packetOp(const Packet8q32i& a, const double& b) const {
+ __m256d scale = _mm256_set1_pd(b);
+ __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
+ __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
+ __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
+ __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1);
+ }
+};
-// Vectorized scaling of Packet32q8i by float.
template <>
-struct functor_traits<scalar_multiple2_op<QInt32, double>> {
+struct functor_traits<scalar_product_op<QInt32, double>> {
enum { Cost = 4 * NumTraits<float>::MulCost, PacketAccess = true };
};
-template <>
-EIGEN_STRONG_INLINE const Packet8q32i
-scalar_multiple2_op<QInt32, double>::packetOp(const Packet8q32i& a) const {
- __m256d scale = _mm256_set1_pd(m_other);
- __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
- __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
- __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
- __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
- return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi,
- 1);
-}
-
} // end namespace internal
} // end namespace Eigen
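A self-contained sketch of the convert, scale, reconvert pattern used by the packetOp added above; this is not part of the patch and assumes an AVX2 build (e.g. -mavx2). The eight 32-bit lanes are widened to double precision in two 128-bit halves, multiplied by the scale, converted back under the current rounding mode, and reassembled.

#include <immintrin.h>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone version of the same trick as packetOp above: scale eight int32
// lanes by a double, converting each 128-bit half to double and back.
static __m256i scale_epi32_by_double(__m256i a, double b) {
  __m256d scale = _mm256_set1_pd(b);
  __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a));
  __m128i lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo));
  __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1));
  __m128i hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
}

int main() {
  alignas(32) int32_t in[8] = {-100, -3, 0, 7, 64, 127, 1000, -1000};
  alignas(32) int32_t out[8];
  _mm256_store_si256(
      reinterpret_cast<__m256i*>(out),
      scale_epi32_by_double(
          _mm256_load_si256(reinterpret_cast<const __m256i*>(in)), 0.25));
  for (int i = 0; i < 8; ++i) {
    // Each lane should match the scalar round-to-nearest result.
    printf("%d -> %d (expect %ld)\n", in[i], out[i], std::lrint(in[i] * 0.25));
  }
  return 0;
}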