diff options
| Field | Value | Date |
|---|---|---|
| author | A. Unique TensorFlower <gardener@tensorflow.org> | 2016-10-19 09:54:28 -0800 |
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-10-19 11:05:53 -0700 |
| commit | 75c9eb60e761a76b3c23df44152dfee2b623a3e2 (patch) | |
| tree | 7b28222310015c900681f26fc1836550bc28d3bf /third_party/eigen3/unsupported/Eigen | |
| parent | 418daaaca01d9f50e76d1f912a997a32d207665e (diff) | |
Internal change.
Change: 136615121
Diffstat (limited to 'third_party/eigen3/unsupported/Eigen')
4 files changed, 60 insertions, 82 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint index 35b55de46d..9d6b9c3f01 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint +++ b/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint @@ -33,7 +33,6 @@ // Use optimized implementations whenever available #ifdef EIGEN_VECTORIZE_AVX2 #define EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT -#include "src/Tensor/TensorContractionThreadPool.h" #include "src/FixedPoint/PacketMathAVX2.h" #include "src/FixedPoint/MatMatProductAVX2.h" #include "src/FixedPoint/TypeCastingAVX2.h" diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h index 564729ce48..6b625abc3e 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h @@ -90,6 +90,7 @@ struct QInt32 { QInt32() {} QInt32(const int8_t v) : value(v) {} QInt32(const int32_t v) : value(v) {} + QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {} QInt32(const QInt8 v) : value(v.value) {} QInt32(const float v) : value(static_cast<int32_t>(lrint(v))) {} #ifdef EIGEN_MAKING_DOCS diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h index d561b79fbd..6b4b0edcfb 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h @@ -42,39 +42,50 @@ public: // Specialized blocking for quantized implementations. // Used by TensorContractionThreadPool, inputs must have dimensions that are // multiples of 32. 
-template<int KcFactor, typename Index> -struct ComputeGemmByColBlockingSizes<QInt8, QUInt8, KcFactor, Index> { - void operator()(Index& k, Index& m, Index& n, Index num_threads) +template<typename Index, + typename LeftTensor, + typename left_nocontract_t, typename left_contract_t, + bool left_inner_dim_contiguous, bool left_inner_dim_reordered, int LeftAlignment, + typename RightTensor, + typename right_nocontract_t, typename right_contract_t, + bool right_inner_dim_contiguous, bool right_inner_dim_reordered, int RightAlignment, int ShardingType> +class TensorContractionBlocking<TensorContractionInputMapper<QInt8, Index, Lhs, LeftTensor, left_nocontract_t, left_contract_t, 32, left_inner_dim_contiguous, left_inner_dim_reordered, LeftAlignment>, TensorContractionInputMapper<QUInt8, Index, Rhs, RightTensor, right_nocontract_t, right_contract_t, 32, right_inner_dim_contiguous, right_inner_dim_reordered, RightAlignment>, Index, ShardingType> { + public: + + typedef QInt8 LhsScalar; + typedef QUInt8 RhsScalar; + + TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : + kc_(k), mc_(m), nc_(n) { eigen_assert(m % 32 == 0); - eigen_assert(n % 32 == 0); eigen_assert(k % 32 == 0); if (!k || !m || !n) { return; } - n = (((n / num_threads) + 31) / 32) * 32; - } -}; -// Specialized blocking for quantized implementations. -// Used by TensorContractionThreadPool, inputs must have dimensions that are -// multiples of 32. 
-template<int KcFactor, typename Index> -struct ComputeGemmByRowBlockingSizes<QInt8, QUInt8, KcFactor, Index> { - void operator()(Index& k, Index& m, Index& n, Index num_threads) - { - eigen_assert(m % 32 == 0); - eigen_assert(n % 32 == 0 || n == 1); - eigen_assert(k % 32 == 0); - if (!k || !m || !n) { - return; + if (ShardingType == ShardByCol) { + eigen_assert(n % 32 == 0); + nc_ = (((n / num_threads) + 31) / 32) * 32; } - // Special case to avoid breaking the unimplemented matrix-vector case - if (n == 1) { - n = 32; + else { + eigen_assert(n % 32 == 0 || n == 1); + // Special case to avoid breaking the unimplemented matrix-vector case + if (n == 1) { + nc_ = 32; + } + mc_ = (((m / num_threads) + 31) / 32) * 32; } - m = (((m / num_threads) + 31) / 32) * 32; } + + EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } + EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } + EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + + private: + Index kc_; + Index mc_; + Index nc_; }; // Specialized blocking for quantized implementations. 
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h index cae1a0b06d..e71c2d8aea 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h @@ -117,19 +117,19 @@ template <> struct unpacket_traits<Packet32q8i> { typedef QInt8 type; typedef Packet16q8i half; - enum { size = 32 }; + enum { size = 32, alignment=Aligned32 }; }; template <> struct unpacket_traits<Packet32q8u> { typedef QUInt8 type; typedef Packet16q8u half; - enum { size = 32 }; + enum { size = 32, alignment=Aligned32 }; }; template <> struct unpacket_traits<Packet8q32i> { typedef QInt32 type; typedef Packet4q32i half; - enum { size = 8 }; + enum { size = 8, alignment=Aligned32 }; }; // Unaligned load @@ -342,67 +342,34 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet32q8i>(const Packet32q8i& a) { return std::max(_mm256_extract_epi8(tmp, 0), _mm256_extract_epi8(tmp, 1)); } -// Comparisons -template <> -EIGEN_STRONG_INLINE Packet8q32i peq<Packet8q32i>(const Packet8q32i& a, - const Packet8q32i& b) { - return _mm256_cmpeq_epi32(a.val, b.val); -} -template <> -EIGEN_STRONG_INLINE Packet32q8i peq<Packet32q8i>(const Packet32q8i& a, - const Packet32q8i& b) { - return _mm256_cmpeq_epi8(a.val, b.val); -} -template <> -EIGEN_STRONG_INLINE Packet32q8u peq<Packet32q8u>(const Packet32q8u& a, - const Packet32q8u& b) { - return _mm256_cmpeq_epi8(a.val, b.val); -} - -// Note: There are no instructions in AVX2 for unsigned lt/gt comparison. -// These are added in AVX-512. 
-template <> -EIGEN_STRONG_INLINE Packet8q32i ple<Packet8q32i>(const Packet8q32i& a, - const Packet8q32i& b) { - const __m256i gt = _mm256_cmpgt_epi32(a.val, b.val); - return _mm256_xor_si256(gt, gt); -} -template <> -EIGEN_STRONG_INLINE Packet32q8i ple<Packet32q8i>(const Packet32q8i& a, - const Packet32q8i& b) { - const __m256i gt = _mm256_cmpgt_epi8(a.val, b.val); - return _mm256_xor_si256(gt, gt); -} +// Vectorized scaling of Packet32q8i by float. +template<> +struct scalar_product_op<QInt32, double> : binary_op_base<QInt32, double> { + typedef typename ScalarBinaryOpTraits<QInt32, double>::ReturnType result_type; +#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN + EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) +#else + scalar_product_op() { + EIGEN_SCALAR_BINARY_OP_PLUGIN + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const QInt32& a, const double& b) const { return a * b; } -template <> -EIGEN_STRONG_INLINE Packet8q32i plt<Packet8q32i>(const Packet8q32i& a, - const Packet8q32i& b) { - return _mm256_cmpgt_epi32(b.val, a.val); -} -template <> -EIGEN_STRONG_INLINE Packet32q8i plt<Packet32q8i>(const Packet32q8i& a, - const Packet32q8i& b) { - return _mm256_cmpgt_epi8(b.val, a.val); -} + EIGEN_STRONG_INLINE const Packet8q32i packetOp(const Packet8q32i& a, const double& b) const { + __m256d scale = _mm256_set1_pd(b); + __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a)); + __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo)); + __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1)); + __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi)); + return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, 1); + } +}; -// Vectorized scaling of Packet32q8i by float. 
template <> -struct functor_traits<scalar_multiple2_op<QInt32, double>> { +struct functor_traits<scalar_product_op<QInt32, double>> { enum { Cost = 4 * NumTraits<float>::MulCost, PacketAccess = true }; }; -template <> -EIGEN_STRONG_INLINE const Packet8q32i -scalar_multiple2_op<QInt32, double>::packetOp(const Packet8q32i& a) const { - __m256d scale = _mm256_set1_pd(m_other); - __m256d a_lo = _mm256_cvtepi32_pd(_mm256_castsi256_si128(a)); - __m128i result_lo = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_lo)); - __m256d a_hi = _mm256_cvtepi32_pd(_mm256_extracti128_si256(a, 1)); - __m128i result_hi = _mm256_cvtpd_epi32(_mm256_mul_pd(scale, a_hi)); - return _mm256_insertf128_si256(_mm256_castsi128_si256(result_lo), result_hi, - 1); -} - } // end namespace internal } // end namespace Eigen |