From 6f5b126e6d23f1339d15b26fe87916132397d619 Mon Sep 17 00:00:00 2001 From: Mark D Ryan Date: Tue, 31 Jul 2018 09:33:37 +0100 Subject: Fix tensor contraction for AVX512 machines This patch modifies the TensorContraction class to ensure that the kc_ field is always a multiple of the packet_size, if the packet_size is > 8. Without this change spatial convolutions in Tensorflow do not work properly as the code that re-arranges the input matrices can assert if kc_ is not a multiple of the packet_size. This leads to a unit test failure, //tensorflow/python/kernel_tests:conv_ops_test, on AVX512 builds of tensorflow. --- unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h index 71fd19774..c51f3f8dd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -51,6 +51,10 @@ class TensorContractionBlocking { else { computeProductBlockingSizes(kc_, nc_, mc_, num_threads); } + + const int rhs_packet_size = internal::packet_traits::size; + kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ? + kc_ : (kc_ / rhs_packet_size) * rhs_packet_size; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; } -- cgit v1.2.3