Fix tensor contraction for AVX512 machines

This patch modifies the TensorContractionBlocking class so that, when the RHS packet size is greater than 8, the kc_ field is rounded down to a multiple of that packet size (kc_ is left unchanged if it is already less than or equal to the packet size). Without this change, spatial convolutions in TensorFlow do not work properly, because the code that re-arranges the input matrices can trigger an assertion failure if kc_ is not a multiple of the packet size. This leads to a unit test failure, //tensorflow/python/kernel_tests:conv_ops_test, on AVX512 builds of TensorFlow.
author: Mark D Ryan <mark.d.ryan@intel.com> 2018-07-31 09:33:37 +0100
committer: Mark D Ryan <mark.d.ryan@intel.com> 2018-07-31 09:33:37 +0100
commit: 6f5b126e6d23f1339d15b26fe87916132397d619 (patch)
tree: 0dc235c360eb5e5c7576db2f5de37a8201a9dcd6 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
parent: 77b447c24e3344e43ff64eb932d4bb35a2db01ce (diff)
1 files changed, 4 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
index 71fd19774..c51f3f8dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
@@ -51,6 +51,10 @@ class TensorContractionBlocking {
     else {
       computeProductBlockingSizes<LhsScalar, RhsScalar, 1>(kc_, nc_, mc_, num_threads);
     }
+
+    const int rhs_packet_size = internal::packet_traits<RhsScalar>::size;
+    kc_ = (rhs_packet_size <= 8 || kc_ <= rhs_packet_size) ?
+      kc_ : (kc_ / rhs_packet_size) * rhs_packet_size;
   }
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; }
author	Mark D Ryan <mark.d.ryan@intel.com>	2018-07-31 09:33:37 +0100
committer	Mark D Ryan <mark.d.ryan@intel.com>	2018-07-31 09:33:37 +0100
commit	6f5b126e6d23f1339d15b26fe87916132397d619 (patch)
tree	0dc235c360eb5e5c7576db2f5de37a8201a9dcd6 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
parent	77b447c24e3344e43ff64eb932d4bb35a2db01ce (diff)