Updated the tensor type casting code as follows: in the case where TgtRatio < SrcRatio, disable the vectorization of the source expression unless it has direct access.

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-03-02 10:11:40 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-03-02 10:11:40 -0800
commit: b0f2b6f297b47585ef007073ec7550fe4cf5bde6 (patch)
tree: 10b5a2aefcf9651cbbf7ccaa718d4df1ac259b67 /unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
parent: d9cb604a5deda96ae0643debbf9d90705ced2187 (diff)
1 files changed, 4 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index f594725a8..fb1f1f6ea 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -93,7 +93,10 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
   template<int LoadMode, typename Index>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
     const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
-    if (index + SrcPacketSize < m_maxIndex) {
+    // Only call m_impl.packet() when we have direct access to the underlying data. This
+    // ensures that we don't compute the subexpression twice. We may however load some
+    // coefficients twice, but in practice this doesn't negatively impact performance.
+    if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
       // Force unaligned memory loads since we can't ensure alignment anymore
       return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
     } else {
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-03-02 10:11:40 -0800
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2015-03-02 10:11:40 -0800
commit	b0f2b6f297b47585ef007073ec7550fe4cf5bde6 (patch)
tree	10b5a2aefcf9651cbbf7ccaa718d4df1ac259b67 /unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
parent	d9cb604a5deda96ae0643debbf9d90705ced2187 (diff)