aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h5
1 files changed, 4 insertions, 1 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index f594725a8..fb1f1f6ea 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -93,7 +93,10 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> {
template<int LoadMode, typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const {
const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
- if (index + SrcPacketSize < m_maxIndex) {
+ // Only call m_impl.packet() when we have direct access to the underlying data. This
+ // ensures that we don't compute the subexpression twice. We may however load some
+ // coefficients twice, but in practice this doesn't negatively impact performance.
+ if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
// Force unaligned memory loads since we can't ensure alignment anymore
return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
} else {