From 21eb97d3e07ca3e314f36c1511a3669d7a2f1ed3 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Fri, 8 Feb 2019 15:47:25 -0800
Subject: Add PacketConv implementation for non-vectorizable src expressions

---
 .../Eigen/CXX11/src/Tensor/TensorConversion.h | 32 ++++++++++++++++++----
 1 file changed, 26 insertions(+), 6 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 27c9d4a20..938fd0f34 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -230,8 +230,21 @@ struct PacketConv {
   }
 };
 
-template
-struct PacketConv {
+template
+struct PacketConv {
+  typedef typename internal::unpacket_traits::type TargetType;
+  static const int PacketSize = internal::unpacket_traits::size;
+
+  template
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) {
+    EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize];
+    for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
+    return internal::pload(values);
+  }
+};
+
+template
+struct PacketConv {
   template
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator& impl, Index index) {
     return impl.template packet(index);
@@ -287,10 +300,17 @@ struct TensorEvaluator, Device>
   }
 
   template
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const bool Vectorizable = TensorEvaluator::PacketAccess &
-      internal::type_casting_traits::VectorizedCast;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
+  packet(Index index) const {
+    // If we are not going to do the cast, we just need to check that base
+    // TensorEvaluator has packet access. Otherwise we also need to make sure,
+    // that we have an implementation of vectorized cast.
+    const bool Vectorizable =
+        IsSameType
+        ? TensorEvaluator::PacketAccess
+        : TensorEvaluator::PacketAccess &
+          internal::type_casting_traits::VectorizedCast;
+
     return internal::PacketConv::run(m_impl, index);
   }
 
--
cgit v1.2.3