aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-02-08 15:47:25 -0800
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-02-08 15:47:25 -0800
commit21eb97d3e07ca3e314f36c1511a3669d7a2f1ed3 (patch)
tree1738fa9225f2b00587ec709d024041be345d56da /unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
parent1e36166ed1cd9a2e6fd5a946e2ec418406963a1a (diff)
Add PacketConv implementation for non-vectorizable src expressions
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h32
1 files changed, 26 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 27c9d4a20..938fd0f34 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -230,8 +230,21 @@ struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
}
};
-template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize>
-struct PacketConv<SrcPacket, TargetPacket, LoadMode, ActuallyVectorize, true> {
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
+ typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+ static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
+
+ template <typename ArgType, typename Device>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+ EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+ for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
+ return internal::pload<TargetPacket>(values);
+ }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
template <typename ArgType, typename Device>
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
return impl.template packet<LoadMode>(index);
@@ -287,10 +300,17 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
}
template<int LoadMode>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
- {
- const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
- internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
+ packet(Index index) const {
+ // If we are not going to do the cast, we just need to check that base
+ // TensorEvaluator has packet access. Otherwise we also need to make sure,
+ // that we have an implementation of vectorized cast.
+ const bool Vectorizable =
+ IsSameType
+ ? TensorEvaluator<ArgType, Device>::PacketAccess
+ : TensorEvaluator<ArgType, Device>::PacketAccess &
+ internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+
return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
Vectorizable, IsSameType>::run(m_impl, index);
}