Add PacketConv implementation for non-vectorizable src expressions

author: Eugene Zhulenev <ezhulenev@google.com> 2019-02-08 15:47:25 -0800
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-02-08 15:47:25 -0800
commit: 21eb97d3e07ca3e314f36c1511a3669d7a2f1ed3 (patch)
tree: 1738fa9225f2b00587ec709d024041be345d56da /unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
parent: 1e36166ed1cd9a2e6fd5a946e2ec418406963a1a (diff)
1 files changed, 26 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
index 27c9d4a20..938fd0f34 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
@@ -230,8 +230,21 @@ struct PacketConv<SrcPacket, TargetPacket, LoadMode, true, IsSameT> {
   }
 };
 
-template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize>
-struct PacketConv<SrcPacket, TargetPacket, LoadMode, ActuallyVectorize, true> {
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
+  typedef typename internal::unpacket_traits<TargetPacket>::type TargetType;
+  static const int PacketSize = internal::unpacket_traits<TargetPacket>::size;
+
+  template <typename ArgType, typename Device>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
+    EIGEN_ALIGN_MAX typename internal::remove_const<TargetType>::type values[PacketSize];
+    for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
+    return internal::pload<TargetPacket>(values);
+  }
+};
+
+template <typename SrcPacket, typename TargetPacket, int LoadMode>
+struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
   template <typename ArgType, typename Device>
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator<ArgType, Device>& impl, Index index) {
     return impl.template packet<LoadMode>(index);
@@ -287,10 +300,17 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
   }
 
   template<int LoadMode>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
-  {
-    const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess &
-        internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
+  packet(Index index) const {
+    // If we are not going to do the cast, we only need to check that the base
+    // TensorEvaluator has packet access. Otherwise we also need to make sure
+    // that an implementation of the vectorized cast is available.
+    const bool Vectorizable =
+        IsSameType
+        ? TensorEvaluator<ArgType, Device>::PacketAccess
+        : TensorEvaluator<ArgType, Device>::PacketAccess &
+          internal::type_casting_traits<SrcType, TargetType>::VectorizedCast;
+
     return internal::PacketConv<PacketSourceType, PacketReturnType, LoadMode,
                                 Vectorizable, IsSameType>::run(m_impl, index);
   }
author	Eugene Zhulenev <ezhulenev@google.com>	2019-02-08 15:47:25 -0800
committer	Eugene Zhulenev <ezhulenev@google.com>	2019-02-08 15:47:25 -0800
commit	21eb97d3e07ca3e314f36c1511a3669d7a2f1ed3 (patch)
tree	1738fa9225f2b00587ec709d024041be345d56da /unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h
parent	1e36166ed1cd9a2e6fd5a946e2ec418406963a1a (diff)