diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-26 14:37:09 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-26 14:37:09 -0700 |
commit | 1a47844529ecac71b4d25b4fdde87e04081b0564 (patch) | |
tree | 0ed1439e3f194f61a94ca9e63ffb9060183d43b1 /unsupported/Eigen/CXX11/src/Tensor | |
parent | 36369ab63c2acfbff111b20db189c6c38bfc15c8 (diff) |
Preserve the ability to vectorize the evaluation of an expression even when it involves a cast that isn't vectorized (e.g. fp16 to float)
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 36 |
1 file changed, 29 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 1cbf051be..860a6949a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -193,7 +193,7 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> enum { IsAligned = false, - PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast, + PacketAccess = true, Layout = TensorEvaluator<ArgType, Device>::Layout, RawAccess = false }; @@ -224,11 +224,9 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; - const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; - PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, - SrcCoeffRatio, TgtCoeffRatio> converter(m_impl); - return converter.template packet<LoadMode>(index); + const bool Vectorizable = TensorEvaluator<ArgType, Device>::PacketAccess & + internal::type_casting_traits<SrcType, TargetType>::VectorizedCast; + return PacketConv<LoadMode, Vectorizable>::run(m_impl, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -249,7 +247,31 @@ struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } protected: - TensorEvaluator<ArgType, Device> m_impl; + template <int LoadMode, bool ActuallyVectorize> + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + internal::scalar_cast_op<SrcType, TargetType> converter; + EIGEN_ALIGN_MAX typename 
internal::remove_const<CoeffReturnType>::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + PacketReturnType rslt = internal::pload<PacketReturnType>(values); + return rslt; + } + }; + + template <int LoadMode> + struct PacketConv<LoadMode, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator<ArgType, Device>& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; + PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet<LoadMode>(index); + } + }; + + TensorEvaluator<ArgType, Device> m_impl; }; } // end namespace Eigen |