diff options
author | Antonio Sanchez <cantonios@google.com> | 2020-06-25 14:31:16 -0700 |
---|---|---|
committer | Antonio Sánchez <cantonios@google.com> | 2020-06-30 18:53:55 +0000 |
commit | 9cb8771e9c4a1f44ba59741c9fac495d1872bb25 (patch) | |
tree | 5348c34ac0673d09fe97aea29770e7b236e85510 /unsupported/Eigen/CXX11 | |
parent | 145e51516fdac7b30d22c11c6878c2805fc3d724 (diff) |
Fix tensor casts for large packets and casts to/from std::complex
The original tensor casts were only defined for
`SrcCoeffRatio`:`TgtCoeffRatio` ratios of 1:1, 1:2, 2:1, and 4:1. Here we
add the missing 1:N and 8:1 cases.
We also add casting `Eigen::half` to/from `std::complex<T>`, which
was missing, to make it consistent with `Eigen::bfloat16`, and
generalize the overload to work for any complex type.
Tests were added to `basicstuff`, `packetmath`, and
`cxx11_tensor_casts` to test all cast configurations.
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 33 |
1 files changed, 31 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index cdbafbbb1..44493906d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -51,7 +51,10 @@ struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorCo template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> -struct PacketConverter { +struct PacketConverter; + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl) {} @@ -109,7 +112,33 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { }; template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> -struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template<int LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; + + SrcPacket src1 = m_impl.template packet<LoadMode>(index); + SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize); + SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); + SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize); + SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize); + SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize); + SrcPacket src8 = m_impl.template 
packet<LoadMode>(index + 7 * SrcPacketSize); + TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator& impl) : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} |