diff options
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 5 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 20 |
2 files changed, 25 insertions, 0 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 8f63af7cb..02882bdea 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -134,6 +134,11 @@ pcast(const SrcPacket& a, const SrcPacket& /*b*/) { return static_cast<TgtPacket>(a); } +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { + return static_cast<TgtPacket>(a); +} /** \internal \returns a + b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h index 877bcd0df..d2defcaf4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -86,6 +86,26 @@ struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { const TensorEvaluator& m_impl; }; +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> { + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template<int LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; + + SrcPacket src1 = m_impl.template packet<LoadMode>(index); + SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); + SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize); + TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { |