diff options
-rw-r--r-- | Eigen/Core | 2 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 22 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX/TypeCasting.h | 51 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/TypeCasting.h | 77 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/Tensor | 1 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 4 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h | 202 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 1 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_casts.cpp | 56 |
9 files changed, 413 insertions, 3 deletions
diff --git a/Eigen/Core b/Eigen/Core index 0b8eaa61c..b7205bda5 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -297,10 +297,12 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/TypeCasting.h" #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" #include "src/Core/arch/AltiVec/Complex.h" diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 721280b2c..678938c6b 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -98,6 +98,28 @@ template<typename T> struct packet_traits : default_packet_traits template<typename T> struct packet_traits<const T> : packet_traits<T> { }; +template <typename Src, typename Tgt> struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */ +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast<TgtPacket>(a); +} +template <typename SrcPacket, typename TgtPacket> +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast<TgtPacket>(a); +} + + /** \internal \returns a + b (coeff-wise) */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, diff --git a/Eigen/src/Core/arch/AVX/TypeCasting.h b/Eigen/src/Core/arch/AVX/TypeCasting.h new file mode 100644 index 000000000..83bfdc604 --- /dev/null +++ b/Eigen/src/Core/arch/AVX/TypeCasting.h @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template 
library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TYPE_CASTING_AVX_H +#define EIGEN_TYPE_CASTING_AVX_H + +namespace Eigen { + +namespace internal { + +// For now we use SSE to handle integers, so we can't use AVX instructions to cast +// from int to float +template <> +struct type_casting_traits<float, int> { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template <> +struct type_casting_traits<int, float> { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + + +template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) { + return _mm256_cvttps_epi32(a); // cvtt: truncate toward zero, matching static_cast<int> (cvtps would round to nearest) +} + +template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) { + return _mm256_cvtepi32_ps(a); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_AVX_H diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h new file mode 100644 index 000000000..454f4d38d --- /dev/null +++ b/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -0,0 +1,77 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +#ifndef EIGEN_TYPE_CASTING_SSE_H +#define EIGEN_TYPE_CASTING_SSE_H + +namespace Eigen { + +namespace internal { + +template <> +struct type_casting_traits<float, int> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) { + return _mm_cvttps_epi32(a); // cvtt: truncate toward zero, matching static_cast<int> (cvtps would round to nearest) +} + + +template <> +struct type_casting_traits<int, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + + +template <> +struct type_casting_traits<double, float> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 2, + TgtCoeffRatio = 1 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} + +template <> +struct type_casting_traits<float, double> { + enum { + VectorizedCast = 1, + SrcCoeffRatio = 1, + TgtCoeffRatio = 2 + }; +}; + +template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) { + // Simply discard the second half of the input + return _mm_cvtps_pd(a); +} + + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TYPE_CASTING_SSE_H diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index 34107ae71..7bd8cc9d4 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -65,6 +65,7 @@ #include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h" +#include "unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h" #include "unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h" #include
"unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 13709b504..e22dd4de0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -164,9 +164,9 @@ class TensorBase<Derived, ReadOnlyAccessors> } template <typename NewType> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> + EIGEN_STRONG_INLINE const TensorConversionOp<NewType, const Derived> cast() const { - return unaryExpr(internal::scalar_cast_op<Scalar, NewType>()); + return TensorConversionOp<NewType, const Derived>(derived()); } // Generic binary operation support. diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h new file mode 100644 index 000000000..29f536cf9 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -0,0 +1,202 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H + +namespace Eigen { + +/** \class TensorConversionOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor conversion class. 
This class makes it possible to vectorize + * type casting operations when the number of scalars per packet in the source + * and the destination type differ + */ +namespace internal { +template<typename TargetType, typename XprType> +struct traits<TensorConversionOp<TargetType, XprType> > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef TargetType Scalar; + typedef typename packet_traits<Scalar>::type Packet; + typedef typename traits<XprType>::StorageKind StorageKind; + typedef typename traits<XprType>::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference<Nested>::type _Nested; + static const int NumDimensions = traits<XprType>::NumDimensions; + static const int Layout = traits<XprType>::Layout; + enum { Flags = 0 }; +}; + +template<typename TargetType, typename XprType> +struct eval<TensorConversionOp<TargetType, XprType>, Eigen::Dense> +{ + typedef const TensorConversionOp<TargetType, XprType>& type; +}; + +template<typename TargetType, typename XprType> +struct nested<TensorConversionOp<TargetType, XprType>, 1, typename eval<TensorConversionOp<TargetType, XprType> >::type> +{ + typedef TensorConversionOp<TargetType, XprType> type; +}; + +} // end namespace internal + + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio> +struct PacketConverter { + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template<int LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); + } + + private: + const TensorEvaluator& m_impl; +}; + + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> { + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template<int 
LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; + + SrcPacket src1 = m_impl.template packet<LoadMode>(index); + SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize); + TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + + +template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket> +struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 2> { + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} + + template<int LoadMode, typename Index> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size; + if (index + SrcPacketSize < m_maxIndex) { + return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index)); + } else { + const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size; + EIGEN_ALIGN_DEFAULT typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize]; + for (int i = 0; i < TgtPacketSize; ++i) { + values[i] = m_impl.coeff(index+i); + } + TgtPacket rslt = internal::pload<TgtPacket>(values); + return rslt; + } + } + + private: + const TensorEvaluator& m_impl; + const typename TensorEvaluator::Index m_maxIndex; +}; + +template<typename TargetType, typename XprType> +class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits<TensorConversionOp>::Scalar Scalar; + typedef typename internal::traits<TensorConversionOp>::Packet Packet; + typedef typename internal::traits<TensorConversionOp>::StorageKind StorageKind; + typedef typename internal::traits<TensorConversionOp>::Index Index; + typedef typename 
internal::nested<TensorConversionOp>::type Nested; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + typedef typename NumTraits<Scalar>::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) + : m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all<typename XprType::Nested>::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + + + + +// Eval as rvalue +template<typename TargetType, typename ArgType, typename Device> +struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device> +{ + typedef TensorConversionOp<TargetType, ArgType> XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions; + typedef TargetType Scalar; + typedef TargetType CoeffReturnType; + typedef typename internal::remove_all<typename internal::traits<ArgType>::Scalar>::type SrcType; + typedef typename internal::traits<XprType>::Packet PacketReturnType; + typedef typename internal::packet_traits<SrcType>::type PacketSourceType; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess && internal::type_casting_traits<SrcType, TargetType>::VectorizedCast, + Layout = TensorEvaluator<ArgType, Device>::Layout, + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) + { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + 
internal::scalar_cast_op<SrcType, TargetType> converter; + return converter(m_impl.coeff(index)); + } + + template<int LoadMode> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int SrcCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits<SrcType, TargetType>::TgtCoeffRatio; + PacketConverter<TensorEvaluator<ArgType, Device>, PacketSourceType, PacketReturnType, + SrcCoeffRatio, TgtCoeffRatio> converter(m_impl); + return converter.template packet<LoadMode>(index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + TensorEvaluator<ArgType, Device> m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 7bec2b10a..3607fe3fe 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -25,6 +25,7 @@ template<typename IfXprType, typename ThenXprType, typename ElseXprType> class T template<typename Op, typename Dims, typename XprType> class TensorReductionOp; template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp; template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp; +template<typename TargetType, typename XprType> class TensorConversionOp; template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp; template<typename PatchDim, typename XprType> class TensorPatchOp; template<DenseIndex Rows, DenseIndex Cols, typename XprType> class TensorImagePatchOp; diff --git a/unsupported/test/cxx11_tensor_casts.cpp b/unsupported/test/cxx11_tensor_casts.cpp index 4f7ff7067..f53679d7b 100644 --- 
a/unsupported/test/cxx11_tensor_casts.cpp +++ b/unsupported/test/cxx11_tensor_casts.cpp @@ -17,7 +17,7 @@ using Eigen::array; static void test_simple_cast() { Tensor<float, 2> ftensor(20,30); - ftensor.setRandom(); + ftensor = ftensor.random() * 100.f; Tensor<char, 2> chartensor(20,30); chartensor.setRandom(); Tensor<std::complex<float>, 2> cplextensor(20,30); @@ -35,7 +35,61 @@ static void test_simple_cast() } +static void test_vectorized_cast() +{ + Tensor<int, 2> itensor(20,30); + itensor = itensor.random() / 1000; + Tensor<float, 2> ftensor(20,30); + ftensor.setRandom(); + Tensor<double, 2> dtensor(20,30); + dtensor.setRandom(); + + ftensor = itensor.cast<float>(); + dtensor = itensor.cast<double>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j))); + VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + +static void test_big_to_small_type_cast() +{ + Tensor<double, 2> dtensor(20, 30); + dtensor.setRandom(); + Tensor<float, 2> ftensor(20, 30); + ftensor = dtensor.cast<float>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + +static void test_small_to_big_type_cast() +{ + Tensor<float, 2> ftensor(20, 30); + ftensor.setRandom(); + Tensor<double, 2> dtensor(20, 30); + dtensor = ftensor.cast<double>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + void test_cxx11_tensor_casts() { CALL_SUBTEST(test_simple_cast()); + CALL_SUBTEST(test_vectorized_cast()); + CALL_SUBTEST(test_big_to_small_type_cast()); + CALL_SUBTEST(test_small_to_big_type_cast()); } |