author    | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-27 14:34:07 -0700
committer | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-27 14:34:07 -0700
commit    | c144bb355b74f4600156284e8202fcf9c0c135d8 (patch)
tree      | 3e35d145c624b544906a25a447e07104960cd77e
parent    | 35d90e89600ff2524ec8bdd4ef4b95dd7c78b656 (diff)
parent    | 57472886764ff71ad45338c6538649f7a8fa3d0e (diff)
Merge with upstream eigen/default
49 files changed, 668 insertions, 556 deletions
diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index dc199ece6..adaf52858 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -10,7 +10,7 @@ #ifndef EIGEN_CHOLMODSUPPORT_H #define EIGEN_CHOLMODSUPPORT_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -79,12 +79,12 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > res.dtype = 0; res.stype = -1; - + if (internal::is_same<_StorageIndex,int>::value) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_StorageIndex,long>::value) + else if (internal::is_same<_StorageIndex,SuiteSparse_long>::value) { res.itype = CHOLMOD_LONG; } @@ -95,9 +95,9 @@ cholmod_sparse viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_StorageIndex> > // setup res.xtype internal::cholmod_configure_matrix<_Scalar>::run(res); - + res.stype = 0; - + return res; } @@ -121,7 +121,7 @@ template<typename _Scalar, int _Options, typename _Index, unsigned int UpLo> cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Scalar,_Options,_Index>, UpLo>& mat) { cholmod_sparse res = viewAsCholmod(Ref<SparseMatrix<_Scalar,_Options,_Index> >(mat.matrix().const_cast_derived())); - + if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; // swap stype for rowmajor matrices (only works for real matrices) @@ -168,11 +168,11 @@ namespace internal { #define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ template<typename _StorageIndex> inline ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ - template<> inline ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } + template<> inline ret cm_ ## name<SuiteSparse_long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } #define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ template<typename _StorageIndex> inline ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ - template<> inline ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } + template<> inline ret cm_ ## name<SuiteSparse_long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } EIGEN_CHOLMOD_SPECIALIZE0(int, start) EIGEN_CHOLMOD_SPECIALIZE0(int, finish) @@ -184,15 +184,15 @@ EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) template<typename _StorageIndex> inline cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } -template<> inline cholmod_dense* cm_solve<long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } +template<> inline cholmod_dense* cm_solve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } template<typename _StorageIndex> inline cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } -template<> inline cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } +template<> inline cholmod_sparse* cm_spsolve<SuiteSparse_long> (int sys, cholmod_factor& L, cholmod_sparse& B, 
cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } template<typename _StorageIndex> inline int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } template<> -inline int cm_factorize_p<long> (cholmod_sparse* A, double beta[2], long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } +inline int cm_factorize_p<SuiteSparse_long> (cholmod_sparse* A, double beta[2], SuiteSparse_long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } #undef EIGEN_CHOLMOD_SPECIALIZE0 #undef EIGEN_CHOLMOD_SPECIALIZE1 @@ -254,10 +254,10 @@ class CholmodBase : public SparseSolverBase<Derived> internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); internal::cm_finish<StorageIndex>(m_cholmod); } - + inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); } inline StorageIndex rows() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); } - + /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was successful, @@ -276,11 +276,11 @@ class CholmodBase : public SparseSolverBase<Derived> factorize(matrix); return derived(); } - + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. - * + * * \sa factorize() */ void analyzePattern(const MatrixType& matrix) @@ -292,13 +292,13 @@ class CholmodBase : public SparseSolverBase<Derived> } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod); - + this->m_isInitialized = true; this->m_info = Success; m_analysisIsOk = true; m_factorizationIsOk = false; } - + /** Performs a numeric decomposition of \a matrix * * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been performed. @@ -315,11 +315,11 @@ class CholmodBase : public SparseSolverBase<Derived> this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); m_factorizationIsOk = true; } - + /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. * See the Cholmod user guide for details. */ cholmod_common& cholmod() { return m_cholmod; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template<typename Rhs,typename Dest> @@ -329,7 +329,7 @@ class CholmodBase : public SparseSolverBase<Derived> const Index size = m_cholmodFactor->n; EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - + // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. 
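The CholmodSupport.h hunks above all make the same substitution: the cholmod_l_* wrappers are now specialized on SuiteSparse_long rather than long, because SuiteSparse defines its wide index type as __int64 on 64-bit Windows, where it is not the same type as long. A minimal standalone sketch of that dispatch pattern, using hypothetical stand-ins (solve_int, solve_long, SuiteSparseLong) for the real cholmod API:

```cpp
// Sketch only: solve_int/solve_long stand in for the real
// cholmod_solve/cholmod_l_solve pair, and SuiteSparseLong stands in
// for SuiteSparse_long (which is not plain 'long' on every platform).
#include <cstdint>
#include <iostream>

typedef std::int64_t SuiteSparseLong;

int solve_int()  { return 32; }   // plays the role of cholmod_solve
int solve_long() { return 64; }   // plays the role of cholmod_l_solve

// The primary template handles the 32-bit index flavor...
template <typename StorageIndex>
int cm_solve() { return solve_int(); }

// ...and a full specialization on the wide index type routes to the
// cholmod_l_* flavor. Specializing on 'long' instead would leave this
// path unreachable wherever SuiteSparse_long is not 'long'.
template <>
int cm_solve<SuiteSparseLong>() { return solve_long(); }

int main() {
  std::cout << cm_solve<int>() << "\n";              // prints 32
  std::cout << cm_solve<SuiteSparseLong>() << "\n";  // prints 64
}
```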
Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived()); @@ -345,7 +345,7 @@ class CholmodBase : public SparseSolverBase<Derived> dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols()); internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod); } - + /** \internal */ template<typename RhsDerived, typename DestDerived> void _solve_impl(const SparseMatrixBase<RhsDerived> &b, SparseMatrixBase<DestDerived> &dest) const @@ -370,8 +370,8 @@ class CholmodBase : public SparseSolverBase<Derived> internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN - - + + /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. * * During the numerical factorization, an offset term is added to the diagonal coefficients:\n @@ -386,7 +386,7 @@ class CholmodBase : public SparseSolverBase<Derived> m_shiftOffset[0] = double(offset); return derived(); } - + /** \returns the determinant of the underlying matrix from the current factorization */ Scalar determinant() const { @@ -441,7 +441,7 @@ class CholmodBase : public SparseSolverBase<Derived> template<typename Stream> void dumpMemory(Stream& /*s*/) {} - + protected: mutable cholmod_common m_cholmod; cholmod_factor* m_cholmodFactor; @@ -478,11 +478,11 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl { typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT> Base; using Base::m_cholmod; - + public: - + typedef _MatrixType MatrixType; - + CholmodSimplicialLLT() : Base() { init(); } CholmodSimplicialLLT(const MatrixType& matrix) : Base() @@ -529,11 +529,11 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp { typedef CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT> Base; using Base::m_cholmod; - + public: - + typedef _MatrixType MatrixType; - + CholmodSimplicialLDLT() : Base() { init(); } CholmodSimplicialLDLT(const MatrixType& matrix) : Base() @@ -578,11 +578,11 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper { typedef CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT> Base; using Base::m_cholmod; - + public: - + typedef _MatrixType MatrixType; - + CholmodSupernodalLLT() : Base() { init(); } CholmodSupernodalLLT(const MatrixType& matrix) : Base() @@ -629,11 +629,11 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom { typedef CholmodBase<_MatrixType, _UpLo, CholmodDecomposition> Base; using Base::m_cholmod; - + public: - + typedef _MatrixType MatrixType; - + CholmodDecomposition() : Base() { init(); } CholmodDecomposition(const MatrixType& matrix) : Base() @@ -643,7 +643,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } ~CholmodDecomposition() {} - + void setMode(CholmodMode mode) { switch(mode) diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index 50406400b..563df6e84 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -66,6 +66,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic,scalar_logistic_op,logistic function,\sa ArrayBase::logistic) 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) @@ -89,7 +90,7 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) - + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. * * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). @@ -124,21 +125,21 @@ namespace Eigen * * Example: \include Cwise_array_power_array.cpp * Output: \verbinclude Cwise_array_power_array.out - * + * * \sa ArrayBase::pow() * * \relates ArrayBase */ template<typename Derived,typename ExponentDerived> inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived> - pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) + pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) { return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); } - + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. * * This function computes the coefficient-wise power between a scalar and an array of exponents. @@ -147,7 +148,7 @@ namespace Eigen * * Example: \include Cwise_scalar_power_array.cpp * Output: \verbinclude Cwise_scalar_power_array.out - * + * * \sa ArrayBase::pow() * * \relates ArrayBase diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 020f939ad..668922ffc 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> enum { RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime, + InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime, SizeAtCompileTime = Base::SizeAtCompileTime }; @@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors> void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const { #if EIGEN_MAX_ALIGN_BYTES>0 + // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value: + const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 
1 : Index(InnerStrideAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride); eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0) - || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned"); + || (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned"); #endif } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 4e2e916de..9e66575a9 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -634,13 +634,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a) template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff)); + return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff))); } template <> EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { // _mm512_abs_ps intrinsic not found, so hack around it - return (__m512d)_mm512_and_si512((__m512i)a, - _mm512_set1_epi64(0x7fffffffffffffff)); + return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), + _mm512_set1_epi64(0x7fffffffffffffff))); } #ifdef EIGEN_VECTORIZE_AVX512DQ diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index bfc046556..c1cc2ab3b 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -701,7 +701,7 @@ template<typename Scalar> struct scalar_isnan_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { #if defined(__SYCL_DEVICE_ONLY__) return numext::isnan(a); -#else +#else return (numext::isnan)(a); #endif } @@ -815,7 +815,7 @@ struct scalar_sign_op<Scalar,true> { template<typename Scalar> struct functor_traits<scalar_sign_op<Scalar> > { enum { - Cost = + Cost = NumTraits<Scalar>::IsComplex ? ( 8*NumTraits<Scalar>::MulCost ) // roughly : ( 3*NumTraits<Scalar>::AddCost), @@ -823,6 +823,34 @@ struct functor_traits<scalar_sign_op<Scalar> > }; }; +/** \internal + * \brief Template functor to compute the logistic function of a scalar + * \sa class CwiseUnaryOp, ArrayBase::logistic() + */ +template <typename T> +struct scalar_logistic_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_logistic_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { + const T one = T(1); + return one / (one + numext::exp(-x)); + } + + template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(const Packet& x) const { + const Packet one = pset1<Packet>(T(1)); + return pdiv(one, padd(one, pexp(pnegate(x)))); + } +}; +template <typename T> +struct functor_traits<scalar_logistic_op<T> > { + enum { + Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6, + PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && + packet_traits<T>::HasNegate && packet_traits<T>::HasExp + }; +}; + + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index e3231c712..836ff4711 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -405,7 +405,7 @@ template<typename T> struct plain_matrix_type_row_major typedef Matrix<typename traits<T>::Scalar, Rows, Cols, - (MaxCols==1&&MaxRows!=1) ? 
RowMajor : ColMajor, + (MaxCols==1&&MaxRows!=1) ? ColMajor : RowMajor, MaxRows, MaxCols > type; diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 4bb95eb8b..354e33de5 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat) template<typename Scalar, int Flags, typename Index> MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat) { - eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR - || (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC); + eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR) + || ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC)); Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow; diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 43615bd56..e928db467 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -21,6 +21,7 @@ typedef CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived> AcosReturn typedef CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived> AsinReturnType; typedef CwiseUnaryOp<internal::scalar_atan_op<Scalar>, const Derived> AtanReturnType; typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturnType; +typedef CwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> LogisticReturnType; typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType; typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType; typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType; @@ -335,6 +336,15 @@ cosh() const return CoshReturnType(derived()); } +/** \returns an expression of the coefficient-wise logistic of *this. + */ +EIGEN_DEVICE_FUNC +inline const LogisticReturnType +logistic() const +{ + return LogisticReturnType(derived()); +} + /** \returns an expression of the coefficient-wise inverse of *this. * * Example: \include Cwise_inverse.cpp diff --git a/doc/snippets/DirectionWise_hnormalized.cpp b/doc/snippets/DirectionWise_hnormalized.cpp index 3410790a8..2451f6e7b 100644 --- a/doc/snippets/DirectionWise_hnormalized.cpp +++ b/doc/snippets/DirectionWise_hnormalized.cpp @@ -1,7 +1,6 @@ -typedef Matrix<double,4,Dynamic> Matrix4Xd; Matrix4Xd M = Matrix4Xd::Random(4,5); Projective3d P(Matrix4d::Random()); cout << "The matrix M is:" << endl << M << endl << endl; cout << "M.colwise().hnormalized():" << endl << M.colwise().hnormalized() << endl << endl; cout << "P*M:" << endl << P*M << endl << endl; -cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl;
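The ArrayCwiseUnaryOps.h and GlobalFunctions.h hunks above add a coefficient-wise logistic function, logistic(x) = 1/(1 + exp(-x)), in both member and free-function form. A usage sketch, assuming an Eigen checkout that already contains this merge:

```cpp
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::ArrayXf x(4);
  x << -2.0f, -0.5f, 0.5f, 2.0f;
  // Member form added in ArrayCwiseUnaryOps.h...
  std::cout << x.logistic().transpose() << "\n";
  // ...and the equivalent free-function form added in GlobalFunctions.h.
  std::cout << Eigen::logistic(x).transpose() << "\n";
}
```

Both forms build the same lazy CwiseUnaryOp expression over scalar_logistic_op, so nothing is evaluated until the result is used.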
\ No newline at end of file +cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl; diff --git a/doc/snippets/VectorwiseOp_homogeneous.cpp b/doc/snippets/VectorwiseOp_homogeneous.cpp index aba4fed0e..67cf5737d 100644 --- a/doc/snippets/VectorwiseOp_homogeneous.cpp +++ b/doc/snippets/VectorwiseOp_homogeneous.cpp @@ -1,7 +1,6 @@ -typedef Matrix<double,3,Dynamic> Matrix3Xd; Matrix3Xd M = Matrix3Xd::Random(3,5); Projective3d P(Matrix4d::Random()); cout << "The matrix M is:" << endl << M << endl << endl; cout << "M.colwise().homogeneous():" << endl << M.colwise().homogeneous() << endl << endl; cout << "P * M.colwise().homogeneous():" << endl << P * M.colwise().homogeneous() << endl << endl; -cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl;
\ No newline at end of file +cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl; diff --git a/test/array.cpp b/test/array.cpp index c01653668..d9c4626c0 100644 --- a/test/array.cpp +++ b/test/array.cpp @@ -231,6 +231,7 @@ template<typename ArrayType> void array_real(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + VERIFY_IS_APPROX(m1.logistic(), logistic(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY_IS_APPROX(m1.round(), round(m1)); @@ -266,6 +267,7 @@ template<typename ArrayType> void array_real(const ArrayType& m) VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0+exp(-m1)))); VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast<Scalar>())*std::acos(-1.0)); VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); @@ -345,6 +347,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m) VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + VERIFY_IS_APPROX(m1.logistic(), logistic(m1)); VERIFY_IS_APPROX(m1.arg(), arg(m1)); VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); @@ -368,6 +371,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m) VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + VERIFY_IS_APPROX(logistic(m1), (1.0/(1.0 + exp(-m1)))); for (Index i = 0; i < m.rows(); ++i) for (Index j = 0; j < m.cols(); ++j) diff --git a/test/geo_quaternion.cpp b/test/geo_quaternion.cpp index ed801c71b..27219db10 100644 --- a/test/geo_quaternion.cpp +++ b/test/geo_quaternion.cpp @@ -290,6 +290,8 @@ template<typename PlainObjectType> void check_const_correctness(const PlainObjec // Regression for bug 1573 struct MovableClass { + // The following line is a workaround for gcc 4.7 and 4.8 (see bug 1573 comments). + static_assert(std::is_nothrow_move_constructible<Quaternionf>::value,""); MovableClass() = default; MovableClass(const MovableClass&) = default; MovableClass(MovableClass&&) noexcept = default; diff --git a/test/main.h b/test/main.h index de8a4865f..36784b1f4 100644 --- a/test/main.h +++ b/test/main.h @@ -125,7 +125,7 @@ inline void on_temporary_creation(long int size) { if(nb_temporaries!=(N)) { std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; }\ VERIFY( (#XPR) && nb_temporaries==(N) ); \ } - + #endif #include "split_test_helper.h" @@ -328,7 +328,7 @@ namespace Eigen #define VERIFY_RAISES_STATIC_ASSERT(a) \ std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; #endif - + #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) #define EIGEN_USE_CUSTOM_ASSERT #endif @@ -845,4 +845,4 @@ int main(int argc, char *argv[]) #ifdef _MSC_VER // 4503 - decorated name length exceeded, name was truncated #pragma warning( disable : 4503) -#endif
\ No newline at end of file +#endif diff --git a/test/meta.cpp b/test/meta.cpp index a6a67b85c..ea9607fe7 100644 --- a/test/meta.cpp +++ b/test/meta.cpp @@ -102,7 +102,13 @@ EIGEN_DECLARE_TEST(meta) } STATIC_CHECK(( !internal::is_convertible<MyInterface, MyImpl>::value )); + #if (!EIGEN_COMP_GNUC_STRICT) || (EIGEN_GNUC_AT_LEAST(4,8)) + // GCC prior to 4.8 fails to compile this test: + // error: cannot allocate an object of abstract type 'MyInterface' + // In other word, it does not obey SFINAE. + // Nevertheless, we don't really care about supporting abstract type as scalar type! STATIC_CHECK(( !internal::is_convertible<MyImpl, MyInterface>::value )); + #endif STATIC_CHECK(( internal::is_convertible<MyImpl, const MyInterface&>::value )); { int i; diff --git a/unsupported/Eigen/CXX11/ThreadPool b/unsupported/Eigen/CXX11/ThreadPool index cbb3bbf2c..1dcc4eb6c 100644 --- a/unsupported/Eigen/CXX11/ThreadPool +++ b/unsupported/Eigen/CXX11/ThreadPool @@ -44,17 +44,27 @@ #include <thread> #include <functional> #include <memory> - #include "src/util/CXX11Meta.h" #include "src/util/MaxSizeVector.h" #include "src/ThreadPool/ThreadLocal.h" +#ifndef EIGEN_THREAD_LOCAL +// There are non-parenthesized calls to "max" in the <unordered_map> header, +// which trigger a check in test/main.h causing compilation to fail. +// We work around the check here by removing the check for max in +// the case where we have to emulate thread_local. +#ifdef max +#undef max +#endif +#include <unordered_map> +#endif #include "src/ThreadPool/ThreadYield.h" #include "src/ThreadPool/ThreadCancel.h" #include "src/ThreadPool/EventCount.h" #include "src/ThreadPool/RunQueue.h" #include "src/ThreadPool/ThreadPoolInterface.h" #include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/Barrier.h" #include "src/ThreadPool/NonBlockingThreadPool.h" #endif @@ -62,4 +72,3 @@ #include <Eigen/src/Core/util/ReenableStupidWarnings.h> #endif // EIGEN_CXX11_THREADPOOL_MODULE - diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index 9ec1ec726..06bf422c5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -189,7 +189,7 @@ struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(TensorBlock* block) { if (TensorEvaluator<LeftArgType, Device>::RawAccess && - m_leftImpl.data() != nullptr) { + m_leftImpl.data() != NULL) { TensorBlock left_block(block->first_coeff_index(), block->block_sizes(), block->tensor_strides(), block->tensor_strides(), m_leftImpl.data() + block->first_coeff_index()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index ab3731952..9b9d330c1 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -200,9 +200,9 @@ class TensorBase<Derived, ReadOnlyAccessors> } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sigmoid_op<Scalar>, const Derived> + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived> sigmoid() const { - return unaryExpr(internal::scalar_sigmoid_op<Scalar>()); + return unaryExpr(internal::scalar_logistic_op<Scalar>()); } EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index f111964dd..6d90af2d3 100644 --- 
a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -62,7 +62,7 @@ struct cond<RowMajor> { */ enum TensorBlockShapeType { kUniformAllDims, - kSkewedInnerDims, + kSkewedInnerDims }; struct TensorOpResourceRequirements { @@ -73,7 +73,7 @@ struct TensorOpResourceRequirements { // expression tree (like reductions) to communicate resources // requirements based on local state (like the total number of reductions // to be computed). - TensorOpResourceRequirements(internal::TensorBlockShapeType shape, + TensorOpResourceRequirements(TensorBlockShapeType shape, const Index size) : block_shape(shape), block_total_size(size) {} }; @@ -90,9 +90,9 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements( *block_shape = resources[0].block_shape; *block_total_size = resources[0].block_total_size; for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) { - if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && - *block_shape != TensorBlockShapeType::kSkewedInnerDims) { - *block_shape = TensorBlockShapeType::kSkewedInnerDims; + if (resources[i].block_shape == kSkewedInnerDims && + *block_shape != kSkewedInnerDims) { + *block_shape = kSkewedInnerDims; } *block_total_size = numext::maxi(*block_total_size, resources[i].block_total_size); @@ -152,11 +152,11 @@ struct TensorBlockCopyOp { const Scalar* src_base = &src_data[src_index]; Scalar* dst_base = &dst_data[dst_index]; - typedef const Eigen::Array<Scalar, Dynamic, 1> Src; - typedef Eigen::Array<Scalar, Dynamic, 1> Dst; + typedef const Array<Scalar, Dynamic, 1> Src; + typedef Array<Scalar, Dynamic, 1> Dst; - typedef Eigen::Map<Src, 0, InnerStride<>> SrcMap; - typedef Eigen::Map<Dst, 0, InnerStride<>> DstMap; + typedef Map<Src, 0, InnerStride<> > SrcMap; + typedef Map<Dst, 0, InnerStride<> > DstMap; const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride)); DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride)); @@ -178,10 +178,8 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout, bool BlockRead> class TensorBlockIO { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef typename internal::TensorBlockCopyOp<Scalar, StorageIndex> - TensorBlockCopyOp; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockCopyOp<Scalar, StorageIndex> BlockCopyOp; protected: struct BlockIteratorState { @@ -194,7 +192,7 @@ class TensorBlockIO { }; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Copy( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data, Scalar* dst_data) { @@ -214,11 +212,11 @@ class TensorBlockIO { num_size_one_inner_dims, NumDims - num_size_one_inner_dims - 1); const StorageIndex block_dim_for_tensor_stride1_dim = NumDims == 0 ? 1 : tensor_to_block_dim_map[tensor_stride1_dim]; - Index block_inner_dim_size = + StorageIndex block_inner_dim_size = NumDims == 0 ? 
1 : block.block_sizes()[block_dim_for_tensor_stride1_dim]; - for (int i = num_size_one_inner_dims + 1; i < NumDims; ++i) { - const int dim = cond<Layout>()(i, NumDims - i - 1); + for (Index i = num_size_one_inner_dims + 1; i < NumDims; ++i) { + const Index dim = cond<Layout>()(i, NumDims - i - 1); const StorageIndex block_stride = block.block_strides()[tensor_to_block_dim_map[dim]]; if (block_inner_dim_size == block_stride && @@ -260,8 +258,8 @@ class TensorBlockIO { // Initialize block iterator state. Squeeze away any dimension of size 1. int num_squeezed_dims = 0; - for (int i = num_size_one_inner_dims; i < NumDims - 1; ++i) { - const int dim = cond<Layout>()(i + 1, NumDims - i - 2); + for (Index i = num_size_one_inner_dims; i < NumDims - 1; ++i) { + const Index dim = cond<Layout>()(i + 1, NumDims - i - 2); const StorageIndex size = block.block_sizes()[tensor_to_block_dim_map[dim]]; if (size == 1) { continue; @@ -290,8 +288,8 @@ class TensorBlockIO { const StorageIndex block_total_size = NumDims == 0 ? 1 : block.block_sizes().TotalSize(); for (StorageIndex i = 0; i < block_total_size; i += block_inner_dim_size) { - TensorBlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, - dst_data, inputIndex, input_stride, src_data); + BlockCopyOp::Run(block_inner_dim_size, outputIndex, output_stride, + dst_data, inputIndex, input_stride, src_data); // Update index. for (int j = 0; j < num_squeezed_dims; ++j) { if (++block_iter_state[j].count < block_iter_state[j].size) { @@ -320,13 +318,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/true> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, const Scalar* src_data) { + Block* block, const Scalar* src_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -336,7 +332,7 @@ class TensorBlockReader : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - TensorBlock* block, StorageIndex first_coeff_index, + Block* block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, const Scalar* src_data) { Base::Copy(*block, first_coeff_index, tensor_to_block_dim_map, @@ -357,13 +353,11 @@ template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; - typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> - Base; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; + typedef TensorBlockIO<Scalar, StorageIndex, NumDims, Layout, /*BlockRead=*/false> Base; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, Scalar* dst_data) { + const Block& block, Scalar* dst_data) { array<StorageIndex, NumDims> tensor_to_block_dim_map; for (int i = 
0; i < NumDims; ++i) { tensor_to_block_dim_map[i] = i; @@ -373,7 +367,7 @@ class TensorBlockWriter : public TensorBlockIO<Scalar, StorageIndex, NumDims, } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run( - const TensorBlock& block, StorageIndex first_coeff_index, + const Block& block, StorageIndex first_coeff_index, const array<StorageIndex, NumDims>& tensor_to_block_dim_map, const array<StorageIndex, NumDims>& tensor_strides, Scalar* dst_data) { Base::Copy(block, first_coeff_index, tensor_to_block_dim_map, @@ -542,13 +536,13 @@ struct TensorBlockCwiseBinaryOp { const StorageIndex left_stride, const LeftScalar* left_data, const StorageIndex right_index, const StorageIndex right_stride, const RightScalar* right_data) { - typedef const Eigen::Array<LeftScalar, Dynamic, 1> Lhs; - typedef const Eigen::Array<RightScalar, Dynamic, 1> Rhs; - typedef Eigen::Array<OutputScalar, Dynamic, 1> Out; + typedef const Array<LeftScalar, Dynamic, 1> Lhs; + typedef const Array<RightScalar, Dynamic, 1> Rhs; + typedef Array<OutputScalar, Dynamic, 1> Out; - typedef Eigen::Map<Lhs, 0, InnerStride<>> LhsMap; - typedef Eigen::Map<Rhs, 0, InnerStride<>> RhsMap; - typedef Eigen::Map<Out, 0, InnerStride<>> OutMap; + typedef Map<Lhs, 0, InnerStride<> > LhsMap; + typedef Map<Rhs, 0, InnerStride<> > RhsMap; + typedef Map<Out, 0, InnerStride<> > OutMap; const LeftScalar* lhs_base = &left_data[left_index]; const RightScalar* rhs_base = &right_data[right_index]; @@ -558,8 +552,7 @@ struct TensorBlockCwiseBinaryOp { const RhsMap rhs(rhs_base, num_coeff, InnerStride<>(right_stride)); OutMap out(out_base, num_coeff, InnerStride<>(output_stride)); - out = - Eigen::CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); + out = CwiseBinaryOp<BinaryFunctor, LhsMap, RhsMap>(lhs, rhs, functor); } }; @@ -575,8 +568,7 @@ struct TensorBlockCwiseBinaryOp { template <typename BinaryFunctor, typename StorageIndex, typename OutputScalar, int NumDims, int Layout> struct TensorBlockCwiseBinaryIO { - typedef typename internal::TensorBlock<OutputScalar, StorageIndex, NumDims, - Layout>::Dimensions Dimensions; + typedef typename TensorBlock<OutputScalar, StorageIndex, NumDims, Layout>::Dimensions Dimensions; struct BlockIteratorState { StorageIndex output_stride, output_span; @@ -642,7 +634,7 @@ struct TensorBlockCwiseBinaryIO { if (size == 1) { continue; } - auto& state = block_iter_state[num_squeezed_dims]; + BlockIteratorState& state = block_iter_state[num_squeezed_dims]; state.output_stride = block_strides[dim]; state.left_stride = left_strides[dim]; state.right_stride = right_strides[dim]; @@ -664,7 +656,7 @@ struct TensorBlockCwiseBinaryIO { right_stride, right_data); // Update index. 
for (int j = 0; j < num_squeezed_dims; ++j) { - auto& state = block_iter_state[j]; + BlockIteratorState& state = block_iter_state[j]; if (++state.count < state.size) { output_index += state.output_stride; left_index += state.left_stride; @@ -768,15 +760,14 @@ struct TensorBlockView { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorBlockMapper(const Dimensions& dims, const TensorBlockShapeType block_shape, Index min_target_size) : m_dimensions(dims), - m_block_dim_sizes(BlockDimensions(dims, block_shape, min_target_size)) { + m_block_dim_sizes(BlockDimensions(dims, block_shape, internal::convert_index<StorageIndex>(min_target_size))) { // Calculate block counts by dimension and total block count. DSizes<StorageIndex, NumDims> block_count; for (Index i = 0; i < block_count.rank(); ++i) { @@ -804,7 +795,7 @@ class TensorBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -852,8 +843,7 @@ class TensorBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { @@ -868,8 +858,8 @@ class TensorBlockMapper { private: static Dimensions BlockDimensions(const Dimensions& tensor_dims, const TensorBlockShapeType block_shape, - Index min_target_size) { - min_target_size = numext::maxi<Index>(1, min_target_size); + StorageIndex min_target_size) { + min_target_size = numext::maxi<StorageIndex>(1, min_target_size); // If tensor fully fits into the target size, we'll treat it a single block. Dimensions block_dim_sizes = tensor_dims; @@ -883,12 +873,12 @@ class TensorBlockMapper { block_dim_sizes[i] = 1; } } else if (block_dim_sizes.TotalSize() > min_target_size) { - if (block_shape == TensorBlockShapeType::kUniformAllDims) { + if (block_shape == kUniformAllDims) { // Tensor will not fit within 'min_target_size' budget: calculate tensor // block dimension sizes based on "square" dimension size target. - const Index dim_size_target = static_cast<Index>( - std::pow(static_cast<float>(min_target_size), - 1.0 / static_cast<float>(block_dim_sizes.rank()))); + const StorageIndex dim_size_target = internal::convert_index<StorageIndex>( + std::pow(static_cast<float>(min_target_size), + 1.0f / static_cast<float>(block_dim_sizes.rank()))); for (Index i = 0; i < block_dim_sizes.rank(); ++i) { // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it // a multiple of the packet size. 
Note that reducing @@ -913,7 +903,7 @@ class TensorBlockMapper { total_size = total_size_other_dims * block_dim_sizes[dim]; } } - } else if (block_shape == TensorBlockShapeType::kSkewedInnerDims) { + } else if (block_shape == kSkewedInnerDims) { StorageIndex coeff_to_allocate = min_target_size; for (int i = 0; i < NumDims; ++i) { const int dim = cond<Layout>()(i, NumDims - i - 1); @@ -929,8 +919,9 @@ class TensorBlockMapper { } } - eigen_assert(block_dim_sizes.TotalSize() >= - numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); + eigen_assert( + block_dim_sizes.TotalSize() >= + numext::mini<Index>(min_target_size, tensor_dims.TotalSize())); return block_dim_sizes; } @@ -957,8 +948,7 @@ class TensorBlockMapper { template <typename Scalar, typename StorageIndex, int NumDims, int Layout> class TensorSliceBlockMapper { public: - typedef typename internal::TensorBlock<Scalar, StorageIndex, NumDims, Layout> - TensorBlock; + typedef TensorBlock<Scalar, StorageIndex, NumDims, Layout> Block; typedef DSizes<StorageIndex, NumDims> Dimensions; TensorSliceBlockMapper(const Dimensions& tensor_dims, @@ -974,7 +964,7 @@ class TensorSliceBlockMapper { m_total_block_count(1) { // Calculate block counts by dimension and total block count. DSizes<StorageIndex, NumDims> block_count; - for (size_t i = 0; i < block_count.rank(); ++i) { + for (Index i = 0; i < block_count.rank(); ++i) { block_count[i] = divup(m_tensor_slice_extents[i], m_block_dim_sizes[i]); } m_total_block_count = array_prod(block_count); @@ -999,7 +989,7 @@ class TensorSliceBlockMapper { } } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block GetBlockForIndex(StorageIndex block_index, Scalar* data) const { StorageIndex first_coeff_index = 0; DSizes<StorageIndex, NumDims> coords; @@ -1056,8 +1046,7 @@ class TensorSliceBlockMapper { } } - return TensorBlock(first_coeff_index, sizes, strides, m_tensor_strides, - data); + return Block(first_coeff_index, sizes, strides, m_tensor_strides, data); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE StorageIndex total_block_count() const { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5e812b04d..02d061a9c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; - bool isCopy= false, nByOne = false, oneByN = false; + bool isCopy, nByOne, oneByN; enum { IsAligned = true, @@ -134,9 +134,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_device(device), - m_broadcast(op.broadcast()), - m_impl(op.expression(), device) { + : isCopy(false), nByOne(false), oneByN(false), + m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device) + { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. Instead one should reshape the scalar into a a N-D // tensor with N >= 1 of 1 element first and then broadcast. 
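Several of the TensorBlock.h edits above restore C++03 compatibility: Map<..., InnerStride<>> becomes Map<..., InnerStride<> > with a space, auto& state becomes an explicit BlockIteratorState&, and enumerators lose the TensorBlockShapeType:: qualifier (qualifying an unscoped enumerator by its enum name is a C++11 feature). The added space matters because pre-C++11 compilers lex a closing >> as one token; a minimal illustration:

```cpp
// Compile with -std=c++03 to see the difference: before C++11 the
// consecutive '>>' at the end of a nested template-id is lexed as the
// right-shift operator, so the second declaration fails to parse.
#include <vector>

std::vector<std::vector<int> > nested_ok;    // accepted by C++03 and later
// std::vector<std::vector<int>> nested_bad; // C++03 error: '>>' is operator>>

int main() { return nested_ok.empty() ? 0 : 1; }
```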
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index c459fc649..f0f61fade 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -152,13 +152,7 @@ struct TensorContractionParams { // 1. Elementwise Relu transformation following Conv2D. // 2. AddBias to the Conv2D output channels dimension. // -// See expected implementation in NoOpOutputKernel. -struct OutputKernel { - template <typename Index, typename Scalar> - using OutputMapper = internal::blas_data_mapper<Scalar, Index, ColMajor>; -}; - -// Output kernel that does absolutely nothing. +// The NoOpOutputKernel implements an output kernel that does absolutely nothing. struct NoOpOutputKernel { /** * Tensor contraction evaluator calls this kernel after finishing each block @@ -177,7 +171,7 @@ struct NoOpOutputKernel { */ template <typename Index, typename Scalar> EIGEN_ALWAYS_INLINE void operator()( - const OutputKernel::OutputMapper<Index, Scalar>& /*output_mapper*/, + const internal::blas_data_mapper<Scalar, Index, ColMajor>& /*output_mapper*/, const TensorContractionParams& /*params*/, Index /*i*/, Index /*j*/, Index /*num_rows*/, Index /*num_cols*/) const {} }; @@ -354,7 +348,7 @@ struct TensorContractionEvaluatorBase // dimensions and right non-contracting dimensions. m_lhs_inner_dim_contiguous = true; int dim_idx = 0; - unsigned int nocontract_idx = 0; + Index nocontract_idx = 0; for (int i = 0; i < LDims; i++) { // find if we are contracting on index i of left tensor @@ -667,7 +661,7 @@ struct TensorContractionEvaluatorBase // call gebp (matrix kernel) // The parameters here are copied from Eigen's GEMM implementation - const auto output_mapper = output.getSubMapper(i2, j2); + const OutputMapper output_mapper = output.getSubMapper(i2, j2); gebp(output_mapper, blockA, blockB, actual_mc, actual_kc, actual_nc, Scalar(1), -1, -1, 0, 0); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index d71b2e34b..6ee3827f3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -88,6 +88,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -107,12 +108,12 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<CoeffReturnType*>( + m_result = static_cast<PointerT>( m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; @@ -140,23 +141,22 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC typename Eigen::internal::traits<XprType>::PointerType 
data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result( - data, m_dimensions); + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { + TensorMap<Tensor<CoeffReturnType, NumDims, Layout, Index> > result(data, m_dimensions); m_op.func().eval(m_op.expression(), result, m_device); } Dimensions m_dimensions; const ArgType m_op; const Device& m_device; - CoeffReturnType* m_result; + PointerT m_result; }; @@ -251,6 +251,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType; typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; static const int PacketSize = PacketType<CoeffReturnType, Device>::size; + typedef typename PointerType<CoeffReturnType, Device>::Type PointerT; enum { IsAligned = false, @@ -270,12 +271,12 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(PointerT data) { if (data) { evalTo(data); return false; } else { - m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); + m_result = static_cast<PointerT>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(CoeffReturnType))); evalTo(m_result); return true; } @@ -302,22 +303,22 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC typename internal::traits<XprType>::PointerType data() const { return m_result; } + EIGEN_DEVICE_FUNC PointerT data() const { return m_result; } #ifdef EIGEN_USE_SYCL EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const { return m_device; } #endif protected: - EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { - TensorMap<Tensor<Scalar, NumDims, Layout> > result(data, m_dimensions); + EIGEN_DEVICE_FUNC void evalTo(PointerT data) { + TensorMap<Tensor<CoeffReturnType, NumDims, Layout> > result(data, m_dimensions); m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); } Dimensions m_dimensions; const XprType m_op; const Device& m_device; - CoeffReturnType* m_result; + PointerT m_result; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index cc134228a..6fc6688d3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -12,56 +12,6 @@ namespace Eigen { -// Barrier is an object that allows one or more threads to wait until -// Notify has been called a specified number of times. 
-class Barrier { - public: - Barrier(unsigned int count) : state_(count << 1), notified_(false) { - eigen_assert(((count << 1) >> 1) == count); - } - ~Barrier() { - eigen_assert((state_>>1) == 0); - } - - void Notify() { - unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; - if (v != 1) { - eigen_assert(((v + 2) & ~1) != 0); - return; // either count has not dropped to 0, or waiter is not waiting - } - std::unique_lock<std::mutex> l(mu_); - eigen_assert(!notified_); - notified_ = true; - cv_.notify_all(); - } - - void Wait() { - unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); - if ((v >> 1) == 0) return; - std::unique_lock<std::mutex> l(mu_); - while (!notified_) { - cv_.wait(l); - } - } - - private: - std::mutex mu_; - std::condition_variable cv_; - std::atomic<unsigned int> state_; // low bit is waiter flag - bool notified_; -}; - - -// Notification is an object that allows a user to to wait for another -// thread to signal a notification that an event has occurred. -// -// Multiple threads can wait on the same Notification object, -// but only one caller must call Notify() on the object. -struct Notification : Barrier { - Notification() : Barrier(1) {}; -}; - - // Runs an arbitrary function and then calls Notify() on the passed in // Notification. template <typename Function, typename... Args> struct FunctionWrapperWithNotification @@ -102,7 +52,7 @@ class Allocator { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = NULL) : pool_(pool), num_threads_(num_cores), allocator_(allocator) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { @@ -282,7 +232,7 @@ struct ThreadPoolDevice { // Convenience wrapper for parallelFor that does not align blocks. void parallelFor(Index n, const TensorOpCost& cost, std::function<void(Index, Index)> f) const { - parallelFor(n, cost, nullptr, std::move(f)); + parallelFor(n, cost, NULL, std::move(f)); } // Thread pool accessor. 
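The Barrier class deleted from TensorDeviceThreadPool.h above is not gone; per the ThreadPool header hunk earlier in this diff it now lives in src/ThreadPool/Barrier.h. Its contract: construct with a count, have each worker call Notify() exactly once, and Wait() blocks until all notifications have arrived. A self-contained usage sketch that reimplements the same interface (minus the eigen_asserts) so it compiles without Eigen:

```cpp
#include <atomic>
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

class Barrier {
 public:
  explicit Barrier(unsigned count) : state_(count << 1), notified_(false) {}

  void Notify() {
    unsigned v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
    if (v != 1) return;  // count not yet zero, or no thread is waiting
    std::unique_lock<std::mutex> l(mu_);
    notified_ = true;
    cv_.notify_all();
  }

  void Wait() {
    unsigned v = state_.fetch_or(1, std::memory_order_acq_rel);
    if ((v >> 1) == 0) return;  // every Notify() already happened
    std::unique_lock<std::mutex> l(mu_);
    while (!notified_) cv_.wait(l);
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::atomic<unsigned> state_;  // low bit: waiter flag; upper bits: 2 * pending count
  bool notified_;
};

int main() {
  const int kWorkers = 4;
  Barrier barrier(kWorkers);
  std::vector<std::thread> workers;
  for (int i = 0; i < kWorkers; ++i)
    workers.emplace_back([&] { /* ... do work ... */ barrier.Notify(); });
  barrier.Wait();  // returns once all four workers have notified
  for (std::thread& t : workers) t.join();
  std::cout << "all workers done\n";
}
```

The Notification type removed alongside it is simply a Barrier constructed with count 1, i.e. a single-shot event that one thread signals and any number of threads can wait on.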
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h index 4f973a5b7..ce91bc2a6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -32,12 +32,12 @@ namespace Eigen { // Boilerplate code namespace internal { -template<std::size_t n, typename Dimension> struct dget { +template<std::ptrdiff_t n, typename Dimension> struct dget { static const std::ptrdiff_t value = get<n, Dimension>::value; }; -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct fixed_size_tensor_index_linearization_helper { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -50,7 +50,7 @@ struct fixed_size_tensor_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -60,7 +60,7 @@ struct fixed_size_tensor_index_linearization_helper<Index, NumIndices, 0, RowMaj } }; -template<typename Index, std::size_t n> +template<typename Index, std::ptrdiff_t n> struct fixed_size_tensor_index_extraction_helper { template <typename Dimensions> EIGEN_DEVICE_FUNC @@ -94,7 +94,7 @@ struct Sizes { typedef internal::numeric_list<std::ptrdiff_t, Indices...> Base; const Base t = Base(); static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); - static const size_t count = Base::count; + static const ptrdiff_t count = Base::count; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { return Base::count; @@ -121,16 +121,16 @@ struct Sizes { return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::ptrdiff_t index) const { return internal::fixed_size_tensor_index_extraction_helper<std::ptrdiff_t, Base::count>::run(index, t); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, t); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, t); } }; @@ -144,25 +144,25 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<Indi #else -template <std::size_t n> +template <std::ptrdiff_t n> struct non_zero_size { - typedef internal::type2val<std::size_t, n> type; + typedef internal::type2val<std::ptrdiff_t, n> type; }; template <> struct non_zero_size<0> { typedef internal::null_type type; }; -template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0, std::size_t V5=0> struct Sizes { +template <std::ptrdiff_t V1=0, std::ptrdiff_t V2=0, std::ptrdiff_t V3=0, std::ptrdiff_t V4=0, std::ptrdiff_t V5=0> struct Sizes { typedef typename 
internal::make_type_list<typename non_zero_size<V1>::type, typename non_zero_size<V2>::type, typename non_zero_size<V3>::type, typename non_zero_size<V4>::type, typename non_zero_size<V5>::type >::type Base; - static const size_t count = Base::count; - static const std::size_t total_size = internal::arg_prod<Base>::value; + static const std::ptrdiff_t count = Base::count; + static const std::ptrdiff_t total_size = internal::arg_prod<Base>::value; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t rank() const { return count; } - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ptrdiff_t TotalSize() { return internal::arg_prod<Base>::value; } @@ -178,7 +178,7 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 #if EIGEN_HAS_VARIADIC_TEMPLATES template <typename... DenseIndex> Sizes(DenseIndex... /*indices*/) { } - explicit Sizes(std::initializer_list<std::size_t>) { + explicit Sizes(std::initializer_list<std::ptrdiff_t>) { // todo: add assertion } #else @@ -213,18 +213,18 @@ template <std::size_t V1=0, std::size_t V2=0, std::size_t V3=0, std::size_t V4=0 } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfColMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, false>::run(indices, *reinterpret_cast<const Base*>(this)); } template <typename DenseIndex> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - size_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { + ptrdiff_t IndexOfRowMajor(const array<DenseIndex, Base::count>& indices) const { return internal::fixed_size_tensor_index_linearization_helper<DenseIndex, Base::count, Base::count, true>::run(indices, *reinterpret_cast<const Base*>(this)); } }; namespace internal { -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes<V1, V2, V3, V4, V5>&) { return Sizes<V1, V2, V3, V4, V5>::total_size; } } @@ -233,7 +233,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes<V1, V2, // Boilerplate namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct tensor_index_linearization_helper { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -245,7 +245,7 @@ struct tensor_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct tensor_index_linearization_helper<Index, NumIndices, 0, RowMajor> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -264,7 +264,7 @@ struct DSizes : array<DenseIndex, NumDims> { typedef array<DenseIndex, NumDims> Base; static const int count = NumDims; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumDims; } @@ -298,7 +298,7 @@ struct DSizes : array<DenseIndex, NumDims> { } } #else - 
template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> + template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC DSizes(const Sizes<V1, V2, V3, V4, V5>& a) { for (int i = 0 ; i < NumDims; ++i) { (*this)[i] = a[i]; @@ -359,7 +359,7 @@ struct DSizes : array<DenseIndex, NumDims> { // Boilerplate namespace internal { -template<typename Index, std::size_t NumIndices, std::size_t n, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, std::ptrdiff_t n, bool RowMajor> struct tensor_vsize_index_linearization_helper { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -371,7 +371,7 @@ struct tensor_vsize_index_linearization_helper } }; -template<typename Index, std::size_t NumIndices, bool RowMajor> +template<typename Index, std::ptrdiff_t NumIndices, bool RowMajor> struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -386,10 +386,10 @@ struct tensor_vsize_index_linearization_helper<Index, NumIndices, 0, RowMajor> namespace internal { template <typename DenseIndex, int NumDims> struct array_size<const DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; + static const ptrdiff_t value = NumDims; }; template <typename DenseIndex, int NumDims> struct array_size<DSizes<DenseIndex, NumDims> > { - static const size_t value = NumDims; + static const ptrdiff_t value = NumDims; }; #ifndef EIGEN_EMULATE_CXX11_META_H template <typename std::ptrdiff_t... Indices> struct array_size<const Sizes<Indices...> > { @@ -399,33 +399,33 @@ template <typename std::ptrdiff_t... Indices> struct array_size<Sizes<Indices... static const std::ptrdiff_t value = Sizes<Indices...>::count; }; template <std::ptrdiff_t n, typename std::ptrdiff_t... 
Indices> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<Indices...>&) { - return get<n, internal::numeric_list<std::size_t, Indices...> >::value; + return get<n, internal::numeric_list<std::ptrdiff_t, Indices...> >::value; } template <std::ptrdiff_t n> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { eigen_assert(false && "should never be called"); return -1; } #else -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<const Sizes<V1,V2,V3,V4,V5> > { + static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count; }; -template <std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { - static const size_t value = Sizes<V1,V2,V3,V4,V5>::count; +template <std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> struct array_size<Sizes<V1,V2,V3,V4,V5> > { + static const ptrdiff_t value = Sizes<V1,V2,V3,V4,V5>::count; }; -template <std::size_t n, std::size_t V1, std::size_t V2, std::size_t V3, std::size_t V4, std::size_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { +template <std::ptrdiff_t n, std::ptrdiff_t V1, std::ptrdiff_t V2, std::ptrdiff_t V3, std::ptrdiff_t V4, std::ptrdiff_t V5> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<V1,V2,V3,V4,V5>&) { return get<n, typename Sizes<V1,V2,V3,V4,V5>::Base>::value; } #endif -template <typename Dims1, typename Dims2, size_t n, size_t m> +template <typename Dims1, typename Dims2, ptrdiff_t n, ptrdiff_t m> struct sizes_match_below_dim { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1&, Dims2&) { return false; } }; -template <typename Dims1, typename Dims2, size_t n> +template <typename Dims1, typename Dims2, ptrdiff_t n> struct sizes_match_below_dim<Dims1, Dims2, n, n> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Dims1& dims1, Dims2& dims2) { return (array_get<n-1>(dims1) == array_get<n-1>(dims2)) & diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index d9b61dc70..ba5ab1396 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -133,7 +133,7 @@ class TensorExecutor<Expression, DefaultDevice, Vectorizable, if (needs_assign) { // Size tensor blocks to fit in cache (or requested target block size). Index block_total_size = numext::mini(cache_size, total_size); - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; // Query expression tree for desired block size/shape. std::vector<TensorOpResourceRequirements> resources; evaluator.getResourceRequirements(&resources); @@ -229,12 +229,8 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, Tileable> { typedef EvalRange<Evaluator, StorageIndex, Vectorizable> EvalRange; Evaluator evaluator(expr, device); - const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - const StorageIndex PacketSize = - Vectorizable - ? 
unpacket_traits<typename Evaluator::PacketReturnType>::size - : 1; const StorageIndex size = array_prod(evaluator.dimensions()); device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), EvalRange::alignBlockSize, @@ -259,12 +255,11 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr static EIGEN_STRONG_INLINE void run(const Expression& expr, const ThreadPoolDevice& device) { - typedef TensorBlock<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlock; typedef TensorBlockMapper<ScalarNoConst, StorageIndex, NumDims, Evaluator::Layout> TensorBlockMapper; Evaluator evaluator(expr, device); - StorageIndex total_size = array_prod(evaluator.dimensions()); - StorageIndex cache_size = device.firstLevelCacheSize() / sizeof(Scalar); + Index total_size = array_prod(evaluator.dimensions()); + Index cache_size = device.firstLevelCacheSize() / sizeof(Scalar); if (total_size < cache_size) { // TODO(andydavis) Reduce block management overhead for small tensors. internal::TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, @@ -273,9 +268,9 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable, /*Tileable*/ tr return; } - const bool needs_assign = evaluator.evalSubExprsIfNeeded(nullptr); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); if (needs_assign) { - TensorBlockShapeType block_shape = TensorBlockShapeType::kSkewedInnerDims; + TensorBlockShapeType block_shape = kSkewedInnerDims; Index block_total_size = 0; // Query expression tree for desired block size/shape. std::vector<internal::TensorOpResourceRequirements> resources; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index b7a0193fe..04a8b953d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -24,6 +24,14 @@ template<typename T> struct MakePointer { typedef T ScalarType; }; +// The PointerType class is a container of the device-specific pointer +// used for referring to a pointer on the TensorEvaluator class. While the TensorExpression +// is a device-agnostic type and needs the MakePointer class for type conversion, +// the TensorEvaluator class can be specialized for a device, hence it is possible +// to construct different types of temporary storage memory in TensorEvaluator +// for different devices by specializing the following PointerType class.
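For illustration, the specialization mechanism the comment above describes could be used along these lines (a hypothetical sketch, not part of this patch; GpuDevice stands in for any device type):

  // hypothetical: give evaluators on a GPU device their own pointer type
  template <typename T>
  struct PointerType<T, GpuDevice> {
    typedef T* Type;
  };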
+template<typename T, typename Device> struct PointerType : MakePointer<T>{}; + namespace internal{ template<typename A, typename B> struct Pointer_type_promotion { static const bool val=false; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 7ecd4d1ac..cd666c173 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -54,36 +54,6 @@ struct functor_traits<scalar_fmod_op<Scalar> > { PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the sigmoid of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sigmoid() - */ -template <typename T> -struct scalar_sigmoid_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { - const T one = T(1); - return one / (one + numext::exp(-x)); - } - - template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Packet packetOp(const Packet& x) const { - const Packet one = pset1<Packet>(T(1)); - return pdiv(one, padd(one, pexp(pnegate(x)))); - } -}; - -template <typename T> -struct functor_traits<scalar_sigmoid_op<T> > { - enum { - Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 6, - PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasDiv && - packet_traits<T>::HasNegate && packet_traits<T>::HasExp - }; -}; - - template<typename Reducer, typename Device> struct reducer_traits { enum { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h index 98ad661ca..3f7d26b18 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -84,7 +84,7 @@ template<DenseIndex n> struct NumTraits<type2index<n> > namespace internal { template <typename T> EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { - val = new_val; + val = internal::convert_index<T>(new_val); } template <DenseIndex n> EIGEN_DEVICE_FUNC void update_value(type2index<n>& val, DenseIndex new_val) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h index a32743677..2f765acb7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -527,7 +527,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) { - for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { + for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) { eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); } @@ -985,7 +985,7 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped; m_is_identity = true; - for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) { + for (Index i = 0; i < internal::array_size<Dimensions>::value; ++i) { if (m_strides[i] != 1 || op.startIndices()[i] != 0 || op.stopIndices()[i] != (m_impl.dimensions()[i] - 1)) { m_is_identity = false; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h 
b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h new file mode 100644 index 000000000..ef5e9ff18 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2018 Rasmus Munk Larsen <rmlarsen@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Barrier is an object that allows one or more threads to wait until +// Notify has been called a specified number of times. + +#ifndef EIGEN_CXX11_THREADPOOL_BARRIER_H +#define EIGEN_CXX11_THREADPOOL_BARRIER_H + +namespace Eigen { + +class Barrier { + public: + Barrier(unsigned int count) : state_(count << 1), notified_(false) { + eigen_assert(((count << 1) >> 1) == count); + } + ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); } + + void Notify() { + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + eigen_assert(((v + 2) & ~1) != 0); + return; // either count has not dropped to 0, or waiter is not waiting + } + std::unique_lock<std::mutex> l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; + std::unique_lock<std::mutex> l(mu_); + while (!notified_) { + cv_.wait(l); + } + } + + private: + std::mutex mu_; + std::condition_variable cv_; + std::atomic<unsigned int> state_; // low bit is waiter flag + bool notified_; +}; + +// Notification is an object that allows a user to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object, +// but only one caller may call Notify() on the object. +struct Notification : Barrier { + Notification() : Barrier(1){}; +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_THREADPOOL_BARRIER_H diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h index 0a7181102..22c952ae1 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h @@ -58,7 +58,7 @@ class EventCount { ~EventCount() { // Ensure there are no waiters. - eigen_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask); + eigen_plain_assert((state_.load() & (kStackMask | kWaiterMask)) == kStackMask); } // Prewait prepares for waiting. @@ -169,7 +169,8 @@ class EventCount { class Waiter { friend class EventCount; - // Align to 128 byte boundary to prevent false sharing with other Waiter objects in the same vector. + // Align to 128 byte boundary to prevent false sharing with other Waiter + // objects in the same vector.
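A usage sketch for the Barrier added above (illustrative only; num_workers, worker_threads and DoWork are assumed names, not code from this patch):

  Eigen::Barrier barrier(num_workers);
  for (int i = 0; i < num_workers; ++i) {
    worker_threads[i] = std::thread([&barrier, i]() {
      DoWork(i);
      barrier.Notify();  // counts down by one
    });
  }
  barrier.Wait();  // returns once Notify() has run num_workers times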
EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<Waiter*> next; std::mutex mu; std::condition_variable cv; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index ecd49f382..60a0c9fb6 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -10,7 +10,6 @@ #ifndef EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H #define EIGEN_CXX11_THREADPOOL_NONBLOCKING_THREAD_POOL_H - namespace Eigen { template <typename Environment> @@ -23,7 +22,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { : ThreadPoolTempl(num_threads, true, env) {} ThreadPoolTempl(int num_threads, bool allow_spinning, - Environment env = Environment()) + Environment env = Environment()) : env_(env), num_threads_(num_threads), allow_spinning_(allow_spinning), @@ -58,12 +57,18 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { coprimes_.push_back(i); } } + queues_.resize(num_threads_); +#ifndef EIGEN_THREAD_LOCAL + init_barrier_.reset(new Barrier(num_threads_)); +#endif for (int i = 0; i < num_threads_; i++) { - queues_.push_back(new Queue()); - } - for (int i = 0; i < num_threads_; i++) { - threads_.push_back(env_.CreateThread([this, i]() { WorkerLoop(i); })); + threads_.emplace_back(env_.CreateThread([this, i]() { WorkerLoop(i); })); } +#ifndef EIGEN_THREAD_LOCAL + // Wait for workers to initialize per_thread_map_. Otherwise we might race + // with them in Schedule or CurrentThreadId. + init_barrier_->Wait(); +#endif } ~ThreadPoolTempl() { @@ -78,13 +83,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { // Since we were cancelled, there might be entries in the queues. // Empty them to prevent their destructor from asserting. for (size_t i = 0; i < queues_.size(); i++) { - queues_[i]->Flush(); + queues_[i].Flush(); } } // Join threads explicitly to avoid destruction order issues. - for (size_t i = 0; i < num_threads_; i++) delete threads_[i]; - for (size_t i = 0; i < num_threads_; i++) delete queues_[i]; + threads_.resize(0); + queues_.resize(0); } void Schedule(std::function<void()> fn) { @@ -92,13 +97,13 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { PerThread* pt = GetPerThread(); if (pt->pool == this) { // Worker thread of this pool, push onto the thread's queue. - Queue* q = queues_[pt->thread_id]; - t = q->PushFront(std::move(t)); + Queue& q = queues_[pt->thread_id]; + t = q.PushFront(std::move(t)); } else { // A free-standing thread (or worker of another pool), push onto a random // queue. - Queue* q = queues_[Rand(&pt->rand) % queues_.size()]; - t = q->PushBack(std::move(t)); + Queue& q = queues_[Rand(&pt->rand) % queues_.size()]; + t = q.PushBack(std::move(t)); } // Note: below we touch this after making w available to worker threads. // Strictly speaking, this can lead to a racy-use-after-free. Consider that @@ -109,8 +114,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { // this is kept alive while any threads can potentially be in Schedule. if (!t.f) { ec_.Notify(false); - } - else { + } else { env_.ExecuteTask(t); // Push failed, execute directly. 
} } @@ -130,13 +134,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { ec_.Notify(true); } - int NumThreads() const final { - return num_threads_; - } + int NumThreads() const final { return num_threads_; } int CurrentThreadId() const final { - const PerThread* pt = - const_cast<ThreadPoolTempl*>(this)->GetPerThread(); + const PerThread* pt = const_cast<ThreadPoolTempl*>(this)->GetPerThread(); if (pt->pool == this) { return pt->thread_id; } else { @@ -148,17 +149,21 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { typedef typename Environment::EnvThread Thread; struct PerThread { - constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) { } + constexpr PerThread() : pool(NULL), rand(0), thread_id(-1) {} ThreadPoolTempl* pool; // Parent pool, or null for normal threads. - uint64_t rand; // Random generator state. - int thread_id; // Worker thread index in pool. + uint64_t rand; // Random generator state. + int thread_id; // Worker thread index in pool. +#ifndef EIGEN_THREAD_LOCAL + // Prevent false sharing. + char pad_[128]; +#endif }; Environment env_; const int num_threads_; const bool allow_spinning_; - MaxSizeVector<Thread*> threads_; - MaxSizeVector<Queue*> queues_; + MaxSizeVector<std::unique_ptr<Thread> > threads_; + MaxSizeVector<Queue> queues_; MaxSizeVector<unsigned> coprimes_; MaxSizeVector<EventCount::Waiter> waiters_; std::atomic<unsigned> blocked_; @@ -166,14 +171,27 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { std::atomic<bool> done_; std::atomic<bool> cancelled_; EventCount ec_; +#ifndef EIGEN_THREAD_LOCAL + std::unique_ptr<Barrier> init_barrier_; + std::mutex per_thread_map_mutex_; // Protects per_thread_map_. + std::unordered_map<uint64_t, std::unique_ptr<PerThread>> per_thread_map_; +#endif // Main worker thread loop. void WorkerLoop(int thread_id) { +#ifndef EIGEN_THREAD_LOCAL + std::unique_ptr<PerThread> new_pt(new PerThread()); + per_thread_map_mutex_.lock(); + eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second); + per_thread_map_mutex_.unlock(); + init_barrier_->Notify(); + init_barrier_->Wait(); +#endif PerThread* pt = GetPerThread(); pt->pool = this; - pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id()); + pt->rand = GlobalThreadIdHash(); pt->thread_id = thread_id; - Queue* q = queues_[thread_id]; + Queue& q = queues_[thread_id]; EventCount::Waiter* waiter = &waiters_[thread_id]; // TODO(dvyukov,rmlarsen): The time spent in Steal() is proportional // to num_threads_ and we assume that new work is scheduled at a @@ -189,10 +207,10 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface { // counter-productive for the types of I/O workloads the single thread // pools tend to be used for. 
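// In outline, the loop below works in three stages: each worker first spins
// on its own queue via PopFront(), then tries Steal() from victim queues
// visited with a coprime stride (so every queue is reachable), and only then
// blocks in WaitForWork() on its EventCount waiter.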
while (!cancelled_) { - Task t = q->PopFront(); + Task t = q.PopFront(); for (int i = 0; i < spin_count && !t.f; i++) { if (!cancelled_.load(std::memory_order_relaxed)) { - t = q->PopFront(); + t = q.PopFront(); } } if (!t.f) { @@ -206,7 +224,7 @@ } } else { while (!cancelled_) { - Task t = q->PopFront(); + Task t = q.PopFront(); if (!t.f) { t = Steal(); if (!t.f) { @@ -243,7 +261,7 @@ unsigned inc = coprimes_[r % coprimes_.size()]; unsigned victim = r % size; for (unsigned i = 0; i < size; i++) { - Task t = queues_[victim]->PopBack(); + Task t = queues_[victim].PopBack(); if (t.f) { return t; } @@ -270,7 +288,7 @@ if (cancelled_) { return false; } else { - *t = queues_[victim]->PopBack(); + *t = queues_[victim].PopBack(); return true; } } @@ -278,7 +296,8 @@ // If we are shutting down and all worker threads blocked without work, // that means we are done. blocked_++; - if (done_ && blocked_ == num_threads_) { + // TODO is blocked_ required to be unsigned? + if (done_ && blocked_ == static_cast<unsigned>(num_threads_)) { ec_.CancelWait(waiter); // Almost done, but need to re-check queues. // Consider that all queues are empty and all worker threads are preempted @@ -311,7 +330,7 @@ unsigned inc = coprimes_[r % coprimes_.size()]; unsigned victim = r % size; for (unsigned i = 0; i < size; i++) { - if (!queues_[victim]->Empty()) { + if (!queues_[victim].Empty()) { return victim; } victim += inc; @@ -322,10 +341,24 @@ return -1; } - static EIGEN_STRONG_INLINE PerThread* GetPerThread() { + static EIGEN_STRONG_INLINE uint64_t GlobalThreadIdHash() { + return std::hash<std::thread::id>()(std::this_thread::get_id()); + } + + EIGEN_STRONG_INLINE PerThread* GetPerThread() { +#ifndef EIGEN_THREAD_LOCAL + static PerThread dummy; + auto it = per_thread_map_.find(GlobalThreadIdHash()); + if (it == per_thread_map_.end()) { + return &dummy; + } else { + return it->second.get(); + } +#else EIGEN_THREAD_LOCAL PerThread per_thread_; PerThread* pt = &per_thread_; return pt; +#endif } static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) { @@ -333,7 +366,8 @@ // Update the internal state *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; // Generate the random output (using the PCG-XSH-RS scheme) - return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61))); + return static_cast<unsigned>((current ^ (current >> 22)) >> + (22 + (current >> 61))); } }; diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h index cb3690a2e..05c739aa1 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h @@ -10,7 +10,6 @@ #ifndef EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ #define EIGEN_CXX11_THREADPOOL_RUNQUEUE_H_ - namespace Eigen { // RunQueue is a fixed-size, partially non-blocking deque of Work items. @@ -47,7 +46,7 @@ class RunQueue { array_[i].state.store(kEmpty, std::memory_order_relaxed); } - ~RunQueue() { eigen_assert(Size() == 0); } + ~RunQueue() { eigen_plain_assert(Size() == 0); } // PushFront inserts w at the beginning of the queue.
// If queue is full returns w, otherwise returns default-constructed Work. @@ -131,9 +130,8 @@ class RunQueue { Elem* e = &array_[mid & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); if (n == 0) { - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) + if (s != kReady || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) continue; start = mid; } else { diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h index cfa221732..a41731c34 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h @@ -10,13 +10,45 @@ #ifndef EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H #define EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H -// Try to come up with a portable implementation of thread local variables -#if EIGEN_COMP_GNUC && EIGEN_GNUC_AT_MOST(4, 7) -#define EIGEN_THREAD_LOCAL static __thread -#elif EIGEN_COMP_CLANG -#define EIGEN_THREAD_LOCAL static __thread -#else +#if EIGEN_MAX_CPP_VER >= 11 && \ + ((EIGEN_COMP_GNUC && EIGEN_GNUC_AT_LEAST(4, 8)) || \ + __has_feature(cxx_thread_local)) #define EIGEN_THREAD_LOCAL static thread_local #endif +// Disable TLS for Apple and Android builds with older toolchains. +#if defined(__APPLE__) +// Included for TARGET_OS_IPHONE, __IPHONE_OS_VERSION_MIN_REQUIRED, +// __IPHONE_8_0. +#include <Availability.h> +#include <TargetConditionals.h> +#endif +// Checks whether C++11's `thread_local` storage duration specifier is +// supported. +#if defined(__apple_build_version__) && \ + ((__apple_build_version__ < 8000042) || \ + (TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0)) +// Notes: Xcode's clang did not support `thread_local` until version +// 8, and even then not for all iOS < 9.0. +#undef EIGEN_THREAD_LOCAL + +#elif defined(__ANDROID__) && EIGEN_COMP_CLANG +// There are platforms for which TLS should not be used even though the compiler +// makes it seem like it's supported (Android NDK < r12b for example). +// This is primarily because of linker problems and toolchain misconfiguration: +// TLS isn't supported until NDK r12b per +// https://developer.android.com/ndk/downloads/revision_history.html +// Since NDK r16, `__NDK_MAJOR__` and `__NDK_MINOR__` are defined in +// <android/ndk-version.h>. For NDK < r16, users should define these macros, +// e.g. `-D__NDK_MAJOR__=11 -D__NDK_MINOR__=0` for NDK r11.
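// Net effect: after the checks above, EIGEN_THREAD_LOCAL is either defined or
// left undefined, and call sites branch on that. A minimal sketch of the
// consuming pattern (mirroring ThreadPoolTempl::GetPerThread):
//
//   #ifdef EIGEN_THREAD_LOCAL
//     EIGEN_THREAD_LOCAL PerThread per_thread_;  // fast TLS path
//   #else
//     // fall back to a mutex-protected map keyed by GlobalThreadIdHash()
//   #endif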
+#if __has_include(<android/ndk-version.h>) +#include <android/ndk-version.h> +#endif // __has_include(<android/ndk-version.h>) +#if defined(__ANDROID__) && defined(__clang__) && defined(__NDK_MAJOR__) && \ + defined(__NDK_MINOR__) && \ + ((__NDK_MAJOR__ < 12) || ((__NDK_MAJOR__ == 12) && (__NDK_MINOR__ < 1))) +#undef EIGEN_THREAD_LOCAL +#endif +#endif // defined(__ANDROID__) && defined(__clang__) + #endif // EIGEN_CXX11_THREADPOOL_THREAD_LOCAL_H diff --git a/unsupported/Eigen/CXX11/src/util/EmulateArray.h b/unsupported/Eigen/CXX11/src/util/EmulateArray.h index d91662d96..d5c000e08 100644 --- a/unsupported/Eigen/CXX11/src/util/EmulateArray.h +++ b/unsupported/Eigen/CXX11/src/util/EmulateArray.h @@ -26,6 +26,11 @@ template <typename T, size_t n> class array { EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE T& at(size_t index) { eigen_assert(index < size()); return values[index]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const T& at(size_t index) const { eigen_assert(index < size()); return values[index]; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T& front() { return values[0]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& front() const { return values[0]; } @@ -202,16 +207,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& array_get(const array<T,N>& a) { } template<class T, std::size_t N> struct array_size<array<T,N> > { - static const size_t value = N; + enum { value = N }; }; template<class T, std::size_t N> struct array_size<array<T,N>& > { - static const size_t value = N; + enum { value = N }; }; template<class T, std::size_t N> struct array_size<const array<T,N> > { - static const size_t value = N; + enum { value = N }; }; template<class T, std::size_t N> struct array_size<const array<T,N>& > { - static const size_t value = N; + enum { value = N }; }; } // end namespace internal diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h index 4bc3dd1ba..bc5b3632c 100644 --- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h @@ -35,7 +35,6 @@ class MaxSizeVector { explicit MaxSizeVector(size_t n) : reserve_(n), size_(0), data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T; } } // Construct a new MaxSizeVector, reserve and resize to n. @@ -44,35 +43,55 @@ class MaxSizeVector { MaxSizeVector(size_t n, const T& init) : reserve_(n), size_(n), data_(static_cast<T*>(internal::aligned_malloc(n * sizeof(T)))) { - for (size_t i = 0; i < n; ++i) { new (&data_[i]) T(init); } + size_t i = 0; + EIGEN_TRY + { + for(; i < size_; ++i) { new (&data_[i]) T(init); } + } + EIGEN_CATCH(...) + { + // Construction failed, destruct in reverse order: + for(; (i+1) > 0; --i) { data_[i-1].~T(); } + internal::aligned_free(data_); + EIGEN_THROW; + } } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~MaxSizeVector() { - for (size_t i = 0; i < size_; ++i) { - data_[i].~T(); + for (size_t i = size_; i > 0; --i) { + data_[i-1].~T(); } internal::aligned_free(data_); } void resize(size_t n) { eigen_assert(n <= reserve_); - for (size_t i = size_; i < n; ++i) { - new (&data_[i]) T; + for (; size_ < n; ++size_) { + new (&data_[size_]) T; } - for (size_t i = n; i < size_; ++i) { - data_[i].~T(); + for (; size_ > n; --size_) { + data_[size_-1].~T(); } - size_ = n; + eigen_assert(size_ == n); } // Append new elements (up to reserved size). 
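// Note on the unsigned loop counters in the MaxSizeVector changes above: the
// cleanup loops count down with `i > 0` (or the equivalent `(i+1) > 0`) and
// index `data_[i-1]`, because a plain `i >= 0` test on an unsigned counter is
// always true after wrap-around. The same idiom in isolation:
//
//   for (size_t i = n; i > 0; --i) data_[i-1].~T();  // destroy in reverse order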
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void push_back(const T& t) { eigen_assert(size_ < reserve_); - data_[size_++] = t; + new (&data_[size_++]) T(t); } + // For C++03 compatibility this only takes one argument + template<class X> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void emplace_back(const X& x) { + eigen_assert(size_ < reserve_); + new (&data_[size_++]) T(x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T& operator[] (size_t i) const { eigen_assert(i < size_); @@ -99,11 +118,8 @@ class MaxSizeVector { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pop_back() { - // NOTE: This does not destroy the value at the end the way - // std::vector's version of pop_back() does. That happens when - // the Vector is destroyed. eigen_assert(size_ > 0); - size_--; + data_[--size_].~T(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 2c45b3999..d8cf3e642 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -289,6 +289,7 @@ class FFT void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, Index nfft=-1) { typedef typename ComplexDerived::Scalar src_type; + typedef typename ComplexDerived::RealScalar real_type; typedef typename OutputDerived::Scalar dst_type; const bool realfft= (NumTraits<dst_type>::IsComplex == 0); EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived) @@ -329,9 +330,9 @@ class FFT tmp.head(nhead) = src.head(nhead); tmp.tail(ntail) = src.tail(ntail); if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it - tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5); + tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*real_type(.5); }else{ // expanding -- split the old Nyquist bin into two halves - tmp(nhead) = src(nhead) * src_type(.5); + tmp(nhead) = src(nhead) * real_type(.5); tmp(tmp.size()-nhead) = tmp(nhead); } } diff --git a/unsupported/Eigen/OpenGLSupport b/unsupported/Eigen/OpenGLSupport index 11d99567e..489fd8354 100644 --- a/unsupported/Eigen/OpenGLSupport +++ b/unsupported/Eigen/OpenGLSupport @@ -184,7 +184,7 @@ inline void glRotate(const Rotation2D<float>& rot) } inline void glRotate(const Rotation2D<double>& rot) { - glRotated(rot.angle()*180.0/EIGEN_PI, 0.0, 0.0, 1.0); + glRotated(rot.angle()*180.0/double(EIGEN_PI), 0.0, 0.0, 1.0); } template<typename Derived> void glRotate(const RotationBase<Derived,3>& rot) diff --git a/unsupported/Eigen/src/BVH/KdBVH.h b/unsupported/Eigen/src/BVH/KdBVH.h index 13f792cd0..2d5b76ad0 100644 --- a/unsupported/Eigen/src/BVH/KdBVH.h +++ b/unsupported/Eigen/src/BVH/KdBVH.h @@ -35,6 +35,7 @@ struct get_boxes_helper { { outBoxes.insert(outBoxes.end(), boxBegin, boxEnd); eigen_assert(outBoxes.size() == objects.size()); + EIGEN_ONLY_USED_FOR_DEBUG(objects); } }; diff --git a/unsupported/Eigen/src/Splines/Spline.h b/unsupported/Eigen/src/Splines/Spline.h index 627f6e482..c1cf5b7e4 100644 --- a/unsupported/Eigen/src/Splines/Spline.h +++ b/unsupported/Eigen/src/Splines/Spline.h @@ -249,15 +249,13 @@ namespace Eigen DenseIndex degree, const typename Spline<_Scalar, _Dim, _Degree>::KnotVectorType& knots) { - typedef typename Spline<_Scalar, _Dim, _Degree>::BasisVectorType BasisVectorType; - const DenseIndex p = degree; const DenseIndex i = Spline::Span(u, degree, knots); const KnotVectorType& U = knots; BasisVectorType left(p+1); left(0) = Scalar(0); - BasisVectorType right(p+1); right(0) = Scalar(0); + BasisVectorType right(p+1); right(0) = Scalar(0); 
VectorBlock<BasisVectorType,Degree>(left,1,p) = u - VectorBlock<const KnotVectorType,Degree>(U,i+1-p,p).reverse(); VectorBlock<BasisVectorType,Degree>(right,1,p) = VectorBlock<const KnotVectorType,Degree>(U,i+1,p) - u; @@ -380,9 +378,6 @@ namespace Eigen typedef Spline<_Scalar, _Dim, _Degree> SplineType; enum { Order = SplineTraits<SplineType>::OrderAtCompileTime }; - typedef typename SplineTraits<SplineType>::Scalar Scalar; - typedef typename SplineTraits<SplineType>::BasisVectorType BasisVectorType; - const DenseIndex span = SplineType::Span(u, p, U); const DenseIndex n = (std::min)(p, order); diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp index 572fc08a3..67533e364 100644 --- a/unsupported/test/EulerAngles.cpp +++ b/unsupported/test/EulerAngles.cpp @@ -197,6 +197,7 @@ template<typename Scalar> void check_singular_cases(const Scalar& singularBeta) template<typename Scalar> void eulerangles_manual() { typedef Matrix<Scalar,3,1> Vector3; + typedef Matrix<Scalar,Dynamic,1> VectorX; const Vector3 Zero = Vector3::Zero(); const Scalar PI = Scalar(EIGEN_PI); @@ -213,13 +214,13 @@ template<typename Scalar> void eulerangles_manual() check_singular_cases(-PI); // non-singular cases - VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); - VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI); - VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); + VectorX alpha = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); + VectorX beta = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI); + VectorX gamma = VectorX::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); for (int i = 0; i < alpha.size(); ++i) { for (int j = 0; j < beta.size(); ++j) { for (int k = 0; k < gamma.size(); ++k) { - check_all_var(Vector3d(alpha(i), beta(j), gamma(k))); + check_all_var(Vector3(alpha(i), beta(j), gamma(k))); } } } diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp index 746f62511..eec282ba7 100644 --- a/unsupported/test/cxx11_tensor_block_access.cpp +++ b/unsupported/test/cxx11_tensor_block_access.cpp @@ -10,6 +10,7 @@ #include "main.h" +#include <algorithm> #include <set> #include <Eigen/CXX11/Tensor> @@ -19,22 +20,21 @@ using Eigen::Index; using Eigen::RowMajor; using Eigen::ColMajor; -using internal::TensorBlockShapeType; template<typename T> static const T& choose(int layout, const T& col, const T& row) { return layout == ColMajor ? col : row; } -static const TensorBlockShapeType RandomShape() { +static internal::TensorBlockShapeType RandomShape() { return internal::random<bool>() - ? internal::TensorBlockShapeType::kUniformAllDims - : internal::TensorBlockShapeType::kSkewedInnerDims; + ? internal::kUniformAllDims + : internal::kSkewedInnerDims; } template <int NumDims> -static std::size_t RandomTargetSize(const DSizes<Index, NumDims>& dims) { - return internal::random<int>(1, dims.TotalSize()); +static Index RandomTargetSize(const DSizes<Index, NumDims>& dims) { + return internal::random<Index>(1, dims.TotalSize()); } template <int NumDims> @@ -44,12 +44,12 @@ static DSizes<Index, NumDims> RandomDims() { dims[i] = internal::random<int>(1, 20); } return DSizes<Index, NumDims>(dims); -}; +} /** Dummy data type to test TensorBlock copy ops. 
*/ struct Data { - Data() : Data(0) {} - explicit Data(int v) { value = v; } + Data() : value(0) {} + explicit Data(int v) : value(v) { } int value; }; @@ -91,21 +91,19 @@ static void Debug(DSizes<Index, NumDims> dims) { template <int Layout> static void test_block_mapper_sanity() { - using T = int; - using TensorBlock = internal::TensorBlock<T, Index, 2, Layout>; - using TensorBlockMapper = internal::TensorBlockMapper<T, Index, 2, Layout>; + typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper; DSizes<Index, 2> tensor_dims(100, 100); // Test uniform blocks. TensorBlockMapper uniform_block_mapper( - tensor_dims, internal::TensorBlockShapeType::kUniformAllDims, 100); + tensor_dims, internal::kUniformAllDims, 100); VERIFY_IS_EQUAL(uniform_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(uniform_block_mapper.block_dims_total_size(), 100); // 10x10 blocks - auto uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, nullptr); + typename TensorBlockMapper::Block uniform_b0 = uniform_block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(0), 10); VERIFY_IS_EQUAL(uniform_b0.block_sizes().at(1), 10); // Depending on the layout we stride by cols or rows. @@ -117,13 +115,13 @@ static void test_block_mapper_sanity() // Test skewed to inner dims blocks. TensorBlockMapper skewed_block_mapper( - tensor_dims, internal::TensorBlockShapeType::kSkewedInnerDims, 100); + tensor_dims, internal::kSkewedInnerDims, 100); VERIFY_IS_EQUAL(skewed_block_mapper.total_block_count(), 100); VERIFY_IS_EQUAL(skewed_block_mapper.block_dims_total_size(), 100); // 1x100 (100x1) rows/cols depending on a tensor layout. - auto skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, nullptr); + typename TensorBlockMapper::Block skewed_b0 = skewed_block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(0), choose(Layout, 100, 1)); VERIFY_IS_EQUAL(skewed_b0.block_sizes().at(1), choose(Layout, 1, 100)); // Depending on the layout we stride by cols or rows.
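// The numbers verified above follow from the two block-shape policies (a
// worked example, not extra test code): for a 100x100 tensor and a target of
// 100 coefficients, kUniformAllDims spreads the budget evenly, giving 10x10
// blocks and hence (100/10)*(100/10) == 100 blocks in total, whereas
// kSkewedInnerDims gives the whole budget to the innermost dimension,
// producing 100x1 (ColMajor) or 1x100 (RowMajor) blocks.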
@@ -140,12 +138,13 @@ template <typename T, int Layout, int NumDims> static void UpdateCoeffSet( const internal::TensorBlock<T, Index, NumDims, Layout>& block, Index first_coeff_index, int dim_index, std::set<Index>* visited_coeffs) { - const DSizes<Index, NumDims> block_sizes = block.block_sizes(); - const DSizes<Index, NumDims> tensor_strides = block.tensor_strides(); + const DSizes<Index, NumDims>& block_sizes = block.block_sizes(); + const DSizes<Index, NumDims>& tensor_strides = block.tensor_strides(); for (int i = 0; i < block_sizes[dim_index]; ++i) { if (tensor_strides[dim_index] == 1) { - auto inserted = visited_coeffs->insert(first_coeff_index + i); + typedef std::pair<std::set<Index>::iterator, bool> ReturnType; + ReturnType inserted = visited_coeffs->insert(first_coeff_index + i); VERIFY_IS_EQUAL(inserted.second, true); } else { int next_dim_index = dim_index + choose(Layout, -1, 1); @@ -158,9 +157,8 @@ static void UpdateCoeffSet( template <typename T, int NumDims, int Layout> static void test_block_mapper_maps_every_element() { - using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; - using TensorBlockMapper = - internal::TensorBlockMapper<T, Index, NumDims, Layout>; + typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock; + typedef internal::TensorBlockMapper<T, Index, NumDims, Layout> TensorBlockMapper; DSizes<Index, NumDims> dims = RandomDims<NumDims>(); @@ -171,7 +169,7 @@ static void test_block_mapper_maps_every_element() { TensorBlockMapper block_mapper(dims, RandomShape(), RandomTargetSize(dims)); for (int i = 0; i < block_mapper.total_block_count(); ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(i, NULL); UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), choose(Layout, NumDims - 1, 0), &coeff_set); @@ -180,16 +178,15 @@ static void test_block_mapper_maps_every_element() { // Verify that every coefficient in the original Tensor is accessible through // TensorBlock only once. Index total_coeffs = dims.TotalSize(); - VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs); + VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs); VERIFY_IS_EQUAL(*coeff_set.begin(), 0); VERIFY_IS_EQUAL(*coeff_set.rbegin(), total_coeffs - 1); } template <typename T, int NumDims, int Layout> static void test_slice_block_mapper_maps_every_element() { - using TensorBlock = internal::TensorBlock<T, Index, NumDims, Layout>; - using TensorSliceBlockMapper = - internal::TensorSliceBlockMapper<T, Index, NumDims, Layout>; + typedef internal::TensorBlock<T, Index, NumDims, Layout> TensorBlock; + typedef internal::TensorSliceBlockMapper<T, Index, NumDims, Layout> TensorSliceBlockMapper; DSizes<Index, NumDims> tensor_dims = RandomDims<NumDims>(); DSizes<Index, NumDims> tensor_slice_offsets = RandomDims<NumDims>(); @@ -206,12 +203,12 @@ static void test_slice_block_mapper_maps_every_element() { // Keep track of element indices available via block access. std::set<Index> coeff_set; - auto total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize()); + int total_coeffs = static_cast<int>(tensor_slice_extents.TotalSize()); // Pick random dimension sizes for the tensor blocks.
DSizes<Index, NumDims> block_sizes; for (int i = 0; i < NumDims; ++i) { - block_sizes[i] = internal::random<int>(1, tensor_slice_extents[i]); + block_sizes[i] = internal::random<Index>(1, tensor_slice_extents[i]); } TensorSliceBlockMapper block_mapper(tensor_dims, tensor_slice_offsets, @@ -219,13 +216,13 @@ static void test_slice_block_mapper_maps_every_element() { DimensionList<Index, NumDims>()); for (int i = 0; i < block_mapper.total_block_count(); ++i) { - TensorBlock block = block_mapper.GetBlockForIndex(i, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(i, NULL); UpdateCoeffSet<T, Layout, NumDims>(block, block.first_coeff_index(), choose(Layout, NumDims - 1, 0), &coeff_set); } - VERIFY_IS_EQUAL(coeff_set.size(), total_coeffs); + VERIFY_IS_EQUAL(Index(coeff_set.size()), total_coeffs); } template <typename T, int NumDims, int Layout> @@ -240,7 +237,7 @@ static void test_block_io_copy_data_from_source_to_target() { TensorBlockWriter; DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>(); - const auto input_tensor_size = input_tensor_dims.TotalSize(); + const Index input_tensor_size = input_tensor_dims.TotalSize(); T* input_data = GenerateRandomData<T>(input_tensor_size); T* output_data = new T[input_tensor_size]; @@ -265,14 +262,14 @@ static void test_block_io_copy_data_from_source_to_target() { } template <int Layout, int NumDims> -static int GetInputIndex(Index output_index, +static Index GetInputIndex(Index output_index, const array<Index, NumDims>& output_to_input_dim_map, const array<Index, NumDims>& input_strides, const array<Index, NumDims>& output_strides) { int input_index = 0; if (Layout == ColMajor) { for (int i = NumDims - 1; i > 0; --i) { - const int idx = output_index / output_strides[i]; + const Index idx = output_index / output_strides[i]; input_index += idx * input_strides[output_to_input_dim_map[i]]; output_index -= idx * output_strides[i]; } @@ -280,7 +277,7 @@ static int GetInputIndex(Index output_index, output_index * input_strides[output_to_input_dim_map[0]]; } else { for (int i = 0; i < NumDims - 1; ++i) { - const int idx = output_index / output_strides[i]; + const Index idx = output_index / output_strides[i]; input_index += idx * input_strides[output_to_input_dim_map[i]]; output_index -= idx * output_strides[i]; } @@ -319,7 +316,7 @@ static void test_block_io_copy_using_reordered_dimensions() { TensorBlockWriter; DSizes<Index, NumDims> input_tensor_dims = RandomDims<NumDims>(); - const auto input_tensor_size = input_tensor_dims.TotalSize(); + const Index input_tensor_size = input_tensor_dims.TotalSize(); // Create a random input tensor. T* input_data = GenerateRandomData<T>(input_tensor_size); @@ -327,7 +324,7 @@ static void test_block_io_copy_using_reordered_dimensions() { // Create a random dimension re-ordering/shuffle. 
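// A worked example for GetInputIndex above (illustrative numbers): with
// ColMajor dims (2,3,4) the output strides are (1,2,6). For output_index 17:
//   i = 2: idx = 17 / 6 = 2, remainder 5
//   i = 1: idx =  5 / 2 = 2, remainder 1
//   i = 0: idx = 1
// so the multi-index is (1,2,2), and each idx is re-linearized with the
// stride of the corresponding shuffled input dimension.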
std::vector<Index> shuffle; for (int i = 0; i < NumDims; ++i) shuffle.push_back(i); - std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937()); + std::random_shuffle(shuffle.begin(), shuffle.end()); DSizes<Index, NumDims> output_tensor_dims; array<Index, NumDims> input_to_output_dim_map; @@ -342,8 +339,8 @@ static void test_block_io_copy_using_reordered_dimensions() { TensorBlockMapper block_mapper(output_tensor_dims, RandomShape(), RandomTargetSize(input_tensor_dims)); - auto* block_data = new T[block_mapper.block_dims_total_size()]; - auto* output_data = new T[input_tensor_size]; + T* block_data = new T[block_mapper.block_dims_total_size()]; + T* output_data = new T[input_tensor_size]; array<Index, NumDims> input_tensor_strides = ComputeStrides<Layout, NumDims>(input_tensor_dims); @@ -370,6 +367,40 @@ static void test_block_io_copy_using_reordered_dimensions() { delete[] output_data; } +template<typename Scalar, typename StorageIndex, int Dim> +class EqualityChecker +{ + const Scalar* input_data; + const DSizes<StorageIndex, Dim> &input_dims, &input_strides, &output_dims, &output_strides; + void check_recursive(const Scalar* input, const Scalar* output, int depth=0) const + { + if(depth==Dim) + { + VERIFY_IS_EQUAL(*input, *output); + return; + } + + for(int i=0; i<output_dims[depth]; ++i) + { + check_recursive(input + i % input_dims[depth] * input_strides[depth], output + i*output_strides[depth], depth+1); + } + } +public: + EqualityChecker(const Scalar* input_data_, + const DSizes<StorageIndex, Dim> &input_dims_, const DSizes<StorageIndex, Dim> &input_strides_, + const DSizes<StorageIndex, Dim> &output_dims_, const DSizes<StorageIndex, Dim> &output_strides_) + : input_data(input_data_) + , input_dims(input_dims_), input_strides(input_strides_) + , output_dims(output_dims_), output_strides(output_strides_) + {} + + void operator()(const Scalar* output_data) const + { + check_recursive(input_data, output_data); + } +}; + + template <int Layout> static void test_block_io_zero_stride() { @@ -385,8 +416,8 @@ static void test_block_io_zero_stride() input_tensor_dims[0] = 1; input_tensor_dims[2] = 1; input_tensor_dims[4] = 1; - const auto input_tensor_size = input_tensor_dims.TotalSize(); - auto* input_data = GenerateRandomData<float>(input_tensor_size); + const Index input_tensor_size = input_tensor_dims.TotalSize(); + float* input_data = GenerateRandomData<float>(input_tensor_size); DSizes<Index, 5> output_tensor_dims = rnd_dims; @@ -401,33 +432,10 @@ static void test_block_io_zero_stride() input_tensor_strides_with_zeros[4] = 0; // Verify that data was correctly read/written from/into the block. 
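// How the EqualityChecker added above does its job: check_recursive peels off
// one output dimension per recursion level, and the `i % input_dims[depth]`
// term wraps the input coordinate, so an input dimension of size 1 (the
// zero-stride broadcast case exercised below) is compared against every
// position of the corresponding output dimension.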
- const auto verify_is_equal = [&](const float* output_data) { - for (int i = 0; i < output_tensor_dims[0]; ++i) { - for (int j = 0; j < output_tensor_dims[1]; ++j) { - for (int k = 0; k < output_tensor_dims[2]; ++k) { - for (int l = 0; l < output_tensor_dims[3]; ++l) { - for (int m = 0; m < output_tensor_dims[4]; ++m) { - const Index output_offset = - i * output_tensor_strides[0] + j * output_tensor_strides[1] + - k * output_tensor_strides[2] + l * output_tensor_strides[3] + - m * output_tensor_strides[4]; - const Index input_offset = - i % input_tensor_dims[0] * input_tensor_strides[0] + - j % input_tensor_dims[1] * input_tensor_strides[1] + - k % input_tensor_dims[2] * input_tensor_strides[2] + - l % input_tensor_dims[3] * input_tensor_strides[3] + - m % input_tensor_dims[4] * input_tensor_strides[4]; - VERIFY_IS_EQUAL(output_data[output_offset], - input_data[input_offset]); - } - } - } - } - } - }; + const EqualityChecker<float, Index, 5> verify_is_equal(input_data, input_tensor_dims, input_tensor_strides, output_tensor_dims, output_tensor_strides); { - auto* output_data = new float[output_tensor_dims.TotalSize()]; + float* output_data = new float[output_tensor_dims.TotalSize()]; TensorBlock read_block(0, output_tensor_dims, output_tensor_strides, input_tensor_strides_with_zeros, output_data); TensorBlockReader::Run(&read_block, input_data); @@ -436,7 +444,7 @@ static void test_block_io_zero_stride() } { - auto* output_data = new float[output_tensor_dims.TotalSize()]; + float* output_data = new float[output_tensor_dims.TotalSize()]; TensorBlock write_block(0, output_tensor_dims, input_tensor_strides_with_zeros, output_tensor_strides, input_data); @@ -459,14 +467,14 @@ static void test_block_io_squeeze_ones() { // Total size > 1. { DSizes<Index, 5> block_sizes(1, 2, 1, 2, 1); - const auto total_size = block_sizes.TotalSize(); + const Index total_size = block_sizes.TotalSize(); // Create a random input tensor. - auto* input_data = GenerateRandomData<float>(total_size); + float* input_data = GenerateRandomData<float>(total_size); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); { - auto* output_data = new float[block_sizes.TotalSize()]; + float* output_data = new float[block_sizes.TotalSize()]; TensorBlock read_block(0, block_sizes, strides, strides, output_data); TensorBlockReader::Run(&read_block, input_data); for (int i = 0; i < total_size; ++i) { @@ -476,7 +484,7 @@ static void test_block_io_squeeze_ones() { } { - auto* output_data = new float[block_sizes.TotalSize()]; + float* output_data = new float[block_sizes.TotalSize()]; TensorBlock write_block(0, block_sizes, strides, strides, input_data); TensorBlockWriter::Run(write_block, output_data); for (int i = 0; i < total_size; ++i) { @@ -489,14 +497,14 @@ static void test_block_io_squeeze_ones() { // Total size == 1. { DSizes<Index, 5> block_sizes(1, 1, 1, 1, 1); - const auto total_size = block_sizes.TotalSize(); + const Index total_size = block_sizes.TotalSize(); // Create a random input tensor. 
- auto* input_data = GenerateRandomData<float>(total_size); + float* input_data = GenerateRandomData<float>(total_size); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); { - auto* output_data = new float[block_sizes.TotalSize()]; + float* output_data = new float[block_sizes.TotalSize()]; TensorBlock read_block(0, block_sizes, strides, strides, output_data); TensorBlockReader::Run(&read_block, input_data); for (int i = 0; i < total_size; ++i) { @@ -506,7 +514,7 @@ } { - auto* output_data = new float[block_sizes.TotalSize()]; + float* output_data = new float[block_sizes.TotalSize()]; TensorBlock write_block(0, block_sizes, strides, strides, input_data); TensorBlockWriter::Run(write_block, output_data); for (int i = 0; i < total_size; ++i) { @@ -635,7 +643,7 @@ static void test_block_cwise_binary_io_basic() { DSizes<Index, NumDims> block_sizes = RandomDims<NumDims>(); DSizes<Index, NumDims> strides(ComputeStrides<Layout, NumDims>(block_sizes)); - const auto total_size = block_sizes.TotalSize(); + const Index total_size = block_sizes.TotalSize(); // Create random input tensors. T* left_data = GenerateRandomData<T>(total_size); @@ -664,13 +672,13 @@ static void test_block_cwise_binary_io_squeeze_ones() { DSizes<Index, 5> block_sizes(1, 2, 1, 3, 1); DSizes<Index, 5> strides(ComputeStrides<Layout, 5>(block_sizes)); - const auto total_size = block_sizes.TotalSize(); + const Index total_size = block_sizes.TotalSize(); // Create random input tensors. - auto* left_data = GenerateRandomData<float>(total_size); - auto* right_data = GenerateRandomData<float>(total_size); + float* left_data = GenerateRandomData<float>(total_size); + float* right_data = GenerateRandomData<float>(total_size); - auto* output_data = new float[total_size]; + float* output_data = new float[total_size]; BinaryFunctor functor; TensorBlockCwiseBinaryIO::Run(functor, block_sizes, strides, output_data, strides, left_data, strides, right_data); @@ -711,14 +719,14 @@ static void test_block_cwise_binary_io_zero_strides() { right_strides[3] = 0; // Generate random data. - auto* left_data = GenerateRandomData<float>(left_sizes.TotalSize()); - auto* right_data = GenerateRandomData<float>(right_sizes.TotalSize()); + float* left_data = GenerateRandomData<float>(left_sizes.TotalSize()); + float* right_data = GenerateRandomData<float>(right_sizes.TotalSize()); DSizes<Index, 5> output_sizes = rnd_dims; DSizes<Index, 5> output_strides(ComputeStrides<Layout, 5>(output_sizes)); - const auto output_total_size = output_sizes.TotalSize(); - auto* output_data = new float[output_total_size]; + const Index output_total_size = output_sizes.TotalSize(); + float* output_data = new float[output_total_size]; BinaryFunctor functor; TensorBlockCwiseBinaryIO::Run(functor, output_sizes, output_strides, @@ -755,17 +763,16 @@ static void test_block_cwise_binary_io_zero_strides() { template <int Layout> static void test_uniform_block_shape() { - using T = int; - typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock; - typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper; + typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock; + typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper; { // Test shape 'UniformAllDims' with uniform 'max_coeff_count'.
DSizes<Index, 5> dims(11, 5, 6, 17, 7); - const size_t max_coeff_count = 5 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, + const Index max_coeff_count = 5 * 5 * 5 * 5 * 5; + TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); for (int i = 0; i < 5; ++i) { VERIFY_IS_EQUAL(5, block.block_sizes()[i]); } @@ -776,10 +783,10 @@ static void test_uniform_block_shape() // partially into first inner-most dimension. if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); - const size_t max_coeff_count = 7 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, + const Index max_coeff_count = 7 * 5 * 5 * 5 * 5; + TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(7, block.block_sizes()[0]); for (int i = 1; i < 5; ++i) { VERIFY_IS_EQUAL(5, block.block_sizes()[i]); @@ -787,10 +794,10 @@ static void test_uniform_block_shape() VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); - const size_t max_coeff_count = 5 * 5 * 5 * 5 * 6; - TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, + const Index max_coeff_count = 5 * 5 * 5 * 5 * 6; + TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(6, block.block_sizes()[4]); for (int i = 3; i >= 0; --i) { VERIFY_IS_EQUAL(5, block.block_sizes()[i]); @@ -802,10 +809,10 @@ static void test_uniform_block_shape() // fully into first inner-most dimension. if (Layout == ColMajor) { DSizes<Index, 5> dims(11, 5, 6, 17, 7); - const size_t max_coeff_count = 11 * 5 * 5 * 5 * 5; - TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, + const Index max_coeff_count = 11 * 5 * 5 * 5 * 5; + TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(11, block.block_sizes()[0]); for (int i = 1; i < 5; ++i) { VERIFY_IS_EQUAL(5, block.block_sizes()[i]); @@ -813,10 +820,10 @@ static void test_uniform_block_shape() VERIFY(block.block_sizes().TotalSize() <= max_coeff_count); } else { DSizes<Index, 5> dims(11, 5, 6, 17, 7); - const size_t max_coeff_count = 5 * 5 * 5 * 5 * 7; - TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims, + const Index max_coeff_count = 5 * 5 * 5 * 5 * 7; + TensorBlockMapper block_mapper(dims, internal::kUniformAllDims, max_coeff_count); - TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr); + TensorBlock block = block_mapper.GetBlockForIndex(0, NULL); VERIFY_IS_EQUAL(7, block.block_sizes()[4]); for (int i = 3; i >= 0; --i) { VERIFY_IS_EQUAL(5, block.block_sizes()[i]); @@ -828,10 +835,10 @@ static void test_uniform_block_shape() // fully into first few inner-most dimensions. 
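// The pattern behind these expectations: kUniformAllDims starts from an even
// per-dimension budget (here 5, since max_coeff_count == 5*5*5*5*5) and then
// grows dimensions one at a time, innermost first, as long as the block stays
// within max_coeff_count and within the tensor's actual extents.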
@@ -828,10 +835,10 @@ static void test_uniform_block_shape()
   // fully into first few inner-most dimensions.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -840,10 +847,10 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
-    const size_t max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -855,10 +862,10 @@ static void test_uniform_block_shape()
   // Test shape 'UniformAllDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -867,10 +874,10 @@ static void test_uniform_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
-    const size_t max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kUniformAllDims,
+    const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kUniformAllDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(9, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
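Note: the 'SkewedInnerDims' expectations that follow match a different policy, one that spends the whole budget on the innermost dimension first and only then moves outward. A sketch under that assumption (ColMajor order shown; for RowMajor the walk would start from the last dimension; `skewed_block_dims` is a hypothetical name):

    #include <algorithm>

    // Hypothetical sketch: greedy innermost-first allocation of the budget.
    void skewed_block_dims(const long dims[5], long budget, long block[5]) {
      for (int d = 0; d < 5; ++d) block[d] = 1;
      for (int d = 0; d < 5 && budget > 1; ++d) {
        block[d] = std::min(dims[d], budget);  // give dim d as much as fits
        budget /= block[d];                    // spend the rest further out
      }
    }

With dims (11, 5, 6, 17, 7) this gives (10, 1, 1, 1, 1) for a budget of 10 and (11, 3, 1, 1, 1) for a budget of 11 * 3, matching the ColMajor checks below.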
@@ -883,17 +890,16 @@ static void test_uniform_block_shape()
 template <int Layout>
 static void test_skewed_inner_dim_block_shape()
 {
-  using T = int;
-  typedef internal::TensorBlock<T, Index, 5, Layout> TensorBlock;
-  typedef internal::TensorBlockMapper<T, Index, 5, Layout> TensorBlockMapper;
+  typedef internal::TensorBlock<int, Index, 5, Layout> TensorBlock;
+  typedef internal::TensorBlockMapper<int, Index, 5, Layout> TensorBlockMapper;
 
   // Test shape 'SkewedInnerDims' with partial allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(10, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -901,10 +907,10 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(6, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -915,10 +921,10 @@ static void test_skewed_inner_dim_block_shape()
   // Test shape 'SkewedInnerDims' with full allocation to inner-most dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     for (int i = 1; i < 5; ++i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -926,10 +932,10 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     for (int i = 3; i >= 0; --i) {
       VERIFY_IS_EQUAL(1, block.block_sizes()[i]);
@@ -941,10 +947,10 @@ static void test_skewed_inner_dim_block_shape()
   // and partial allocation to second inner-dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(3, block.block_sizes()[1]);
     for (int i = 2; i < 5; ++i) {
@@ -953,10 +959,10 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(15, block.block_sizes()[3]);
     for (int i = 2; i >= 0; --i) {
@@ -969,10 +975,10 @@ static void test_skewed_inner_dim_block_shape()
   // and partial allocation to third inner-dim.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -982,10 +988,10 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[2]);
@@ -998,10 +1004,10 @@ static void test_skewed_inner_dim_block_shape()
   // Test shape 'SkewedInnerDims' with full allocation to all dims.
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(11, block.block_sizes()[0]);
     VERIFY_IS_EQUAL(5, block.block_sizes()[1]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -1010,10 +1016,10 @@ static void test_skewed_inner_dim_block_shape()
     VERIFY(block.block_sizes().TotalSize() <= max_coeff_count);
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
-    const size_t max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper block_mapper(dims, TensorBlockShapeType::kSkewedInnerDims,
+    const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
+    TensorBlockMapper block_mapper(dims, internal::kSkewedInnerDims,
                                    max_coeff_count);
-    TensorBlock block = block_mapper.GetBlockForIndex(0, nullptr);
+    TensorBlock block = block_mapper.GetBlockForIndex(0, NULL);
     VERIFY_IS_EQUAL(7, block.block_sizes()[4]);
     VERIFY_IS_EQUAL(17, block.block_sizes()[3]);
     VERIFY_IS_EQUAL(6, block.block_sizes()[2]);
@@ -1026,15 +1032,13 @@ static void test_skewed_inner_dim_block_shape()
 template <int Layout>
 static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
 {
-  using T = int;
-
   // Test blocking of tensors with zero dimensions:
   //  - we must not crash on asserts and divisions by zero
   //  - we must not return block with zero dimensions
   //    (recipe for overflows/underflows, divisions by zero and NaNs later)
   //  - total block count must be zero
   {
-    typedef internal::TensorBlockMapper<T, Index, 1, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 1, Layout> TensorBlockMapper;
     DSizes<Index, 1> dims(0);
     for (int max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
       TensorBlockMapper block_mapper(dims, block_shape, max_coeff_count);
@@ -1044,7 +1048,7 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   }
 
   {
-    typedef internal::TensorBlockMapper<T, Index, 2, Layout> TensorBlockMapper;
+    typedef internal::TensorBlockMapper<int, Index, 2, Layout> TensorBlockMapper;
     for (int dim1 = 0; dim1 < 3; ++dim1) {
       for (int dim2 = 0; dim2 < 3; ++dim2) {
         DSizes<Index, 2> dims(dim1, dim2);
@@ -1098,9 +1102,9 @@ EIGEN_DECLARE_TEST(cxx11_tensor_block_access) {
   TEST_LAYOUTS(test_block_cwise_binary_io_zero_strides);
   TEST_LAYOUTS(test_uniform_block_shape);
   TEST_LAYOUTS(test_skewed_inner_dim_block_shape);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kUniformAllDims);
-  TEST_LAYOUTS_WITH_ARG(test_empty_dims, TensorBlockShapeType::kSkewedInnerDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kUniformAllDims);
+  TEST_LAYOUTS_WITH_ARG(test_empty_dims, internal::kSkewedInnerDims);
 }
 
 #undef TEST_LAYOUTS
-#undef TEST_LAYOUTS_WITH_ARG
\ No newline at end of file
+#undef TEST_LAYOUTS_WITH_ARG
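Note: the test_empty_dims cases above pin down the degenerate behavior: a tensor with any zero-sized dimension has zero coefficients, so the mapper must report zero blocks and must never hand out a block with a zero dimension. A short sketch of the invariant being verified (`total_block_count` is a hypothetical helper, not the patch's code; block sizes are assumed to be at least 1):

    // Zero-sized dimension => empty tensor => zero blocks. Guarding here is
    // what avoids the divisions by zero the comment in the test warns about.
    long total_block_count(const long* dims, const long* block, int rank) {
      long count = 1;
      for (int d = 0; d < rank; ++d) {
        if (dims[d] == 0) return 0;
        count *= (dims[d] + block[d] - 1) / block[d];  // ceil(dims / block)
      }
      return count;
    }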
diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp
index f53515b4e..e223d9ffd 100644
--- a/unsupported/test/cxx11_tensor_concatenation.cpp
+++ b/unsupported/test/cxx11_tensor_concatenation.cpp
@@ -56,7 +56,7 @@ static void test_static_dimension_failure()
   // either the code should change to
   //   Tensor<int, 2>::Dimensions{{2, 3}}
   // or Tensor<int, 2>::Dimensions{Tensor<int, 2>::Dimensions{{2, 3}}}
-      .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0);
+      .concatenate(right.reshape(Tensor<int, 2>::Dimensions(2, 3)), 0);
 }
 
 template<int DataLayout>
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index d4cfbd0da..4e5922440 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -514,7 +514,7 @@ static void test_const_inputs()
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
@@ -553,7 +553,7 @@ static void test_large_contraction_with_output_kernel() {
 
   m_result = m_left * m_right;
 
-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (std::ptrdiff_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
diff --git a/unsupported/test/cxx11_tensor_index_list.cpp b/unsupported/test/cxx11_tensor_index_list.cpp
index e81fa5e40..294677a4d 100644
--- a/unsupported/test/cxx11_tensor_index_list.cpp
+++ b/unsupported/test/cxx11_tensor_index_list.cpp
@@ -170,7 +170,6 @@ static void test_type2indexpair_list()
   typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b;
   typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c;
 
-  Dims0 d0;
   Dims2_a d2_a;
   Dims2_b d2_b;
diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp
index 5c3aae482..6d8e58214 100644
--- a/unsupported/test/cxx11_tensor_thread_pool.cpp
+++ b/unsupported/test/cxx11_tensor_thread_pool.cpp
@@ -255,7 +255,7 @@ void test_multithread_contraction_agrees_with_singlethread() {
 struct SqrtOutputKernel {
   template <typename Index, typename Scalar>
   EIGEN_ALWAYS_INLINE void operator()(
-      const OutputKernel::OutputMapper<Index, Scalar>& output_mapper,
+      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
       const TensorContractionParams&, Index, Index, Index num_rows,
       Index num_cols) const {
     for (int i = 0; i < num_rows; ++i) {
@@ -300,7 +300,7 @@ static void test_multithread_contraction_with_output_kernel() {
 
   m_result = m_left * m_right;
 
-  for (size_t i = 0; i < t_result.dimensions().TotalSize(); i++) {
+  for (Index i = 0; i < t_result.dimensions().TotalSize(); i++) {
     VERIFY(&t_result.data()[i] != &m_result.data()[i]);
     VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
   }
@@ -428,7 +428,7 @@ void test_threadpool_allocate(TestAllocator* allocator)
     void* ptr = device.allocate(512);
     device.deallocate(ptr);
   }
-  VERIFY(allocator != nullptr);
+  VERIFY(allocator != NULL);
   VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
   VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
 }
@@ -460,7 +460,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
   CALL_SUBTEST_6(test_multithread_random());
 
   TestAllocator test_allocator;
-  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(nullptr));
+  CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>(NULL));
   CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>(&test_allocator));
   CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator));
 }
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 4f143b6de..b5b764c65 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,6 +9,7 @@
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
+
 #ifdef EIGEN_TEST_PART_1
 
 #include "sparse.h"
@@ -95,7 +96,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(1,0) = DM_a.coeffRef(1,0) = -0.9076572187376921;
   SM_a.insert(1,1) = DM_a.coeffRef(1,1) = 0.6469156566545853;
   SM_a.insert(1,2) = DM_a.coeffRef(1,2) = -0.3658010398782789;
-  
+
   MatrixXd DM_b(3,2);
   SparseMatrix<double> SM_b(3,2);
   SM_b.insert(0,0) = DM_b.coeffRef(0,0) = 0.9004440976767099;
@@ -165,7 +166,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   SM_a.insert(0,3) = -0.2;
   SM_a.insert(2,4) = 0.3;
   SM_a.finalize();
-  
+
   SM_b.insert(0,0) = 0.4;
   SM_b.insert(2,1) = -0.5;
   SM_b.finalize();
@@ -183,7 +184,7 @@ EIGEN_DECLARE_TEST(kronecker_product)
   DM_b2.resize(4,8);
   DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
   CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));
-  
+
   for(int i = 0; i < g_repeat; i++)
   {
     double density = Eigen::internal::random<double>(0.01,0.5);
@@ -196,35 +197,35 @@ EIGEN_DECLARE_TEST(kronecker_product)
     MatrixXf dA(ra,ca), dB(rb,cb), dC;
     initSparse(density, dA, sA);
     initSparse(density, dB, sB);
-    
+
     sC = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB);
     dC = kroneckerProduct(dA.transpose(),dB);
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA.transpose(),sB.transpose());
     dC = kroneckerProduct(dA.transpose(),dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC = kroneckerProduct(sA,sB.transpose());
    dC = kroneckerProduct(dA,dB.transpose());
     VERIFY_IS_APPROX(MatrixXf(sC),dC);
-    
+
     sC2 = kroneckerProduct(sA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(dA,sB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(sA,dB);
     dC = kroneckerProduct(dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
-    
+
     sC2 = kroneckerProduct(2*sA,sB);
     dC = kroneckerProduct(2*dA,dB);
     VERIFY_IS_APPROX(MatrixXf(sC2),dC);
@@ -236,7 +237,6 @@ EIGEN_DECLARE_TEST(kronecker_product)
 
 #ifdef EIGEN_TEST_PART_2
 
 // simply check that for a dense kronecker product, sparse module is not needed
-
 #include "main.h"
 #include <Eigen/KroneckerProduct>
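Note: for context, the API exercised by this test is regular Eigen usage and works standalone; a minimal dense example (independent of the whitespace fixes above):

    #include <unsupported/Eigen/KroneckerProduct>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A(2, 2), B(2, 2);
      A << 1, 2,
           3, 4;
      B << 0, 1,
           1, 0;
      // kroneckerProduct(A, B) is 4x4: each entry a_ij expands to a_ij * B.
      Eigen::MatrixXd C = Eigen::kroneckerProduct(A, B);
      std::cout << C << std::endl;
      return 0;
    }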
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 93fb71430..2049b8ba0 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -23,9 +23,8 @@ inline bool test_isApprox_abs(const Type1& a, const Type2& b)
 
 // Returns a matrix with eigenvalues clustered around 0, 1 and 2.
 template<typename MatrixType>
-MatrixType randomMatrixWithRealEivals(const typename MatrixType::Index size)
+MatrixType randomMatrixWithRealEivals(const Index size)
 {
-  typedef typename MatrixType::Index Index;
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
   MatrixType diag = MatrixType::Zero(size, size);
@@ -42,16 +41,15 @@ template <typename MatrixType, int IsComplex = NumTraits<typename internal::trai
 struct randomMatrixWithImagEivals
 {
   // Returns a matrix with eigenvalues clustered around 0 and +/- i.
-  static MatrixType run(const typename MatrixType::Index size);
+  static MatrixType run(const Index size);
 };
 
 // Partial specialization for real matrices
 template<typename MatrixType>
 struct randomMatrixWithImagEivals<MatrixType, 0>
 {
-  static MatrixType run(const typename MatrixType::Index size)
+  static MatrixType run(const Index size)
   {
-    typedef typename MatrixType::Index Index;
     typedef typename MatrixType::Scalar Scalar;
     MatrixType diag = MatrixType::Zero(size, size);
     Index i = 0;
@@ -77,9 +75,8 @@ struct randomMatrixWithImagEivals<MatrixType, 0>
 template<typename MatrixType>
 struct randomMatrixWithImagEivals<MatrixType, 1>
 {
-  static MatrixType run(const typename MatrixType::Index size)
+  static MatrixType run(const Index size)
   {
-    typedef typename MatrixType::Index Index;
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
     const Scalar imagUnit(0, 1);
@@ -171,7 +168,6 @@ void testMatrixType(const MatrixType& m)
 {
   // Matrices with clustered eigenvalue lead to different code paths
   // in MatrixFunction.h and are thus useful for testing.
-  typedef typename MatrixType::Index Index;
   const Index size = m.rows();
 
   for (int i = 0; i < g_repeat; i++) {
diff --git a/unsupported/test/openglsupport.cpp b/unsupported/test/openglsupport.cpp
index 460830086..eadd7f985 100644
--- a/unsupported/test/openglsupport.cpp
+++ b/unsupported/test/openglsupport.cpp
@@ -318,10 +318,6 @@ EIGEN_DECLARE_TEST(openglsupport)
 
     GLint prg_id = createShader(vtx,frg);
 
-    typedef Vector2d Vector2d;
-    typedef Vector3d Vector3d;
-    typedef Vector4d Vector4d;
-
     VERIFY_UNIFORM(dv,v2d, Vector2d);
     VERIFY_UNIFORM(dv,v3d, Vector3d);
     VERIFY_UNIFORM(dv,v4d, Vector4d);
diff --git a/unsupported/test/polynomialsolver.cpp b/unsupported/test/polynomialsolver.cpp
index 65efea0cb..50c74f797 100644
--- a/unsupported/test/polynomialsolver.cpp
+++ b/unsupported/test/polynomialsolver.cpp
@@ -30,7 +30,6 @@ struct increment_if_fixed_size
 
 template<int Deg, typename POLYNOMIAL, typename SOLVER>
 bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
 {
-  typedef typename POLYNOMIAL::Index Index;
   typedef typename POLYNOMIAL::Scalar Scalar;
   typedef typename POLYNOMIAL::RealScalar RealScalar;
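Note: the change running through matrix_function.cpp and polynomialsolver.cpp above is the removal of per-type `typedef typename MatrixType::Index Index` lines. Since Eigen 3.3 there is a single library-wide `Eigen::Index` (by default `std::ptrdiff_t`, configurable through `EIGEN_DEFAULT_DENSE_INDEX_TYPE`), so those typedefs were redundant. A small illustration of writing against it directly:

    #include <Eigen/Dense>

    // rows() and cols() return Eigen::Index, so no per-matrix typedef is
    // needed to write size computations generically.
    template <typename MatrixType>
    Eigen::Index total_coeffs(const MatrixType& m) {
      return m.rows() * m.cols();
    }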