diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-12-20 17:02:06 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-12-20 17:02:06 -0800 |
commit | 0f577d47445cd0e34e46e7d7ecdf2d51625d5060 (patch) | |
tree | 267a1e8c2f906edfed2699897cff0782ca64e97b | |
parent | 29186f766f7e36dd8dbe933e035f6bcccc8fe70d (diff) | |
parent | f2f9df8aa57d7a303eb113c251245e315f2ad2b7 (diff) |
Merged eigen/eigen into default
24 files changed, 491 insertions, 239 deletions
@@ -28,7 +28,11 @@ activity.png *.rej log patch +*.patch a a.* lapack/testing lapack/reference +.*project +.settings +Makefile diff --git a/Eigen/Core b/Eigen/Core index 444c1c8d7..16be82ac2 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -150,6 +150,10 @@ #define EIGEN_VECTORIZE_AVX2 #define EIGEN_VECTORIZE_AVX #define EIGEN_VECTORIZE_FMA + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif @@ -360,6 +364,8 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX512/PacketMath.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" #elif defined EIGEN_VECTORIZE_AVX // Use AVX for floats and doubles, SSE for integers diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 571972023..a07efb1d5 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -32,7 +32,7 @@ template<> struct cholmod_configure_matrix<std::complex<double> > { } }; -// Other scalar types are not yet suppotred by Cholmod +// Other scalar types are not yet supported by Cholmod // template<> struct cholmod_configure_matrix<float> { // template<typename CholmodType> // static void run(CholmodType& mat) { @@ -124,6 +124,9 @@ cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Sca if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; + // swap stype for rowmajor matrices (only works for real matrices) + EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + if(_Options & RowMajorBit) res.stype *=-1; return res; } @@ -159,6 +162,44 @@ MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm) static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) ); } +namespace internal { + +// template specializations for int and long that call the correct cholmod method + +#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ + template<typename _StorageIndex> ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ + template<> ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } + +#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ + template<typename _StorageIndex> ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ + template<> ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } + +EIGEN_CHOLMOD_SPECIALIZE0(int, start) +EIGEN_CHOLMOD_SPECIALIZE0(int, finish) + +EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) +EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) +EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) + +EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) + +template<typename _StorageIndex> cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } +template<> cholmod_dense* cm_solve<long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } + +template<typename _StorageIndex> cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } +template<> cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } + +template<typename _StorageIndex> +int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } +template<> +int cm_factorize_p<long> (cholmod_sparse* A, double beta[2], long* fset, size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } + +#undef EIGEN_CHOLMOD_SPECIALIZE0 +#undef EIGEN_CHOLMOD_SPECIALIZE1 + +} // namespace internal + + enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; @@ -195,7 +236,7 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - cholmod_start(&m_cholmod); + internal::cm_start<StorageIndex>(m_cholmod); } explicit CholmodBase(const MatrixType& matrix) @@ -203,15 +244,15 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - cholmod_start(&m_cholmod); + internal::cm_start<StorageIndex>(m_cholmod); compute(matrix); } ~CholmodBase() { if(m_cholmodFactor) - cholmod_free_factor(&m_cholmodFactor, &m_cholmod); - cholmod_finish(&m_cholmod); + internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); + internal::cm_finish<StorageIndex>(m_cholmod); } inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); } @@ -219,7 +260,7 @@ class CholmodBase : public SparseSolverBase<Derived> /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was succesful, + * \returns \c Success if computation was successful, * \c NumericalIssue if the matrix.appears to be negative. */ ComputationInfo info() const @@ -246,11 +287,11 @@ class CholmodBase : public SparseSolverBase<Derived> { if(m_cholmodFactor) { - cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); m_cholmodFactor = 0; } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); + m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod); this->m_isInitialized = true; this->m_info = Success; @@ -268,7 +309,7 @@ class CholmodBase : public SparseSolverBase<Derived> { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); + internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); @@ -289,19 +330,20 @@ class CholmodBase : public SparseSolverBase<Derived> EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. + // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); + cholmod_dense* x_cd = internal::cm_solve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols()); - cholmod_free_dense(&x_cd, &m_cholmod); + internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod); } /** \internal */ @@ -316,15 +358,16 @@ class CholmodBase : public SparseSolverBase<Derived> // note: cs stands for Cholmod Sparse Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived()); cholmod_sparse b_cs = viewAsCholmod(b_ref); - cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); + cholmod_sparse* x_cs = internal::cm_spsolve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver) dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs); - cholmod_free_sparse(&x_cs, &m_cholmod); + internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/Eigen/src/Core/BooleanRedux.h b/Eigen/src/Core/BooleanRedux.h index 8409d8749..ed607d5d8 100644 --- a/Eigen/src/Core/BooleanRedux.h +++ b/Eigen/src/Core/BooleanRedux.h @@ -14,56 +14,54 @@ namespace Eigen { namespace internal { -template<typename Derived, int UnrollCount> +template<typename Derived, int UnrollCount, int Rows> struct all_unroller { - typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Traits::RowsAtCompileTime, - row = (UnrollCount-1) % Traits::RowsAtCompileTime + col = (UnrollCount-1) / Rows, + row = (UnrollCount-1) % Rows }; static inline bool run(const Derived &mat) { - return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col); + return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col); } }; -template<typename Derived> -struct all_unroller<Derived, 0> +template<typename Derived, int Rows> +struct all_unroller<Derived, 0, Rows> { static inline bool run(const Derived &/*mat*/) { return true; } }; -template<typename Derived> -struct all_unroller<Derived, Dynamic> +template<typename Derived, int Rows> +struct all_unroller<Derived, Dynamic, Rows> { static inline bool run(const Derived &) { return false; } }; -template<typename Derived, int UnrollCount> +template<typename Derived, int UnrollCount, int Rows> struct any_unroller { - typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Traits::RowsAtCompileTime, - row = (UnrollCount-1) % Traits::RowsAtCompileTime + col = (UnrollCount-1) / Rows, + row = (UnrollCount-1) % Rows }; static inline bool run(const Derived &mat) { - return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col); + return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col); } }; -template<typename Derived> -struct any_unroller<Derived, 0> +template<typename Derived, int Rows> +struct any_unroller<Derived, 0, Rows> { static inline bool run(const Derived & /*mat*/) { return false; } }; -template<typename Derived> -struct any_unroller<Derived, Dynamic> +template<typename Derived, int Rows> +struct any_unroller<Derived, Dynamic, Rows> { static inline bool run(const Derived &) { return false; } }; @@ -87,7 +85,7 @@ inline bool DenseBase<Derived>::all() const }; Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); + return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -111,7 +109,7 @@ inline bool DenseBase<Derived>::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); + return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 1d14af652..24fc7835b 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator<const T> // ---------- base class for all evaluators ---------- template<typename ExpressionType> -struct evaluator_base : public noncopyable +struct evaluator_base { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; @@ -114,6 +114,14 @@ struct evaluator_base : public noncopyable enum { Alignment = 0 }; + // noncopyable: + // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) + // and make complex evaluator much larger than then should do. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} +private: + EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); + EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -123,6 +131,27 @@ struct evaluator_base : public noncopyable // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. +// this helper permits to completely eliminate m_outerStride if it is known at compiletime. +template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data { +public: + plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) + { + EIGEN_ONLY_USED_FOR_DEBUG(outerStride); + eigen_internal_assert(outerStride==OuterStride); + } + Index outerStride() const { return OuterStride; } + const Scalar *data; +}; + +template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> { +public: + plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} + Index outerStride() const { return m_outerStride; } + const Scalar *data; +protected: + Index m_outerStride; +}; + template<typename Derived> struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> @@ -141,18 +170,21 @@ struct evaluator<PlainObjectBase<Derived> > Flags = traits<Derived>::EvaluatorFlags, Alignment = traits<Derived>::Alignment }; - + enum { + // We do not need to know the outer stride for vectors + OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime + }; + EIGEN_DEVICE_FUNC evaluator() - : m_data(0), - m_outerStride(IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime) + : m_d(0,OuterStrideAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) + : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -161,30 +193,30 @@ struct evaluator<PlainObjectBase<Derived> > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_data[row * m_outerStride.value() + col]; + return m_d.data[row * m_d.outerStride() + col]; else - return m_data[row + col * m_outerStride.value()]; + return m_d.data[row + col * m_d.outerStride()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_data[index]; + return m_d.data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; + return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col]; else - return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; + return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast<Scalar*>(m_data)[index]; + return const_cast<Scalar*>(m_d.data)[index]; } template<int LoadMode, typename PacketType> @@ -192,16 +224,16 @@ struct evaluator<PlainObjectBase<Derived> > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col); + return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col); else - return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value()); + return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride()); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt<PacketType, LoadMode>(m_data + index); + return ploadt<PacketType, LoadMode>(m_d.data + index); } template<int StoreMode,typename PacketType> @@ -210,26 +242,22 @@ struct evaluator<PlainObjectBase<Derived> > { if (IsRowMajor) return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x); + (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x); else return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x); + (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x); } template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); + return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x); } protected: - const Scalar *m_data; - // We do not need to know the outer stride for vectors - variable_if_dynamic<Index, IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime> m_outerStride; + plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d; }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -527,9 +555,7 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) - : m_functor(op.functor()), - m_argImpl(op.nestedExpression()) + explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -540,32 +566,43 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_argImpl.coeff(row, col)); + return m_d.func()(m_d.argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_argImpl.coeff(index)); + return m_d.func()(m_d.argImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); + return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); + return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index)); } protected: - const UnaryOp m_functor; - evaluator<ArgType> m_argImpl; + + // this helper permits to completely eliminate the functor if it is empty + class Data : private UnaryOp + { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } + evaluator<ArgType> argImpl; + }; + + Data m_d; }; // -------------------- CwiseTernaryOp -------------------- @@ -609,11 +646,7 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased evaluator<Arg3>::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) - : m_functor(xpr.functor()), - m_arg1Impl(xpr.arg1()), - m_arg2Impl(xpr.arg2()), - m_arg3Impl(xpr.arg3()) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -624,38 +657,47 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col), - m_arg2Impl.template packet<LoadMode,PacketType>(row, col), - m_arg3Impl.template packet<LoadMode,PacketType>(row, col)); + return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col), + m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col), + m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index), - m_arg2Impl.template packet<LoadMode,PacketType>(index), - m_arg3Impl.template packet<LoadMode,PacketType>(index)); + return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index), + m_d.arg2Impl.template packet<LoadMode,PacketType>(index), + m_d.arg3Impl.template packet<LoadMode,PacketType>(index)); } protected: - const TernaryOp m_functor; - evaluator<Arg1> m_arg1Impl; - evaluator<Arg2> m_arg2Impl; - evaluator<Arg3> m_arg3Impl; + // this helper permits to completely eliminate the functor if it is empty + struct Data : private TernaryOp + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); } + evaluator<Arg1> arg1Impl; + evaluator<Arg2> arg2Impl; + evaluator<Arg3> arg3Impl; + }; + + Data m_d; }; // -------------------- CwiseBinaryOp -------------------- @@ -696,10 +738,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) - : m_functor(xpr.functor()), - m_lhsImpl(xpr.lhs()), - m_rhsImpl(xpr.rhs()) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -710,35 +749,45 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), - m_rhsImpl.template packet<LoadMode,PacketType>(row, col)); + return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col), + m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), - m_rhsImpl.template packet<LoadMode,PacketType>(index)); + return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index), + m_d.rhsImpl.template packet<LoadMode,PacketType>(index)); } protected: - const BinaryOp m_functor; - evaluator<Lhs> m_lhsImpl; - evaluator<Rhs> m_rhsImpl; + + // this helper permits to completely eliminate the functor if it is empty + struct Data : private BinaryOp + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); } + evaluator<Lhs> lhsImpl; + evaluator<Rhs> rhsImpl; + }; + + Data m_d; }; // -------------------- CwiseUnaryView -------------------- @@ -757,9 +806,7 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) - : m_unaryOp(op.functor()), - m_argImpl(op.nestedExpression()) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -771,30 +818,40 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_unaryOp(m_argImpl.coeff(row, col)); + return m_d.func()(m_d.argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_unaryOp(m_argImpl.coeff(index)); + return m_d.func()(m_d.argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_unaryOp(m_argImpl.coeffRef(row, col)); + return m_d.func()(m_d.argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_unaryOp(m_argImpl.coeffRef(index)); + return m_d.func()(m_d.argImpl.coeffRef(index)); } protected: - const UnaryOp m_unaryOp; - evaluator<ArgType> m_argImpl; + + // this helper permits to completely eliminate the functor if it is empty + struct Data : private UnaryOp + { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } + evaluator<ArgType> argImpl; + }; + + Data m_d; }; // -------------------- Map -------------------- diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index 62d4180da..06484ab30 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -45,7 +45,7 @@ struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType> }; } -// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ?? + template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> > { @@ -60,10 +60,12 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView /** \brief The type of coefficients in this matrix */ typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; typedef typename MatrixType::StorageIndex StorageIndex; + typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType; enum { Mode = internal::traits<SelfAdjointView>::Mode, - Flags = internal::traits<SelfAdjointView>::Flags + Flags = internal::traits<SelfAdjointView>::Flags, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0) }; typedef typename MatrixType::PlainObject PlainObject; @@ -187,6 +189,36 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2); } + typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; + /** \sa MatrixBase::conjugate() const */ + EIGEN_DEVICE_FUNC + inline const ConjugateReturnType conjugate() const + { return ConjugateReturnType(m_matrix.conjugate()); } + + typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType; + /** \sa MatrixBase::adjoint() const */ + EIGEN_DEVICE_FUNC + inline const AdjointReturnType adjoint() const + { return AdjointReturnType(m_matrix.adjoint()); } + + typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType; + /** \sa MatrixBase::transpose() */ + EIGEN_DEVICE_FUNC + inline TransposeReturnType transpose() + { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + typename MatrixType::TransposeReturnType tmp(m_matrix); + return TransposeReturnType(tmp); + } + + typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType; + /** \sa MatrixBase::transpose() const */ + EIGEN_DEVICE_FUNC + inline const ConstTransposeReturnType transpose() const + { + return ConstTransposeReturnType(m_matrix.transpose()); + } + /** \returns a const expression of the main diagonal of the matrix \c *this * * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator. diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index f6500a16e..0580b80f8 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -461,53 +461,21 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { - Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); - // mimic an "inplace" permutation of the lower 128bits using a blend - lane0 = _mm256_blend_ps( - lane0, _mm256_castps128_ps256(_mm_permute_ps( - _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), - 15); - // then we can perform a consistent permutation on the global register to get - // everything in shape: - lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); - - Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); - // mimic an "inplace" permutation of the lower 128bits using a blend - lane1 = _mm256_blend_ps( - lane1, _mm256_castps128_ps256(_mm_permute_ps( - _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), - 15); - // then we can perform a consistent permutation on the global register to get - // everything in shape: - lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); - -#ifdef EIGEN_VECTORIZE_AVX512DQ - Packet16f res = _mm512_undefined_ps(); - return _mm512_insertf32x8(res, lane0, 0); - return _mm512_insertf32x8(res, lane1, 1); - return res; -#else - Packet16f res = _mm512_undefined_ps(); - res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); - res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); - res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); - res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); - return res; -#endif + __m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); + __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); + __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); + return pairs; } // Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); - lane0 = _mm256_permute_pd(lane0, 3 << 2); - - Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); - lane1 = _mm256_permute_pd(lane1, 3 << 2); - - Packet8d res = _mm512_undefined_pd(); - res = _mm512_insertf64x4(res, lane0, 0); - return _mm512_insertf64x4(res, lane1, 1); + __m512d x = _mm512_setzero_pd(); + x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); + x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); + x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); + x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); + return x; } // Loads 4 floats from memory a returns the packet @@ -525,11 +493,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) { // {a0, a0 a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) { - Packet8d tmp = _mm512_undefined_pd(); - Packet2d tmp0 = _mm_load_pd1(from); - Packet2d tmp1 = _mm_load_pd1(from + 1); - Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); - Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); + __m128d tmp0 = _mm_load_pd1(from); + __m256d lane0 = _mm256_broadcastsd_pd(tmp0); + __m128d tmp1 = _mm_load_pd1(from + 1); + __m256d lane1 = _mm256_broadcastsd_pd(tmp1); + __m512d tmp = _mm512_undefined_pd(); tmp = _mm512_insertf64x4(tmp, lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 45213f791..59367ba29 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -15,14 +15,14 @@ namespace Eigen { namespace internal { -static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; #ifdef __VSX__ #if defined(_BIG_ENDIAN) -static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; -static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #else -static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_ZERO_, 8);//{ 0x8000000000000000, 0x0000000000000000 }; -static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_ZERO_, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; #endif #endif diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 5511245dd..c5e4bede7 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -84,8 +84,10 @@ static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0); static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125); static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); +#ifdef __POWER8_VECTOR__ static Packet2l p2l_1023 = { 1023, 1023 }; static Packet2ul p2ul_52 = { 52, 52 }; +#endif #endif diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index cbfef3503..e7d4f4d8d 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -72,7 +72,7 @@ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS16,-16); //{ -16, -16, -16, -16} static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); //{ -1, -1, -1, -1} -static Packet4f p4f_ZERO_ = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} +static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} #ifndef __VSX__ static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} #endif @@ -358,7 +358,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } -template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_ZERO); } +template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_madd(a,b, p4f_MZERO); } template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return a * b; } template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) @@ -373,7 +373,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const t = vec_nmsub(y_0, b, p4f_ONE); y_1 = vec_madd(y_0, t, y_0); - return vec_madd(a, y_1, p4f_ZERO); + return vec_madd(a, y_1, p4f_MZERO); #else return vec_div(a, b); #endif @@ -766,7 +766,7 @@ static Packet2l p2l_ONE = { 1, 1 }; static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO); static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO); -static Packet2d p2d_ZERO_ = { -0.0, -0.0 }; +static Packet2d p2d_MZERO = { -0.0, -0.0 }; #ifdef _BIG_ENDIAN static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(p2d_ZERO), reinterpret_cast<Packet4f>(p2d_ONE), 8)); @@ -904,7 +904,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; } -template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_ZERO); } +template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_madd(a,b,p2d_MZERO); } template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_div(a,b); } // for some weird raisons, it has to be overloaded for packet of integers diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 2a8f58d74..d392bf3ff 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -28,11 +28,13 @@ namespace internal { #define EIGEN_HAS_SINGLE_INSTRUCTION_CJMADD #endif -// FIXME NEON has 16 quad registers, but since the current register allocator -// is so bad, it is much better to reduce it to 8 #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#if EIGEN_ARCH_ARM64 +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 +#else #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 #endif +#endif typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h index 21f80d86b..4431f2fc4 100755 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,7 +4,6 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable - // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -20,7 +19,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 4ce427478..95960b448 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -501,10 +501,11 @@ // attribute to maximize inlining. This should only be used when really necessary: in particular, // it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times. // FIXME with the always_inline attribute, -// gcc 3.4.x reports the following compilation error: +// gcc 3.4.x and 4.1 reports the following compilation error: // Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const' // : function body not available -#if EIGEN_GNUC_AT_LEAST(4,0) +// See also bug 1367 +#if EIGEN_GNUC_AT_LEAST(4,2) #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE @@ -627,6 +628,14 @@ namespace Eigen { #endif +#if EIGEN_COMP_MSVC + // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. + // This workaround is ugly, but it does the job. +# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond +#else +# define EIGEN_CONST_CONDITIONAL(cond) cond +#endif + //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h index 066eae4f9..c902d8f0a 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -63,7 +63,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ EIGEN_DEVICE_FUNC inline AlignedBox() - { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } + { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. */ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h index 07f2659b2..05929b299 100644 --- a/Eigen/src/Geometry/Hyperplane.h +++ b/Eigen/src/Geometry/Hyperplane.h @@ -217,7 +217,10 @@ public: EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine) { if (traits==Affine) + { normal() = mat.inverse().transpose() * normal(); + m_coeffs /= normal().norm(); + } else if (traits==Isometry) normal() = mat * normal(); else diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h index 1e985d8cd..3929ca87f 100644 --- a/Eigen/src/Geometry/ParametrizedLine.h +++ b/Eigen/src/Geometry/ParametrizedLine.h @@ -104,7 +104,44 @@ public: template <int OtherOptions> EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - /** \returns \c *this with scalar type casted to \a NewScalarType + /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. + * + * \param mat the Dim x Dim transformation matrix + * \param traits specifies whether the matrix \a mat represents an #Isometry + * or a more generic #Affine transformation. The default is #Affine. + */ + template<typename XprType> + EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine) + { + if (traits==Affine) + direction() = (mat * direction()).normalized(); + else if (traits==Isometry) + direction() = mat * direction(); + else + { + eigen_assert(0 && "invalid traits value in ParametrizedLine::transform()"); + } + origin() = mat * origin(); + return *this; + } + + /** Applies the transformation \a t to \c *this and returns a reference to \c *this. + * + * \param t the transformation of dimension Dim + * \param traits specifies whether the transformation \a t represents an #Isometry + * or a more generic #Affine transformation. The default is #Affine. + * Other kind of transformations are not supported. + */ + template<int TrOptions> + EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t, + TransformTraits traits = Affine) + { + transform(t.linear(), traits); + origin() += t.translation(); + return *this; + } + +/** \returns \c *this with scalar type casted to \a NewScalarType * * Note that if \a NewScalarType is equal to the current scalar type of \c *this * then this function smartly returns a const reference to \c *this. diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 3f31ee45d..2d36dfadf 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -335,7 +335,7 @@ public: OtherModeIsAffineCompact = OtherMode == int(AffineCompact) }; - if(ModeIsAffineCompact == OtherModeIsAffineCompact) + if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact)) { // We need the block expression because the code is compiled for all // combinations of transformations and will trigger a compile time error @@ -343,7 +343,7 @@ public: m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0); makeAffine(); } - else if(OtherModeIsAffineCompact) + else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact)) { typedef typename Transform<Scalar,Dim,OtherMode,OtherOptions>::MatrixType OtherMatrixType; internal::transform_construct_from_matrix<OtherMatrixType,Mode,Options,Dim,HDim>::run(this, other.matrix()); @@ -481,7 +481,7 @@ public: TransformTimeDiagonalReturnType res; res.linear().noalias() = a*b.linear(); res.translation().noalias() = a*b.translation(); - if (Mode!=int(AffineCompact)) + if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact))) res.matrix().row(Dim) = b.matrix().row(Dim); return res; } @@ -755,7 +755,7 @@ template<typename Scalar, int Dim, int Mode,int Options> Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other) { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (Mode == int(AffineCompact)) + if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -801,7 +801,7 @@ Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator { check_template_params(); EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (Mode == int(AffineCompact)) + if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -819,7 +819,7 @@ template<typename Scalar, int Dim, int Mode, int Options> QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (Mode == int(AffineCompact)) + if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); @@ -912,7 +912,7 @@ EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - if(int(Mode)==int(Projective)) + if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective))) affine() += other * m_matrix.row(Dim); else translation() += other; diff --git a/test/cholmod_support.cpp b/test/cholmod_support.cpp index a7eda28f7..931207334 100644 --- a/test/cholmod_support.cpp +++ b/test/cholmod_support.cpp @@ -12,21 +12,21 @@ #include <Eigen/CholmodSupport> -template<typename T> void test_cholmod_T() +template<typename SparseType> void test_cholmod_ST() { - CholmodDecomposition<SparseMatrix<T>, Lower> g_chol_colmajor_lower; g_chol_colmajor_lower.setMode(CholmodSupernodalLLt); - CholmodDecomposition<SparseMatrix<T>, Upper> g_chol_colmajor_upper; g_chol_colmajor_upper.setMode(CholmodSupernodalLLt); - CholmodDecomposition<SparseMatrix<T>, Lower> g_llt_colmajor_lower; g_llt_colmajor_lower.setMode(CholmodSimplicialLLt); - CholmodDecomposition<SparseMatrix<T>, Upper> g_llt_colmajor_upper; g_llt_colmajor_upper.setMode(CholmodSimplicialLLt); - CholmodDecomposition<SparseMatrix<T>, Lower> g_ldlt_colmajor_lower; g_ldlt_colmajor_lower.setMode(CholmodLDLt); - CholmodDecomposition<SparseMatrix<T>, Upper> g_ldlt_colmajor_upper; g_ldlt_colmajor_upper.setMode(CholmodLDLt); + CholmodDecomposition<SparseType, Lower> g_chol_colmajor_lower; g_chol_colmajor_lower.setMode(CholmodSupernodalLLt); + CholmodDecomposition<SparseType, Upper> g_chol_colmajor_upper; g_chol_colmajor_upper.setMode(CholmodSupernodalLLt); + CholmodDecomposition<SparseType, Lower> g_llt_colmajor_lower; g_llt_colmajor_lower.setMode(CholmodSimplicialLLt); + CholmodDecomposition<SparseType, Upper> g_llt_colmajor_upper; g_llt_colmajor_upper.setMode(CholmodSimplicialLLt); + CholmodDecomposition<SparseType, Lower> g_ldlt_colmajor_lower; g_ldlt_colmajor_lower.setMode(CholmodLDLt); + CholmodDecomposition<SparseType, Upper> g_ldlt_colmajor_upper; g_ldlt_colmajor_upper.setMode(CholmodLDLt); - CholmodSupernodalLLT<SparseMatrix<T>, Lower> chol_colmajor_lower; - CholmodSupernodalLLT<SparseMatrix<T>, Upper> chol_colmajor_upper; - CholmodSimplicialLLT<SparseMatrix<T>, Lower> llt_colmajor_lower; - CholmodSimplicialLLT<SparseMatrix<T>, Upper> llt_colmajor_upper; - CholmodSimplicialLDLT<SparseMatrix<T>, Lower> ldlt_colmajor_lower; - CholmodSimplicialLDLT<SparseMatrix<T>, Upper> ldlt_colmajor_upper; + CholmodSupernodalLLT<SparseType, Lower> chol_colmajor_lower; + CholmodSupernodalLLT<SparseType, Upper> chol_colmajor_upper; + CholmodSimplicialLLT<SparseType, Lower> llt_colmajor_lower; + CholmodSimplicialLLT<SparseType, Upper> llt_colmajor_upper; + CholmodSimplicialLDLT<SparseType, Lower> ldlt_colmajor_lower; + CholmodSimplicialLDLT<SparseType, Upper> ldlt_colmajor_upper; check_sparse_spd_solving(g_chol_colmajor_lower); check_sparse_spd_solving(g_chol_colmajor_upper); @@ -50,8 +50,20 @@ template<typename T> void test_cholmod_T() check_sparse_spd_determinant(ldlt_colmajor_upper); } +template<typename T, int flags, typename IdxType> void test_cholmod_T() +{ + test_cholmod_ST<SparseMatrix<T, flags, IdxType> >(); +} + void test_cholmod_support() { - CALL_SUBTEST_1(test_cholmod_T<double>()); - CALL_SUBTEST_2(test_cholmod_T<std::complex<double> >()); + CALL_SUBTEST_11( (test_cholmod_T<double , ColMajor, int >()) ); + CALL_SUBTEST_12( (test_cholmod_T<double , ColMajor, long>()) ); + CALL_SUBTEST_13( (test_cholmod_T<double , RowMajor, int >()) ); + CALL_SUBTEST_14( (test_cholmod_T<double , RowMajor, long>()) ); + CALL_SUBTEST_21( (test_cholmod_T<std::complex<double>, ColMajor, int >()) ); + CALL_SUBTEST_22( (test_cholmod_T<std::complex<double>, ColMajor, long>()) ); + // TODO complex row-major matrices do not work at the moment: + // CALL_SUBTEST_23( (test_cholmod_T<std::complex<double>, RowMajor, int >()) ); + // CALL_SUBTEST_24( (test_cholmod_T<std::complex<double>, RowMajor, long>()) ); } diff --git a/test/geo_hyperplane.cpp b/test/geo_hyperplane.cpp index e77702bc7..27892850d 100644 --- a/test/geo_hyperplane.cpp +++ b/test/geo_hyperplane.cpp @@ -66,12 +66,15 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane) VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot,Isometry).absDistance(rot * p1), Scalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling).absDistance((rot*scaling) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling*translation) .absDistance((rot*scaling*translation) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*translation,Isometry) .absDistance((rot*translation) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); } // casting diff --git a/test/geo_parametrizedline.cpp b/test/geo_parametrizedline.cpp index 9bf5f3c1d..29c1b105c 100644 --- a/test/geo_parametrizedline.cpp +++ b/test/geo_parametrizedline.cpp @@ -25,6 +25,8 @@ template<typename LineType> void parametrizedline(const LineType& _line) typedef typename NumTraits<Scalar>::Real RealScalar; typedef Matrix<Scalar, LineType::AmbientDimAtCompileTime, 1> VectorType; typedef Hyperplane<Scalar,LineType::AmbientDimAtCompileTime> HyperplaneType; + typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime, + HyperplaneType::AmbientDimAtCompileTime> MatrixType; VectorType p0 = VectorType::Random(dim); VectorType p1 = VectorType::Random(dim); @@ -59,6 +61,31 @@ template<typename LineType> void parametrizedline(const LineType& _line) VERIFY_IS_MUCH_SMALLER_THAN(hp.signedDistance(pi), RealScalar(1)); VERIFY_IS_MUCH_SMALLER_THAN(l0.distance(pi), RealScalar(1)); VERIFY_IS_APPROX(l0.intersectionPoint(hp), pi); + + // transform + if (!NumTraits<Scalar>::IsComplex) + { + MatrixType rot = MatrixType::Random(dim,dim).householderQr().householderQ(); + DiagonalMatrix<Scalar,LineType::AmbientDimAtCompileTime> scaling(VectorType::Random()); + Translation<Scalar,LineType::AmbientDimAtCompileTime> translation(VectorType::Random()); + + while(scaling.diagonal().cwiseAbs().minCoeff()<RealScalar(1e-4)) scaling.diagonal() = VectorType::Random(); + + LineType l1 = l0; + VectorType p3 = l0.pointAt(Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN( l1.transform(rot).distance(rot * p3), Scalar(1) ); + l1 = l0; + VERIFY_IS_MUCH_SMALLER_THAN( l1.transform(rot,Isometry).distance(rot * p3), Scalar(1) ); + l1 = l0; + VERIFY_IS_MUCH_SMALLER_THAN( l1.transform(rot*scaling).distance((rot*scaling) * p3), Scalar(1) ); + l1 = l0; + VERIFY_IS_MUCH_SMALLER_THAN( l1.transform(rot*scaling*translation) + .distance((rot*scaling*translation) * p3), Scalar(1) ); + l1 = l0; + VERIFY_IS_MUCH_SMALLER_THAN( l1.transform(rot*translation,Isometry) + .distance((rot*translation) * p3), Scalar(1) ); + } + } template<typename Scalar> void parametrizedline_alignment() diff --git a/test/product_symm.cpp b/test/product_symm.cpp index 74d7329b1..8c44383f9 100644 --- a/test/product_symm.cpp +++ b/test/product_symm.cpp @@ -39,6 +39,24 @@ template<typename Scalar, int Size, int OtherSize> void symm(int size = Size, in VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView<Lower>() * (s2*rhs1), rhs13 = (s1*m1) * (s2*rhs1)); + VERIFY_IS_APPROX(rhs12 = (s1*m2).transpose().template selfadjointView<Upper>() * (s2*rhs1), + rhs13 = (s1*m1.transpose()) * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView<Lower>().transpose() * (s2*rhs1), + rhs13 = (s1*m1.transpose()) * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).conjugate().template selfadjointView<Lower>() * (s2*rhs1), + rhs13 = (s1*m1).conjugate() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView<Lower>().conjugate() * (s2*rhs1), + rhs13 = (s1*m1).conjugate() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).adjoint().template selfadjointView<Upper>() * (s2*rhs1), + rhs13 = (s1*m1).adjoint() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView<Lower>().adjoint() * (s2*rhs1), + rhs13 = (s1*m1).adjoint() * (s2*rhs1)); + m2 = m1.template triangularView<Upper>(); rhs12.setRandom(); rhs13 = rhs12; m3 = m2.template selfadjointView<Upper>(); VERIFY_IS_EQUAL(m1, m3); diff --git a/unsupported/Eigen/src/SparseExtra/MarketIO.h b/unsupported/Eigen/src/SparseExtra/MarketIO.h index cdc14f86e..fc70a24d8 100644 --- a/unsupported/Eigen/src/SparseExtra/MarketIO.h +++ b/unsupported/Eigen/src/SparseExtra/MarketIO.h @@ -12,38 +12,38 @@ #define EIGEN_SPARSE_MARKET_IO_H #include <iostream> +#include <vector> namespace Eigen { namespace internal { - template <typename Scalar> - inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value) + template <typename Scalar, typename StorageIndex> + inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, Scalar& value) { - line >> i >> j >> value; - i--; - j--; - if(i>=0 && j>=0 && i<M && j<N) - { - return true; - } - else - return false; + std::stringstream sline(line); + sline >> i >> j >> value; } - template <typename Scalar> - inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value) + + template<> inline void GetMarketLine (const char* line, int& i, int& j, float& value) + { std::sscanf(line, "%d %d %g", &i, &j, &value); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, double& value) + { std::sscanf(line, "%d %d %lg", &i, &j, &value); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex<float>& value) + { std::sscanf(line, "%d %d %g %g", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); } + + template<> inline void GetMarketLine (const char* line, int& i, int& j, std::complex<double>& value) + { std::sscanf(line, "%d %d %lg %lg", &i, &j, &numext::real_ref(value), &numext::imag_ref(value)); } + + template <typename Scalar, typename StorageIndex> + inline void GetMarketLine (const char* line, StorageIndex& i, StorageIndex& j, std::complex<Scalar>& value) { + std::stringstream sline(line); Scalar valR, valI; - line >> i >> j >> valR >> valI; - i--; - j--; - if(i>=0 && j>=0 && i<M && j<N) - { - value = std::complex<Scalar>(valR, valI); - return true; - } - else - return false; + sline >> i >> j >> valR >> valI; + value = std::complex<Scalar>(valR,valI); } template <typename RealScalar> @@ -81,13 +81,13 @@ namespace internal } } - template<typename Scalar> - inline void PutMatrixElt(Scalar value, int row, int col, std::ofstream& out) + template<typename Scalar, typename StorageIndex> + inline void PutMatrixElt(Scalar value, StorageIndex row, StorageIndex col, std::ofstream& out) { out << row << " "<< col << " " << value << "\n"; } - template<typename Scalar> - inline void PutMatrixElt(std::complex<Scalar> value, int row, int col, std::ofstream& out) + template<typename Scalar, typename StorageIndex> + inline void PutMatrixElt(std::complex<Scalar> value, StorageIndex row, StorageIndex col, std::ofstream& out) { out << row << " " << col << " " << value.real() << " " << value.imag() << "\n"; } @@ -133,17 +133,20 @@ template<typename SparseMatrixType> bool loadMarket(SparseMatrixType& mat, const std::string& filename) { typedef typename SparseMatrixType::Scalar Scalar; - typedef typename SparseMatrixType::Index Index; + typedef typename SparseMatrixType::StorageIndex StorageIndex; std::ifstream input(filename.c_str(),std::ios::in); if(!input) return false; + + char rdbuffer[4096]; + input.rdbuf()->pubsetbuf(rdbuffer, 4096); const int maxBuffersize = 2048; char buffer[maxBuffersize]; bool readsizes = false; - typedef Triplet<Scalar,Index> T; + typedef Triplet<Scalar,StorageIndex> T; std::vector<T> elements; Index M(-1), N(-1), NNZ(-1); @@ -154,33 +157,36 @@ bool loadMarket(SparseMatrixType& mat, const std::string& filename) //NOTE An appropriate test should be done on the header to get the symmetry if(buffer[0]=='%') continue; - - std::stringstream line(buffer); - + if(!readsizes) { + std::stringstream line(buffer); line >> M >> N >> NNZ; if(M > 0 && N > 0 && NNZ > 0) { readsizes = true; - //std::cout << "sizes: " << M << "," << N << "," << NNZ << "\n"; mat.resize(M,N); mat.reserve(NNZ); } } else { - Index i(-1), j(-1); + StorageIndex i(-1), j(-1); Scalar value; - if( internal::GetMarketLine(line, M, N, i, j, value) ) + internal::GetMarketLine(buffer, i, j, value); + + i--; + j--; + if(i>=0 && j>=0 && i<M && j<N) { - ++ count; + ++count; elements.push_back(T(i,j,value)); } - else + else std::cerr << "Invalid read: " << i << "," << j << "\n"; } } + mat.setFromTriplets(elements.begin(), elements.end()); if(count!=NNZ) std::cerr << count << "!=" << NNZ << "\n"; @@ -225,12 +231,13 @@ template<typename SparseMatrixType> bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sym = 0) { typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::RealScalar RealScalar; std::ofstream out(filename.c_str(),std::ios::out); if(!out) return false; out.flags(std::ios_base::scientific); - out.precision(64); + out.precision(std::numeric_limits<RealScalar>::digits10 + 2); std::string header; internal::putMarketHeader<Scalar>(header, sym); out << header << std::endl; @@ -241,7 +248,6 @@ bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sy { ++ count; internal::PutMatrixElt(it.value(), it.row()+1, it.col()+1, out); - // out << it.row()+1 << " " << it.col()+1 << " " << it.value() << "\n"; } out.close(); return true; @@ -250,13 +256,14 @@ bool saveMarket(const SparseMatrixType& mat, const std::string& filename, int sy template<typename VectorType> bool saveMarketVector (const VectorType& vec, const std::string& filename) { - typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; std::ofstream out(filename.c_str(),std::ios::out); if(!out) return false; out.flags(std::ios_base::scientific); - out.precision(64); + out.precision(std::numeric_limits<RealScalar>::digits10 + 2); if(internal::is_same<Scalar, std::complex<float> >::value || internal::is_same<Scalar, std::complex<double> >::value) out << "%%MatrixMarket matrix array complex general\n"; else diff --git a/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp index b2b75cbde..c4521aac8 100644 --- a/unsupported/test/cxx11_tensor_shuffling_sycl.cpp +++ b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp @@ -57,6 +57,7 @@ static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) gpu2.device(sycl_device)=gpu1.shuffle(shuffles); sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize); + sycl_device.synchronize(); VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1); VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2); @@ -82,8 +83,9 @@ static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize)); TensorMap<Tensor<DataType, 4,DataLayout,IndexTypes>> gpu3(gpu_data3, tensorrangeShuffle); - gpu3.device(sycl_device)=gpu1.shuffle(shuffles); - sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize); + gpu3.device(sycl_device)=gpu1.shuffle(shuffles); + sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize); + sycl_device.synchronize(); VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3); VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4); diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp index a010ceb93..4f6723d6d 100644 --- a/unsupported/test/sparse_extra.cpp +++ b/unsupported/test/sparse_extra.cpp @@ -129,6 +129,19 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re } +template<typename SparseMatrixType> +void check_marketio() +{ + typedef Matrix<typename SparseMatrixType::Scalar, Dynamic, Dynamic> DenseMatrix; + Index rows = internal::random<Index>(1,100); + Index cols = internal::random<Index>(1,100); + SparseMatrixType m1, m2; + m1 = DenseMatrix::Random(rows, cols).sparseView(); + saveMarket(m1, "sparse_extra.mtx"); + loadMarket(m2, "sparse_extra.mtx"); + VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2)); +} + void test_sparse_extra() { for(int i = 0; i < g_repeat; i++) { @@ -143,5 +156,15 @@ void test_sparse_extra() CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) ); CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) ); + + CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s); } } |