From 6c5e915e9a6c79550e7e2db2b53648f163a1411d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 3 Dec 2013 17:17:53 +0100 Subject: Enable use of evaluators for noalias and lazyProduct, add conversion to scalar for inner products --- test/main.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'test/main.h') diff --git a/test/main.h b/test/main.h index 9dd8bc535..dda451a40 100644 --- a/test/main.h +++ b/test/main.h @@ -66,6 +66,8 @@ namespace Eigen static bool g_has_set_repeat, g_has_set_seed; } +#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl + #define EI_PP_MAKE_STRING2(S) #S #define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S) -- cgit v1.2.3 From 4aac87251f16094c01e9c5c8bbf094cd471a2306 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 22 Jul 2014 12:54:03 +0200 Subject: Re-enable a couple of unit tests with evaluators. --- Eigen/src/Core/SelfAdjointView.h | 2 +- Eigen/src/SparseCore/SparseMatrix.h | 12 ++++++++++-- test/CMakeLists.txt | 4 +++- test/main.h | 1 + test/sparse_basic.cpp | 9 +++++---- 5 files changed, 20 insertions(+), 8 deletions(-) (limited to 'test/main.h') diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h index c8bdec5d4..546f61252 100644 --- a/Eigen/src/Core/SelfAdjointView.h +++ b/Eigen/src/Core/SelfAdjointView.h @@ -35,7 +35,7 @@ struct traits > : traits typedef typename nested::type MatrixTypeNested; typedef typename remove_all::type MatrixTypeNestedCleaned; typedef MatrixType ExpressionType; - typedef typename MatrixType::PlainObject DenseMatrixType; + typedef typename MatrixType::PlainObject FullMatrixType; enum { Mode = UpLo | SelfAdjoint, Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 5f0c3d0a7..6080c272a 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -52,7 +52,9 @@ struct traits > MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic, Flags = _Options | NestByRefBit | LvalueBit, +#ifndef EIGEN_TEST_EVALUATORS CoeffReadCost = NumTraits::ReadCost, +#endif SupportedAccessPatterns = InnerRandomAccessPattern }; }; @@ -74,8 +76,10 @@ struct traits, DiagIndex> ColsAtCompileTime = 1, MaxRowsAtCompileTime = Dynamic, MaxColsAtCompileTime = 1, - Flags = 0, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10 + Flags = 0 +#ifndef EIGEN_TEST_EVALUATORS + , CoeffReadCost = _MatrixTypeNested::CoeffReadCost*10 +#endif }; }; @@ -1343,6 +1347,8 @@ template struct evaluator > : evaluator_base > { + typedef _Scalar Scalar; + typedef _Index Index; typedef SparseMatrix<_Scalar,_Options,_Index> SparseMatrixType; typedef typename SparseMatrixType::InnerIterator InnerIterator; typedef typename SparseMatrixType::ReverseInnerIterator ReverseInnerIterator; @@ -1358,6 +1364,8 @@ struct evaluator > operator SparseMatrixType&() { return m_matrix->const_cast_derived(); } operator const SparseMatrixType&() const { return *m_matrix; } + Scalar coeff(Index row, Index col) const { return m_matrix->coeff(row,col); } + const SparseMatrixType *m_matrix; }; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 154e62424..5b5b55c60 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -229,10 +229,12 @@ if(NOT EIGEN_TEST_EVALUATORS) ei_add_test(stdvector_overload) ei_add_test(stdlist) ei_add_test(stddeque) - ei_add_test(sparse_vector) +endif(NOT EIGEN_TEST_EVALUATORS) ei_add_test(sparse_basic) + ei_add_test(sparse_vector) ei_add_test(sparse_product) ei_add_test(sparse_solvers) +if(NOT EIGEN_TEST_EVALUATORS) ei_add_test(sparse_permutations) ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) diff --git a/test/main.h b/test/main.h index a3c157126..57996956d 100644 --- a/test/main.h +++ b/test/main.h @@ -72,6 +72,7 @@ namespace Eigen } #define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl +// #define TRACK while() #define EI_PP_MAKE_STRING2(S) #S #define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S) diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp index 4c9b9111e..c86534bad 100644 --- a/test/sparse_basic.cpp +++ b/test/sparse_basic.cpp @@ -201,9 +201,9 @@ template void sparse_basic(const SparseMatrixType& re VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); - //m2.innerVector(j0) = 2*m2.innerVector(j1); - //refMat2.col(j0) = 2*refMat2.col(j1); - //VERIFY_IS_APPROX(m2, refMat2); +// m2.innerVector(j0) = 2*m2.innerVector(j1); +// refMat2.col(j0) = 2*refMat2.col(j1); +// VERIFY_IS_APPROX(m2, refMat2); } // test innerVectors() @@ -239,7 +239,7 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2, refMat2); } - + // test basic computations { DenseMatrix refM1 = DenseMatrix::Zero(rows, rows); @@ -255,6 +255,7 @@ template void sparse_basic(const SparseMatrixType& re initSparse(density, refM3, m3); initSparse(density, refM4, m4); + VERIFY_IS_APPROX(m1*s1, refM1*s1); VERIFY_IS_APPROX(m1+m2, refM1+refM2); VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3); VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2)); -- cgit v1.2.3 From bae2e3327b27a21e5024e235255f403cfe1be2c5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 31 Jul 2014 13:35:49 +0200 Subject: Call product_generic_impl by default, and remove lot of boilerplate code --- Eigen/src/Core/CoreEvaluators.h | 4 +- Eigen/src/Core/Product.h | 4 +- Eigen/src/Core/ProductEvaluators.h | 130 ++------------------ Eigen/src/Geometry/Homogeneous.h | 175 ++++++++++++++++++++++++++- Eigen/src/Geometry/Transform.h | 22 ++++ Eigen/src/SparseCore/SparseDenseProduct.h | 42 +------ Eigen/src/SparseCore/SparseDiagonalProduct.h | 12 +- Eigen/src/SparseCore/SparsePermutation.h | 4 + Eigen/src/SparseCore/SparseProduct.h | 19 --- Eigen/src/SparseCore/SparseSelfAdjointView.h | 46 +------ test/main.h | 6 - 11 files changed, 223 insertions(+), 241 deletions(-) (limited to 'test/main.h') diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index b19a29e53..66984e378 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -67,8 +67,8 @@ struct evaluator_traits_base // typedef evaluator nestedType; // by default, get evaluator kind and shape from storage - typedef typename storage_kind_to_evaluator_kind::Kind Kind; - typedef typename storage_kind_to_shape::Shape Shape; + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; // 1 if assignment A = B assumes aliasing when B is of type T and thus B needs to be evaluated into a // temporary; 0 if not. diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 9e5e47d13..0cf20f2e2 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -108,8 +108,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, typedef typename ProductImpl< Lhs, Rhs, Option, - typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, internal::product_type::ret>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Product) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index b04df00e7..8a63384a7 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -90,9 +90,10 @@ struct evaluator_traits > enum { AssumeAliasing = 1 }; }; -// The evaluator for default dense products creates a temporary and call generic_product_impl -template -struct product_evaluator, ProductTag, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape, typename traits::Scalar, typename traits::Scalar> : public evaluator::PlainObject>::type { typedef Product XprType; @@ -118,7 +119,7 @@ struct product_evaluator, ProductTag, DenseSha // // generic_product_impl::evalTo(m_result, lhs, rhs); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } protected: @@ -501,8 +502,8 @@ protected: }; template -struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > - : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar > +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar > { typedef Product XprType; typedef Product BaseProduct; @@ -607,26 +608,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, TriangularShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - - template struct generic_product_impl : generic_product_impl_base > @@ -640,26 +621,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, DenseShape, TriangularShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - - /*************************************************************************** * SelfAdjoint products @@ -681,26 +642,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, SelfAdjointShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - - template struct generic_product_impl : generic_product_impl_base > @@ -714,24 +655,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, DenseShape, SelfAdjointShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; /*************************************************************************** * Diagonal products @@ -933,45 +856,6 @@ struct generic_product_impl, DenseShape, PermutationShape, P } }; -// TODO: left/right and self-adj/symmetric/permutation look the same ... Too much boilerplate? -template -struct product_evaluator, ProductTag, PermutationShape, DenseShape, typename traits::Scalar, typename traits::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - -template -struct product_evaluator, ProductTag, DenseShape, PermutationShape, typename traits::Scalar, typename traits::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index 00e71d190..07bc22154 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -48,8 +48,10 @@ struct traits > TmpFlags = _MatrixTypeNested::Flags & HereditaryBits, Flags = ColsAtCompileTime==1 ? (TmpFlags & ~RowMajorBit) : RowsAtCompileTime==1 ? (TmpFlags | RowMajorBit) - : TmpFlags, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + : TmpFlags +#ifndef EIGEN_TEST_EVALUATORS + , CoeffReadCost = _MatrixTypeNested::CoeffReadCost +#endif // EIGEN_TEST_EVALUATORS }; }; @@ -63,6 +65,7 @@ template class Homogeneous { public: + typedef MatrixType NestedExpression; enum { Direction = _Direction }; typedef MatrixBase Base; @@ -74,7 +77,10 @@ template class Homogeneous inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); } inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); } + + const NestedExpression& nestedExpression() const { return m_matrix; } +#ifndef EIGEN_TEST_EVALUATORS inline Scalar coeff(Index row, Index col) const { if( (int(Direction)==Vertical && row==m_matrix.rows()) @@ -106,6 +112,31 @@ template class Homogeneous eigen_assert(int(Direction)==Vertical); return internal::homogeneous_left_product_impl >(lhs,rhs.m_matrix); } +#else + template + inline const Product + operator* (const MatrixBase& rhs) const + { + eigen_assert(int(Direction)==Horizontal); + return Product(*this,rhs.derived()); + } + + template friend + inline const Product + operator* (const MatrixBase& lhs, const Homogeneous& rhs) + { + eigen_assert(int(Direction)==Vertical); + return Product(lhs.derived(),rhs); + } + + template friend + inline const Product, Homogeneous > + operator* (const Transform& lhs, const Homogeneous& rhs) + { + eigen_assert(int(Direction)==Vertical); + return Product, Homogeneous>(lhs,rhs); + } +#endif protected: typename MatrixType::Nested m_matrix; @@ -300,6 +331,146 @@ struct homogeneous_right_product_impl,Rhs> typename Rhs::Nested m_rhs; }; +#ifdef EIGEN_TEST_EVALUATORS +template +struct unary_evaluator, IndexBased> + : evaluator::PlainObject >::type +{ + typedef Homogeneous XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + unary_evaluator(const XprType& op) + : Base(), m_temp(op) + { + ::new (static_cast(this)) Base(m_temp); + } + +protected: + PlainObject m_temp; +}; + +// dense = homogeneous +template< typename DstXprType, typename ArgType, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> +{ + typedef Homogeneous SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); +// dst.topRows(src.nestedExpression().rows()) = src.nestedExpression(); + dst.row(dst.rows()-1).setOnes(); + } +}; + +// dense = homogeneous +template< typename DstXprType, typename ArgType, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, Scalar> +{ + typedef Homogeneous SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + dst.template leftCols(src.nestedExpression().cols()) = src.nestedExpression(); +// dst.leftCols(src.nestedExpression().cols()) = src.nestedExpression(); + dst.col(dst.cols()-1).setOnes(); + } +}; + +template +struct generic_product_impl, Rhs, DenseShape, DenseShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Homogeneous& lhs, const Rhs& rhs) + { + homogeneous_right_product_impl, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst); + } +}; + +template +struct generic_product_impl, DenseShape, DenseShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous& rhs) + { + homogeneous_left_product_impl, Lhs>(rhs.nestedExpression(), lhs).evalTo(dst); + } +}; + +template +struct generic_product_impl, Homogeneous, DenseShape, DenseShape, ProductTag> +{ + typedef Transform TransformType; + template + static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous& rhs) + { + homogeneous_left_product_impl, TransformType>(rhs.nestedExpression(), lhs).evalTo(dst); + } +}; + + +template +struct product_evaluator, Rhs, DefaultProduct>, ProductTag, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar> + : public evaluator, Rhs, DefaultProduct>::PlainObject>::type +{ + typedef Homogeneous Lhs; + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +template +struct product_evaluator, DefaultProduct>, ProductTag, DenseShape, DenseShape, typename traits::Scalar, typename traits::Scalar> + : public evaluator, DefaultProduct>::PlainObject>::type +{ + typedef Homogeneous Rhs; + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +template +struct product_evaluator, Homogeneous, DefaultProduct>, ProductTag, DenseShape, DenseShape, Scalar, typename traits::Scalar> + : public evaluator, Homogeneous, DefaultProduct>::PlainObject>::type +{ + typedef Transform Lhs; + typedef Homogeneous Rhs; + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename evaluator::type Base; + + product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +#endif // EIGEN_TEST_EVALUATORS + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index cb93acf6b..54d05f9cf 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -62,6 +62,23 @@ struct transform_construct_from_matrix; template struct transform_take_affine_part; +#ifdef EIGEN_TEST_EVALUATORS +template +struct traits > +{ + typedef _Scalar Scalar; + typedef DenseIndex Index; + typedef Dense StorageKind; + enum { + RowsAtCompileTime = _Dim, + ColsAtCompileTime = _Dim, + MaxRowsAtCompileTime = _Dim, + MaxColsAtCompileTime = _Dim, + Flags = 0 + }; +}; +#endif + } // end namespace internal /** \geometry_module \ingroup Geometry_Module @@ -355,6 +372,11 @@ public: inline Transform& operator=(const QTransform& other); inline QTransform toQTransform(void) const; #endif + +#ifdef EIGEN_TEST_EVALUATORS + Index rows() const { return m_matrix.cols(); } + Index cols() const { return m_matrix.cols(); } +#endif /** shortcut for m_matrix(row,col); * \sa MatrixBase::operator(Index,Index) const */ diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 8864b7308..a715b8bde 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -410,44 +410,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, SparseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - -template -struct product_evaluator, ProductTag, DenseShape, SparseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - template struct sparse_dense_outer_product_evaluator { @@ -530,7 +492,7 @@ protected: // sparse * dense outer product template -struct product_evaluator, OuterProduct, SparseShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, OuterProduct, SparseShape, DenseShape, typename traits::Scalar, typename traits::Scalar> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; @@ -545,7 +507,7 @@ struct product_evaluator, OuterProduct, Sparse }; template -struct product_evaluator, OuterProduct, DenseShape, SparseShape, typename Lhs::Scalar, typename Rhs::Scalar> +struct product_evaluator, OuterProduct, DenseShape, SparseShape, typename traits::Scalar, typename traits::Scalar> : sparse_dense_outer_product_evaluator { typedef sparse_dense_outer_product_evaluator Base; diff --git a/Eigen/src/SparseCore/SparseDiagonalProduct.h b/Eigen/src/SparseCore/SparseDiagonalProduct.h index 4c51881e0..9f465a828 100644 --- a/Eigen/src/SparseCore/SparseDiagonalProduct.h +++ b/Eigen/src/SparseCore/SparseDiagonalProduct.h @@ -197,11 +197,11 @@ enum { template struct sparse_diagonal_product_evaluator; -template -struct product_evaluator, ProductTag, DiagonalShape, SparseShape, typename Lhs::Scalar, typename Rhs::Scalar> +template +struct product_evaluator, ProductTag, DiagonalShape, SparseShape, typename traits::Scalar, typename traits::Scalar> : public sparse_diagonal_product_evaluator { - typedef Product XprType; + typedef Product XprType; typedef evaluator type; typedef evaluator nestedType; enum { CoeffReadCost = Dynamic, Flags = Rhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags @@ -210,11 +210,11 @@ struct product_evaluator, ProductTag, DiagonalShape, product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} }; -template -struct product_evaluator, ProductTag, SparseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> +template +struct product_evaluator, ProductTag, SparseShape, DiagonalShape, typename traits::Scalar, typename traits::Scalar> : public sparse_diagonal_product_evaluator, Lhs::Flags&RowMajorBit?SDP_AsCwiseProduct:SDP_AsScalarProduct> { - typedef Product XprType; + typedef Product XprType; typedef evaluator type; typedef evaluator nestedType; enum { CoeffReadCost = Dynamic, Flags = Lhs::Flags&RowMajorBit }; // FIXME CoeffReadCost & Flags diff --git a/Eigen/src/SparseCore/SparsePermutation.h b/Eigen/src/SparseCore/SparsePermutation.h index ebfefab98..a888ae9e1 100644 --- a/Eigen/src/SparseCore/SparsePermutation.h +++ b/Eigen/src/SparseCore/SparsePermutation.h @@ -196,6 +196,10 @@ struct generic_product_impl, SparseShape, PermutationShape, } }; +// TODO, the following two overloads are only needed to define the right temporary type through +// typename traits >::ReturnType +// while it should be correctly handled by traits >::PlainObject + template struct product_evaluator, ProductTag, PermutationShape, SparseShape, typename traits::Scalar, typename traits::Scalar> : public evaluator >::ReturnType>::type diff --git a/Eigen/src/SparseCore/SparseProduct.h b/Eigen/src/SparseCore/SparseProduct.h index 4e181d471..18f40b9d9 100644 --- a/Eigen/src/SparseCore/SparseProduct.h +++ b/Eigen/src/SparseCore/SparseProduct.h @@ -224,25 +224,6 @@ struct generic_product_impl } }; -template -struct product_evaluator, ProductTag, SparseShape, SparseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - template struct evaluator > > : public evaluator::PlainObject>::type diff --git a/Eigen/src/SparseCore/SparseSelfAdjointView.h b/Eigen/src/SparseCore/SparseSelfAdjointView.h index 530ff27bf..4235d6c4c 100644 --- a/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -441,46 +441,11 @@ struct generic_product_impl -struct product_evaluator, ProductTag, SparseSelfAdjointShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; - -template -struct product_evaluator, ProductTag, DenseShape, SparseSelfAdjointShape, typename Lhs::Scalar, typename Rhs::Scalar> - : public evaluator::PlainObject>::type -{ - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename evaluator::type Base; - - product_evaluator(const XprType& xpr) - : m_result(xpr.rows(), xpr.cols()) - { - ::new (static_cast(this)) Base(m_result); - generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); - } - -protected: - PlainObject m_result; -}; +// NOTE: these two overloads are needed to evaluate the sparse sefladjoint view into a full sparse matrix +// TODO: maybe the copy could be handled by generic_product_impl so that these overloads would not be needed anymore template -struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape, typename LhsView::Scalar, typename Rhs::Scalar> +struct product_evaluator, ProductTag, SparseSelfAdjointShape, SparseShape, typename traits::Scalar, typename traits::Scalar> : public evaluator::PlainObject>::type { typedef Product XprType; @@ -488,9 +453,8 @@ struct product_evaluator, ProductTag, Spar typedef typename evaluator::type Base; product_evaluator(const XprType& xpr) - : /*m_lhs(xpr.lhs()),*/ m_result(xpr.rows(), xpr.cols()) + : m_lhs(xpr.lhs()), m_result(xpr.rows(), xpr.cols()) { - m_lhs = xpr.lhs(); ::new (static_cast(this)) Base(m_result); generic_product_impl::evalTo(m_result, m_lhs, xpr.rhs()); } @@ -501,7 +465,7 @@ protected: }; template -struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape, typename Lhs::Scalar, typename RhsView::Scalar> +struct product_evaluator, ProductTag, SparseShape, SparseSelfAdjointShape, typename traits::Scalar, typename traits::Scalar> : public evaluator::PlainObject>::type { typedef Product XprType; diff --git a/test/main.h b/test/main.h index 57996956d..e89b5a305 100644 --- a/test/main.h +++ b/test/main.h @@ -279,13 +279,7 @@ inline bool test_isApproxOrLessThan(const long double& a, const long double& b) template inline bool test_isApprox(const Type1& a, const Type2& b) { -#ifdef EIGEN_TEST_EVALUATORS - typename internal::eval::type a_eval(a); - typename internal::eval::type b_eval(b); - return a_eval.isApprox(b_eval, test_precision()); -#else return a.isApprox(b, test_precision()); -#endif } // The idea behind this function is to compare the two scalars a and b where -- cgit v1.2.3 From 7ff266e3ce592ec1a6284cf16811965eec775f25 Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Fri, 29 Aug 2014 20:03:49 +0000 Subject: Initial VSX commit --- CMakeLists.txt | 6 + Eigen/Core | 17 +- Eigen/src/Core/arch/AltiVec/Complex.h | 186 ++++++++++++++++++++++ Eigen/src/Core/arch/AltiVec/PacketMath.h | 66 +++++++- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 4 +- Eigen/src/Core/util/Constants.h | 6 + bench/btl/libs/eigen2/eigen2_interface.hh | 2 +- cmake/EigenTesting.cmake | 6 + test/main.h | 2 +- test/packetmath.cpp | 2 +- 10 files changed, 285 insertions(+), 12 deletions(-) (limited to 'test/main.h') diff --git a/CMakeLists.txt b/CMakeLists.txt index 96d6c8701..ea42cc8db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -219,6 +219,12 @@ if(NOT MSVC) message(STATUS "Enabling AltiVec in tests/examples") endif() + option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF) + if(EIGEN_TEST_VSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx") + message(STATUS "Enabling VSX in tests/examples") + endif() + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8") diff --git a/Eigen/Core b/Eigen/Core index 776b7faf3..8ea165d5b 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -171,6 +171,15 @@ #undef bool #undef vector #undef pixel + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel #elif defined __ARM_NEON__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON @@ -235,6 +244,8 @@ inline static const char *SimdInstructionSetsInUse(void) { return "SSE, SSE2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; #else @@ -286,8 +297,12 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif defined EIGEN_VECTORIZE_ALTIVEC +#elif defined(EIGEN_VECTORIZE_ALTIVEC) + #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/Complex.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/VSX.h" #include "src/Core/arch/AltiVec/Complex.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/PacketMath.h" diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 13b874d0c..064341a3b 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -14,7 +14,13 @@ namespace Eigen { namespace internal { +#ifdef _BIG_ENDIAN static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_ZERO_);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; +#else +static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4f_ZERO_, (Packet4ui)p4i_ZERO);//{ 0x80000000, 0x00000000, 0x80000000, 0x00000000 }; +static Packet2ul p2ul_CONJ_XOR = (Packet2ul) vec_sld((Packet4ui)p2d_ZERO_, (Packet4ui)p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; +#endif + static Packet16uc p16uc_COMPLEX_RE = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_COMPLEX_IM = vec_sld(p16uc_DUPLICATE, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_COMPLEX_REV = vec_sld(p16uc_REVERSE, p16uc_REVERSE, 8);//{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 }; @@ -237,6 +243,186 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) kernel.packet[0].v = tmp; } +//---------- double ---------- +struct Packet1cd +{ + EIGEN_STRONG_INLINE Packet1cd() {} + EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {} + Packet2d v; +}; + +template<> struct packet_traits > : default_packet_traits +{ + typedef Packet1cd type; + typedef Packet1cd half; + enum { + Vectorizable = 1, + AlignedOnScalar = 0, + size = 1, + HasHalfPacket = 0, + + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasNegate = 1, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasSetLinear = 0 + }; +}; + +template<> struct unpacket_traits { typedef std::complex type; enum {size=1}; typedef Packet1cd half; }; + +template<> EIGEN_STRONG_INLINE Packet1cd padd(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_add(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd psub(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_sub(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); } +template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { return Packet1cd((Packet2d)vec_xor((Packet2ul)a.v, p2ul_CONJ_XOR)); } + +template<> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) +{ + Packet2d v1, v2; + + // Permute and multiply the real parts of a and b + v1 = vec_perm(a.v, a.v, p16uc_COMPLEX_RE); + // Get the imaginary parts of a + v2 = vec_perm(a.v, a.v, p16uc_COMPLEX_IM); + // multiply a_re * b + v1 = vec_madd(v1, b.v, p4f_ZERO); + // multiply a_im * b and get the conjugate result + v2 = vec_madd(v2, b.v, p4f_ZERO); + v2 = (Packet4f) vec_xor((Packet4ui)v2, p4ui_CONJ_XOR); + // permute back to a proper order + v2 = vec_perm(v2, v2, p16uc_COMPLEX_REV); + + return Packet2cf(vec_add(v1, v2)); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pand (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd por (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_or(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pxor (const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_xor(a.v,b.v)); } +template<> EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(vec_and(a.v, vec_nor(b.v,b.v))); } + +template<> EIGEN_STRONG_INLINE Packet1cd pload (const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); } +template<> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); } + +template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* from) +{ + return pset1(*from); +} + +template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } +template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } + +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { vec_dstt((double *)addr, DST_CTRL(2,2,32), DST_CHAN); } + +template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) +{ + std::complex EIGEN_ALIGN16 res[2]; + pstore((float *)&res, a.v); + + return res[0]; +} + +template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) +{ + Packet2d rev_a; + rev_a = vec_perm(a.v, a.v, p16uc_COMPLEX_REV2); + return Packet1cd(rev_a); +} + +template<> EIGEN_STRONG_INLINE std::complex predux(const Packet1cd& a) +{ + Packet2d b; + b = (Packet2d) vec_sld(a.v, a.v, 8); + b = padd(a.v, b); + return pfirst(Packet1cd(b)); +} + +template<> EIGEN_STRONG_INLINE Packet1cd preduxp(const Packet1cd* vecs) +{ + Packet2d b1, b2; + + b1 = (Packet2d) vec_sld(vecs[0].v, vecs[1].v, 8); + b2 = (Packet2d) vec_sld(vecs[1].v, vecs[0].v, 8); + b2 = (Packet2d) vec_sld(b2, b2, 8); + b2 = padd(b1, b2); + + return Packet1cd(b2); +} + +template<> EIGEN_STRONG_INLINE std::complex predux_mul(const Packet1cd& a) +{ + Packet2d b; + Packet1cd prod; + b = (Packet2d) vec_sld(a.v, a.v, 8); + prod = pmul(a, Packet1cd(b)); + + return pfirst(prod); +} + +template +struct palign_impl +{ + static EIGEN_STRONG_INLINE void run(Packet1cd& first, const Packet1cd& second) + { } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(a, pconj(b)); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return internal::pmul(pconj(a), b); + } +}; + +template<> struct conj_helper +{ + EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet1cd& y, const Packet1cd& c) const + { return padd(pmul(x,y),c); } + + EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) const + { + return pconj(internal::pmul(a, b)); + } +}; + +template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) +{ + // TODO optimize it for AltiVec + Packet1cd res = conj_helper().pmul(a,b); + Packet2d s = vec_madd(b.v, b.v, p4f_ZERO); + return Packet1cd(pdiv(res.v, vec_add(s,vec_perm(s, s, p16uc_COMPLEX_REV)))); +} + +template<> EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x) +{ + return Packet1cd(vec_perm(x.v, x.v, p16uc_COMPLEX_REV)); +} + +EIGEN_STRONG_INLINE void ptranspose(PacketBlock& kernel) +{ + Packet2d tmp = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_0); + kernel.packet[1].v = vec_perm(kernel.packet[0].v, kernel.packet[1].v, p16uc_COMPLEX_TRANSPOSE_1); + kernel.packet[0].v = tmp; +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index b43e8ace3..e70039ae2 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -31,6 +31,12 @@ namespace internal { #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 #endif +#ifdef __VSX__ +typedef __vector double Packet2d; +typedef __vector unsigned long Packet2ul; +typedef __vector long Packet2l; +#endif // __VSX__ + typedef __vector float Packet4f; typedef __vector int Packet4i; typedef __vector unsigned int Packet4ui; @@ -50,22 +56,37 @@ typedef __vector unsigned char Packet16uc; #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ Packet4f p4f_##NAME = pset1(X) -#define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \ - Packet4f p4f_##NAME = vreinterpretq_f32_u32(pset1(X)) - #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ Packet4i p4i_##NAME = pset1(X) +#define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \ + Packet2d p2d_##NAME = pset1(X) + +#define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \ + Packet2l p2l_##NAME = pset1(X) + #define DST_CHAN 1 #define DST_CTRL(size, count, stride) (((size) << 24) | ((count) << 16) | (stride)) +// Handle endianness properly while loading constants // Define global static constants: +#ifdef _BIG_ENDIAN static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; static Packet16uc p16uc_REVERSE = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); //{ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15} static Packet16uc p16uc_DUPLICATE = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7}; - +#else +static Packet4f p4f_COUNTDOWN = { 3.0, 2.0, 1.0, 0.0 }; +static Packet4i p4i_COUNTDOWN = { 3, 2, 1, 0 }; +static Packet16uc p16uc_REVERSE = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 }; +static Packet16uc p16uc_FORWARD = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3}; +static Packet16uc p16uc_DUPLICATE = { 4,5,6,7, 4,5,6,7, 0,1,2,3, 0,1,2,3 }; +static Packet2d p2d_ZERO_ = (Packet2d) { 0x8000000000000000, 0x8000000000000000 }; +static Packet2l p2l_ZERO = (Packet2l) { 0x0, 0x0 }; +#endif // _BIG_ENDIAN + +// These constants are endian-agnostic static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); //{ 0.0, 0.0, 0.0, 0.0} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); //{ 0, 0, 0, 0,} static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE,1); //{ 1, 1, 1, 1} @@ -93,6 +114,24 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 0 }; }; +#ifdef __VSX__ +template<> struct packet_traits : default_packet_traits +{ + typedef Packet2d type; + typedef Packet2d half; + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size=2, + HasHalfPacket = 0, + + HasDiv = 1, + HasExp = 0, + HasSqrt = 0 + }; +}; +#endif // __VSX__ + template<> struct packet_traits : default_packet_traits { typedef Packet4i type; @@ -105,6 +144,10 @@ template<> struct packet_traits : default_packet_traits }; }; +#ifdef __VSX__ +template<> struct unpacket_traits { typedef double type; enum {size=2}; typedef Packet2d half; }; +#endif // __VSX__ + template<> struct unpacket_traits { typedef float type; enum {size=4}; typedef Packet4f half; }; template<> struct unpacket_traits { typedef int type; enum {size=4}; typedef Packet4i half; }; /* @@ -311,7 +354,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } -// Logical Operations are not supported for float, so we have to reinterpret casts using NEON intrinsics template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pand(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); } @@ -327,10 +369,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, con template<> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { EIGEN_DEBUG_ALIGNED_LOAD return vec_ld(0, from); } +#ifndef __VSX__ template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_ALIGNED_LOAD - // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html Packet16uc MSQ, LSQ; Packet16uc mask; MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword @@ -350,6 +392,18 @@ template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) mask = vec_lvsl(0, from); // create the permute mask return (Packet4i) vec_perm(MSQ, LSQ, mask); // align the data } +#else +template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + return (Packet4i) vec_vsx_ld((long)from & 15, from); // align the data +} +template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) +{ + EIGEN_DEBUG_ALIGNED_LOAD + return (Packet4f) vec_vsx_ld((long)from & 15, from); // align the data +} +#endif template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) { diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 7da52c2e8..7b2ed6728 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -182,7 +182,7 @@ public: nr = 4, // register block size along the M direction (currently, this one cannot be modified) -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) +#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else @@ -290,7 +290,7 @@ public: NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, nr = 4, -#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) +#if defined(EIGEN_HAS_FUSED_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) // we assume 16 registers mr = 3*LhsPacketSize, #else diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 05107fdfe..31073b990 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -413,10 +413,16 @@ namespace Architecture Generic = 0x0, SSE = 0x1, AltiVec = 0x2, + VSX = 0x3, + NEON = 0x4, #if defined EIGEN_VECTORIZE_SSE Target = SSE #elif defined EIGEN_VECTORIZE_ALTIVEC Target = AltiVec +#elif defined EIGEN_VECTORIZE_VSX + Target = VSX +#elif defined EIGEN_VECTORIZE_NEON + Target = NEON #else Target = Generic #endif diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh index 47fe58135..1deabdae2 100644 --- a/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/bench/btl/libs/eigen2/eigen2_interface.hh @@ -47,7 +47,7 @@ public : { #if defined(EIGEN_VECTORIZE_SSE) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; - #elif defined(EIGEN_VECTORIZE_ALTIVEC) + #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; #else if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec"; diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 9b9776894..6383c6eac 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -282,6 +282,12 @@ macro(ei_testing_print_summary) message(STATUS "Altivec: Using architecture defaults") endif() + if(EIGEN_TEST_VSX) + message(STATUS "VSX: ON") + else() + message(STATUS "VSX: Using architecture defaults") + endif() + if(EIGEN_TEST_NEON) message(STATUS "ARM NEON: ON") else() diff --git a/test/main.h b/test/main.h index 773873a0d..7667eaa18 100644 --- a/test/main.h +++ b/test/main.h @@ -76,7 +76,7 @@ #endif // bounds integer values for AltiVec -#ifdef __ALTIVEC__ +#if defined(__ALTIVEC__) || defined(__VSX__) #define EIGEN_MAKING_DOCS #endif diff --git a/test/packetmath.cpp b/test/packetmath.cpp index e5dc473c2..e716d6d9a 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -156,7 +156,7 @@ template void packetmath() CHECK_CWISE2(REF_ADD, internal::padd); CHECK_CWISE2(REF_SUB, internal::psub); CHECK_CWISE2(REF_MUL, internal::pmul); - #ifndef EIGEN_VECTORIZE_ALTIVEC + #if !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX) if (!internal::is_same::value) CHECK_CWISE2(REF_DIV, internal::pdiv); #endif -- cgit v1.2.3 From aa5f79206fb632d141c3555338f89f59d1bb4633 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 20 Oct 2014 11:38:51 +0200 Subject: Fix bug #859: pexp(NaN) returned Inf instead of NaN --- Eigen/src/Core/arch/SSE/MathFunctions.h | 2 +- test/main.h | 20 ++++++++++++++++++++ test/packetmath.cpp | 6 ++++++ test/stable_norm.cpp | 20 -------------------- 4 files changed, 27 insertions(+), 21 deletions(-) (limited to 'test/main.h') diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 8f78b3a6c..b549e4870 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -167,7 +167,7 @@ Packet4f pexp(const Packet4f& _x) emm0 = _mm_cvttps_epi32(fx); emm0 = _mm_add_epi32(emm0, p4i_0x7f); emm0 = _mm_slli_epi32(emm0, 23); - return pmul(y, Packet4f(_mm_castsi128_ps(emm0))); + return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x); } template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp(const Packet2d& _x) diff --git a/test/main.h b/test/main.h index 371c7e602..adf4adb31 100644 --- a/test/main.h +++ b/test/main.h @@ -432,6 +432,26 @@ void randomPermutationVector(PermutationVectorType& v, typename PermutationVecto } } +template bool isNotNaN(const T& x) +{ + return x==x; +} + +template bool isNaN(const T& x) +{ + return x!=x; +} + +template bool isInf(const T& x) +{ + return x > NumTraits::highest(); +} + +template bool isMinusInf(const T& x) +{ + return x < NumTraits::lowest(); +} + } // end namespace Eigen template struct GetDifferentType; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index e716d6d9a..a4166d868 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -297,6 +297,12 @@ template void packetmath_real() data2[i] = internal::random(-87,88); } CHECK_CWISE1_IF(internal::packet_traits::HasExp, std::exp, internal::pexp); + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasExp,Packet> h; + h.store(data2, internal::pexp(h.load(data1))); + VERIFY(isNaN(data2[0])); + } for (int i=0; i bool isNotNaN(const T& x) -{ - return x==x; -} - -template bool isNaN(const T& x) -{ - return x!=x; -} - -template bool isInf(const T& x) -{ - return x > NumTraits::highest(); -} - -template bool isMinusInf(const T& x) -{ - return x < NumTraits::lowest(); -} - // workaround aggressive optimization in ICC template EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; } -- cgit v1.2.3 From 04ffb9956eb75fbab8f4926235ba011ae7a79d39 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 24 Oct 2014 13:18:23 +0200 Subject: Replace TEST_SET_BUT_UNUSED_VARIABLE by already defined EIGEN_UNUSED_VARIABLE --- test/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'test/main.h') diff --git a/test/main.h b/test/main.h index adf4adb31..b3fa68476 100644 --- a/test/main.h +++ b/test/main.h @@ -61,7 +61,7 @@ #endif // shuts down ICC's remark #593: variable "XXX" was set but never used -#define TEST_SET_BUT_UNUSED_VARIABLE(X) X = X + 0; +#define TEST_SET_BUT_UNUSED_VARIABLE(X) EIGEN_UNUSED_VARIABLE(X) // the following file is automatically generated by cmake #include "split_test_helper.h" -- cgit v1.2.3