diff options
-rw-r--r-- | Eigen/src/Array/Random.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/Assign.h | 33 | ||||
-rw-r--r-- | Eigen/src/Core/CwiseNullaryOp.h | 74 | ||||
-rw-r--r-- | Eigen/src/Core/DenseBase.h | 15 | ||||
-rw-r--r-- | Eigen/src/Core/Functors.h | 71 | ||||
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/Transpose.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/SSE/PacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/util/Constants.h | 5 | ||||
-rw-r--r-- | doc/snippets/DenseBase_LinSpaced.cpp | 2 | ||||
-rw-r--r-- | doc/snippets/DenseBase_LinSpaced_seq.cpp | 2 | ||||
-rw-r--r-- | doc/snippets/DenseBase_setLinSpaced.cpp | 3 | ||||
-rw-r--r-- | test/nullary.cpp | 54 |
13 files changed, 252 insertions, 19 deletions
diff --git a/Eigen/src/Array/Random.h b/Eigen/src/Array/Random.h index 97ca0fba3..e1f4aa7ca 100644 --- a/Eigen/src/Array/Random.h +++ b/Eigen/src/Array/Random.h @@ -27,7 +27,7 @@ template<typename Scalar> struct ei_scalar_random_op { EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op) - inline const Scalar operator() (int, int) const { return ei_random<Scalar>(); } + inline const Scalar operator() (int, int = 0) const { return ei_random<Scalar>(); } }; template<typename Scalar> struct ei_functor_traits<ei_scalar_random_op<Scalar> > diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index e5c17b3f4..41653098f 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -378,6 +378,31 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli *** Linear vectorization *** ***************************/ +template <bool IsAligned = false> +struct ei_unaligned_assign_impl +{ + template <typename Derived, typename OtherDerived> + static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) {} +}; + +template <> +struct ei_unaligned_assign_impl<false> +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. +#ifdef _MSC_VER + template <typename Derived, typename OtherDerived> + static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) +#else + template <typename Derived, typename OtherDerived> + static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) +#endif + { + for (int index = start; index < end; ++index) + dst.copyCoeff(index, src); + } +}; + template<typename Derived1, typename Derived2> struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling> { @@ -389,16 +414,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling : ei_first_aligned(&dst.coeffRef(0), size); const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - for(int index = 0; index < alignedStart; ++index) - dst.copyCoeff(index, src); - + ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart); + for(int index = alignedStart; index < alignedEnd; index += packetSize) { dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); } - for(int index = alignedEnd; index < size; ++index) - dst.copyCoeff(index, src); + ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size); } }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index c5d5cd97c..f4dd0b695 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -80,23 +80,20 @@ class CwiseNullaryOp : ei_no_assignment_operator, } template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(int, int) const + EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const { - return m_functor.packetOp(); + return m_functor.packetOp(row, col); } EIGEN_STRONG_INLINE const Scalar coeff(int index) const { - if(RowsAtCompileTime == 1) - return m_functor(0, index); - else - return m_functor(index, 0); + return m_functor(index); } template<int LoadMode> - EIGEN_STRONG_INLINE PacketScalar packet(int) const + EIGEN_STRONG_INLINE PacketScalar packet(int index) const { - return m_functor.packetOp(); + return m_functor.packetOp(index); } protected: @@ -228,6 +225,49 @@ DenseBase<Derived>::Constant(const Scalar& value) return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value)); } +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * This particular version of LinSpaced() uses sequential access, i.e. vector access is + * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * and yields faster code than the random access version. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced_seq.cpp + * Output: \verbinclude DenseBase_LinSpaced_seq.out + * + * \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Scalar,Scalar,int), CwiseNullaryOp + */ +template<typename Derived> +EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType +DenseBase<Derived>::LinSpaced(Sequential_t, Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size)); +} + +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Sequential_t,Scalar,Scalar,int), CwiseNullaryOp + */ +template<typename Derived> +EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType +DenseBase<Derived>::LinSpaced(Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size)); +} + /** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template<typename Derived> bool DenseBase<Derived>::isApproxToConstant @@ -305,6 +345,24 @@ DenseStorageBase<Derived,_Base,_Options>::setConstant(int rows, int cols, const return setConstant(value); } +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * \sa CwiseNullaryOp + */ +template<typename Derived> +EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size)); +} // zero: diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 8ea0f3ddf..e0a3a04af 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -192,11 +192,15 @@ template<typename Derived> class DenseBase /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp<ei_linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ + typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType; - /** \internal expression tyepe of a column */ + /** \internal expression type of a column */ typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr; - /** \internal expression tyepe of a column */ + /** \internal expression type of a column */ typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr; #endif // not EIGEN_PARSED_BY_DOXYGEN @@ -343,6 +347,11 @@ template<typename Derived> class DenseBase static const ConstantReturnType Constant(const Scalar& value); + static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, Scalar low, Scalar high, int size); + static const RandomAccessLinSpacedReturnType + LinSpaced(Scalar low, Scalar high, int size); + template<typename CustomNullaryOp> static const CwiseNullaryOp<CustomNullaryOp, Derived> NullaryExpr(int rows, int cols, const CustomNullaryOp& func); @@ -362,11 +371,11 @@ template<typename Derived> class DenseBase void fill(const Scalar& value); Derived& setConstant(const Scalar& value); + Derived& setLinSpaced(Scalar low, Scalar high, int size); Derived& setZero(); Derived& setOnes(); Derived& setRandom(); - template<typename OtherDerived> bool isApprox(const DenseBase<OtherDerived>& other, RealScalar prec = dummy_precision<Scalar>()) const; diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index aa3eba5cc..31d0cff70 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -437,7 +437,7 @@ struct ei_scalar_constant_op { EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { } EIGEN_STRONG_INLINE const Scalar operator() (int, int = 0) const { return m_other; } - EIGEN_STRONG_INLINE const PacketScalar packetOp() const { return ei_pset1(m_other); } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int, int = 0) const { return ei_pset1(m_other); } const Scalar m_other; }; template<typename Scalar> @@ -452,6 +452,75 @@ template<typename Scalar> struct ei_functor_traits<ei_scalar_identity_op<Scalar> > { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; }; +template <typename Scalar, bool RandomAccess> struct ei_linspaced_op_impl; + +// linear access for packet ops: +// 1) initialization +// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) +// 2) each step +// base += [size*step, ..., size*step] +template <typename Scalar> +struct ei_linspaced_op_impl<Scalar,false> +{ + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + + ei_linspaced_op_impl(Scalar low, Scalar step) : + m_low(low), m_step(step), + m_packetStep(ei_pset1(ei_packet_traits<Scalar>::size*step)), + m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {} + + EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int) const { return m_base = ei_padd(m_base,m_packetStep); } + + const Scalar m_low; + const Scalar m_step; + const PacketScalar m_packetStep; + mutable PacketScalar m_base; +}; + +// random access for packet ops: +// 1) each step +// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) +template <typename Scalar> +struct ei_linspaced_op_impl<Scalar,true> +{ + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + + ei_linspaced_op_impl(Scalar low, Scalar step) : + m_low(low), m_step(step), + m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset<Scalar>(0)) {} + + EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int i) const + { return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Scalar>(i),m_interPacket))); } + + const Scalar m_low; + const Scalar m_step; + const PacketScalar m_lowPacket; + const PacketScalar m_stepPacket; + const PacketScalar m_interPacket; +}; + +// ----- Linspace functor ---------------------------------------------------------------- + +// Forward declaration (we default to random access which does not really give +// us a speed gain when using packet access but it allows to use the functor in +// nested expressions). +template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op; +template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> > +{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; }; +template <typename Scalar, bool RandomAccess> struct ei_linspaced_op +{ + typedef typename ei_packet_traits<Scalar>::type PacketScalar; + ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {} + EIGEN_STRONG_INLINE const Scalar operator() (int i, int = 0) const { return impl(i); } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int i, int = 0) const { return impl.packetOp(i); } + // This proxy object handles the actual required temporaries, the different + // implementations (random vs. sequential access) as well as the piping + // correct piping to size 2/4 packet operations. + const ei_linspaced_op_impl<Scalar,RandomAccess> impl; +}; + // allow to add new functors and specializations of ei_functor_traits from outside Eigen. // this macro is really needed because ei_functor_traits must be specialized after it is declared but before it is used... #ifdef EIGEN_FUNCTORS_PLUGIN diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index ae1720eca..08981f89d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -157,6 +157,10 @@ ei_ploadu(const Scalar* from) { return *from; } template<typename Scalar> inline typename ei_packet_traits<Scalar>::type ei_pset1(const Scalar& a) { return a; } +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ +template<typename Scalar> inline typename ei_packet_traits<Scalar>::type +ei_plset(const Scalar& a) { return a; } + /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, const Packet& from) { (*to) = from; } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 1113792fd..143b39033 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -63,7 +63,7 @@ template<typename MatrixType> class Transpose public: typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageType>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose) + EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose) inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {} diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index f65801137..a5a56f759 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -91,6 +91,10 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { r #endif template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); } +template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); } +template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); } + template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); } diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 01c1aeb2f..6ae103e66 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -224,6 +224,11 @@ namespace { EIGEN_UNUSED NoChange_t NoChange; } +struct Sequential_t {}; +namespace { + EIGEN_UNUSED Sequential_t Sequential; +} + struct Default_t {}; namespace { EIGEN_UNUSED Default_t Default; diff --git a/doc/snippets/DenseBase_LinSpaced.cpp b/doc/snippets/DenseBase_LinSpaced.cpp new file mode 100644 index 000000000..540709adc --- /dev/null +++ b/doc/snippets/DenseBase_LinSpaced.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(7,10,4) << endl; +cout << VectorXd::LinSpaced(0.0,1.0,5) << endl; diff --git a/doc/snippets/DenseBase_LinSpaced_seq.cpp b/doc/snippets/DenseBase_LinSpaced_seq.cpp new file mode 100644 index 000000000..73873c4e9 --- /dev/null +++ b/doc/snippets/DenseBase_LinSpaced_seq.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(Sequential,7,10,4).transpose() << endl; +cout << VectorXd::LinSpaced(Sequential,0.0,1.0,5).transpose() << endl; diff --git a/doc/snippets/DenseBase_setLinSpaced.cpp b/doc/snippets/DenseBase_setLinSpaced.cpp new file mode 100644 index 000000000..a8ea73407 --- /dev/null +++ b/doc/snippets/DenseBase_setLinSpaced.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setLinSpaced(0.5f,1.5f,5).transpose(); +cout << v << endl; diff --git a/test/nullary.cpp b/test/nullary.cpp index a7f6c60a7..240365529 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -46,6 +46,54 @@ bool equalsIdentity(const MatrixType& A) return offDiagOK && diagOK; } +template<typename VectorType> +void testVectorType(const VectorType& base) +{ + typedef typename ei_traits<VectorType>::Scalar Scalar; + Scalar low = ei_random(-500,500); + Scalar high = ei_random(-500,500); + if (low>high) std::swap(low,high); + const int size = base.size(); + const Scalar step = (high-low)/(size-1); + + // check whether the result yields what we expect it to do + VectorType m(base); + m.setLinSpaced(low,high,size); + + VectorType n(size); + for (int i=0; i<size; ++i) + n(i) = low+i*step; + + VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 ); + + // random access version + m = VectorType::LinSpaced(low,high,size); + VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 ); + + // These guys sometimes fail! This is not good. Any ideas how to fix them!? + //VERIFY( m(m.size()-1) == high ); + //VERIFY( m(0) == low ); + + // sequential access version + m = VectorType::LinSpaced(Sequential,low,high,size); + VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 ); + + // These guys sometimes fail! This is not good. Any ideas how to fix them!? + //VERIFY( m(m.size()-1) == high ); + //VERIFY( m(0) == low ); + + // check whether everything works with row and col major vectors + Matrix<Scalar,Dynamic,1> row_vector(size); + Matrix<Scalar,1,Dynamic> col_vector(size); + row_vector.setLinSpaced(low,high,size); + col_vector.setLinSpaced(low,high,size); + VERIFY( (row_vector-col_vector.transpose()).norm() < 1e-10 ); + + Matrix<Scalar,Dynamic,1> size_changer(size+50); + size_changer.setLinSpaced(low,high,size); + VERIFY( size_changer.size() == size ); +} + template<typename MatrixType> void testMatrixType(const MatrixType& m) { @@ -63,4 +111,10 @@ void test_nullary() CALL_SUBTEST_1( testMatrixType(Matrix2d()) ); CALL_SUBTEST_2( testMatrixType(MatrixXcf(50,50)) ); CALL_SUBTEST_3( testMatrixType(MatrixXf(5,7)) ); + CALL_SUBTEST_4( testVectorType(VectorXd(51)) ); + CALL_SUBTEST_5( testVectorType(VectorXd(41)) ); + CALL_SUBTEST_6( testVectorType(Vector3d()) ); + CALL_SUBTEST_7( testVectorType(VectorXf(51)) ); + CALL_SUBTEST_8( testVectorType(VectorXf(41)) ); + CALL_SUBTEST_9( testVectorType(Vector3f()) ); } |