// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob // // Eigen is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 3 of the License, or (at your option) any later version. // // Alternatively, you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 2 of // the License, or (at your option) any later version. // // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the // GNU General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License and a copy of the GNU General Public License along with // Eigen. If not, see . #ifndef EIGEN_DOT_H #define EIGEN_DOT_H /*************************************************************************** * Part 1 : the logic deciding a strategy for vectorization and unrolling ***************************************************************************/ template struct ei_dot_traits { public: enum { Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit) && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit) ? LinearVectorization : NoVectorization }; private: typedef typename Derived1::Scalar Scalar; enum { PacketSize = ei_packet_traits::size, Cost = Derived1::SizeAtCompileTime * (Derived1::CoeffReadCost + Derived2::CoeffReadCost + NumTraits::MulCost) + (Derived1::SizeAtCompileTime-1) * NumTraits::AddCost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Vectorization) == int(NoVectorization) ? 1 : int(PacketSize)) }; public: enum { Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; }; /*************************************************************************** * Part 2 : unrollers ***************************************************************************/ /*** no vectorization ***/ template struct ei_dot_novec_unroller { enum { HalfLength = Length/2 }; typedef typename Derived1::Scalar Scalar; inline static Scalar run(const Derived1& v1, const Derived2& v2) { return ei_dot_novec_unroller::run(v1, v2) + ei_dot_novec_unroller::run(v1, v2); } }; template struct ei_dot_novec_unroller { typedef typename Derived1::Scalar Scalar; inline static Scalar run(const Derived1& v1, const Derived2& v2) { return v1.coeff(Start) * ei_conj(v2.coeff(Start)); } }; /*** vectorization ***/ template::size)> struct ei_dot_vec_unroller { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits::type PacketScalar; enum { row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0 }; inline static PacketScalar run(const Derived1& v1, const Derived2& v2) { return ei_pmadd( v1.template packet(row1, col1), v2.template packet(row2, col2), ei_dot_vec_unroller::size, Stop>::run(v1, v2) ); } }; template struct ei_dot_vec_unroller { enum { row1 = Derived1::RowsAtCompileTime == 1 ? 0 : Index, col1 = Derived1::RowsAtCompileTime == 1 ? Index : 0, row2 = Derived2::RowsAtCompileTime == 1 ? 0 : Index, col2 = Derived2::RowsAtCompileTime == 1 ? Index : 0, alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned }; typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits::type PacketScalar; inline static PacketScalar run(const Derived1& v1, const Derived2& v2) { return ei_pmul(v1.template packet(row1, col1), v2.template packet(row2, col2)); } }; /*************************************************************************** * Part 3 : implementation of all cases ***************************************************************************/ template::Vectorization, int Unrolling = ei_dot_traits::Unrolling > struct ei_dot_impl; template struct ei_dot_impl { typedef typename Derived1::Scalar Scalar; static Scalar run(const Derived1& v1, const Derived2& v2) { ei_assert(v1.size()>0 && "you are using a non initialized vector"); Scalar res; res = v1.coeff(0) * ei_conj(v2.coeff(0)); for(int i = 1; i < v1.size(); ++i) res += v1.coeff(i) * ei_conj(v2.coeff(i)); return res; } }; template struct ei_dot_impl : public ei_dot_novec_unroller {}; template struct ei_dot_impl { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits::type PacketScalar; static Scalar run(const Derived1& v1, const Derived2& v2) { const int size = v1.size(); const int packetSize = ei_packet_traits::size; const int alignedSize = (size/packetSize)*packetSize; enum { alignment1 = (Derived1::Flags & AlignedBit) ? Aligned : Unaligned, alignment2 = (Derived2::Flags & AlignedBit) ? Aligned : Unaligned }; Scalar res; // do the vectorizable part of the sum if(size >= packetSize) { PacketScalar packet_res = ei_pmul( v1.template packet(0), v2.template packet(0) ); for(int index = packetSize; index(index), v2.template packet(index), packet_res ); } res = ei_predux(packet_res); // now we must do the rest without vectorization. if(alignedSize == size) return res; } else // too small to vectorize anything. // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. { res = Scalar(0); } // do the remainder of the vector for(int index = alignedSize; index < size; ++index) { res += v1.coeff(index) * v2.coeff(index); } return res; } }; template struct ei_dot_impl { typedef typename Derived1::Scalar Scalar; typedef typename ei_packet_traits::type PacketScalar; enum { PacketSize = ei_packet_traits::size, Size = Derived1::SizeAtCompileTime, VectorizationSize = (Size / PacketSize) * PacketSize }; static Scalar run(const Derived1& v1, const Derived2& v2) { Scalar res = ei_predux(ei_dot_vec_unroller::run(v1, v2)); if (VectorizationSize != Size) res += ei_dot_novec_unroller::run(v1, v2); return res; } }; /*************************************************************************** * Part 4 : implementation of MatrixBase methods ***************************************************************************/ /** \returns the dot product of *this with other. * * \only_for_vectors * * \note If the scalar type is complex numbers, then this function returns the hermitian * (sesquilinear) dot product, linear in the first variable and conjugate-linear in the * second variable. * * \sa squaredNorm(), norm() */ template template typename ei_traits::Scalar MatrixBase::dot(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT((ei_is_same_type::ret), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) ei_assert(size() == other.size()); // dot() must honor EvalBeforeNestingBit (eg: v.dot(M*v) ) typedef typename ei_cleantype::type ThisNested; typedef typename ei_cleantype::type OtherNested; return ei_dot_impl::run(derived(), other.derived()); } /** \returns the squared \em l2 norm of *this, i.e., for vectors, the dot product of *this with itself. * * \sa dot(), norm() */ template inline typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { return ei_real((*this).cwise().abs2().sum()); } /** \returns the \em l2 norm of *this, i.e., for vectors, the square root of the dot product of *this with itself. * * \sa dot(), squaredNorm() */ template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { return ei_sqrt(squaredNorm()); } /** \returns the \em l2 norm of \c *this using a numerically more stable * algorithm. * * \sa norm(), dot(), squaredNorm(), blueNorm() */ template inline typename NumTraits::Scalar>::Real MatrixBase::stableNorm() const { return this->cwise().abs().redux(ei_scalar_hypot_op()); } /** \internal Computes ibeta^iexp by binary expansion of iexp, * exact if ibeta is the machine base */ template inline T bexp(int ibeta, int iexp) { T tbeta = T(ibeta); T res = 1.0; int n = iexp; if (n<0) { n = - n; tbeta = 1.0/tbeta; } for(;;) { if ((n % 2)==0) res = res * tbeta; n = n/2; if (n==0) return res; tbeta = tbeta*tbeta; } return res; } /** \returns the \em l2 norm of \c *this using the Blue's algorithm. * A Portable Fortran Program to Find the Euclidean Norm of a Vector, * ACM TOMS, Vol 4, Issue 1, 1978. * * \sa norm(), dot(), squaredNorm(), stableNorm() */ template inline typename NumTraits::Scalar>::Real MatrixBase::blueNorm() const { static int nmax; static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr; int n; Scalar ax, abig, amed, asml; if(nmax <= 0) { int nbig, ibeta, it, iemin, iemax, iexp; Scalar abig, eps; // This program calculates the machine-dependent constants // bl, b2, slm, s2m, relerr overfl, nmax // from the "basic" machine-dependent numbers // nbig, ibeta, it, iemin, iemax, rbig. // The following define the basic machine-dependent constants. // For portability, the PORT subprograms "ilmaeh" and "rlmach" // are used. For any specific computer, each of the assignment // statements can be replaced nbig = std::numeric_limits::max(); // largest integer ibeta = NumTraits::Base; // base for floating-point numbers it = NumTraits::Mantissa; // number of base-beta digits in mantissa iemin = std::numeric_limits::min_exponent; // minimum exponent iemax = std::numeric_limits::max_exponent; // maximum exponent rbig = std::numeric_limits::max(); // largest floating-point number // Check the basic machine-dependent constants. if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2) { ei_assert(false && "the algorithm cannot be guaranteed on this computer"); } iexp = -((1-iemin)/2); b1 = bexp(ibeta, iexp); // lower boundary of midrange iexp = (iemax + 1 - it)/2; b2 = bexp(ibeta,iexp); // upper boundary of midrange iexp = (2-iemin)/2; s1m = bexp(ibeta,iexp); // scaling factor for lower range iexp = - ((iemax+it)/2); s2m = bexp(ibeta,iexp); // scaling factor for upper range overfl = rbig*s2m; // overfow boundary for abig eps = bexp(ibeta, 1-it); relerr = ei_sqrt(eps); // tolerance for neglecting asml abig = 1.0/eps - 1.0; if (Scalar(nbig)>abig) nmax = abig; // largest safe n else nmax = nbig; } n = size(); if(n==0) return 0; asml = Scalar(0); amed = Scalar(0); abig = Scalar(0); for(int j=0; j b2) abig += ei_abs2(ax*s2m); else if(ax < b1) asml += ei_abs2(ax*s1m); else amed += ei_abs2(ax); } if(abig > Scalar(0)) { abig = ei_sqrt(abig); if(abig > overfl) { ei_assert(false && "overflow"); return rbig; } if(amed > Scalar(0)) { abig = abig/s2m; amed = ei_sqrt(amed); } else { return abig/s2m; } } else if(asml > Scalar(0)) { if (amed > Scalar(0)) { abig = ei_sqrt(amed); amed = ei_sqrt(asml) / s1m; } else { return ei_sqrt(asml)/s1m; } } else { return ei_sqrt(amed); } asml = std::min(abig, amed); abig = std::max(abig, amed); if(asml <= abig*relerr) return abig; else return abig * ei_sqrt(Scalar(1) + ei_abs2(asml/abig)); } /** \returns an expression of the quotient of *this by its own norm. * * \only_for_vectors * * \sa norm(), normalize() */ template inline const typename MatrixBase::PlainMatrixType MatrixBase::normalized() const { typedef typename ei_nested::type Nested; typedef typename ei_unref::type _Nested; _Nested n(derived()); return n / n.norm(); } /** Normalizes the vector, i.e. divides it by its own norm. * * \only_for_vectors * * \sa norm(), normalized() */ template inline void MatrixBase::normalize() { *this /= norm(); } /** \returns true if *this is approximately orthogonal to \a other, * within the precision given by \a prec. * * Example: \include MatrixBase_isOrthogonal.cpp * Output: \verbinclude MatrixBase_isOrthogonal.out */ template template bool MatrixBase::isOrthogonal (const MatrixBase& other, RealScalar prec) const { typename ei_nested::type nested(derived()); typename ei_nested::type otherNested(other.derived()); return ei_abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } /** \returns true if *this is approximately an unitary matrix, * within the precision given by \a prec. In the case where the \a Scalar * type is real numbers, a unitary matrix is an orthogonal matrix, whence the name. * * \note This can be used to check whether a family of vectors forms an orthonormal basis. * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an * orthonormal basis. * * Example: \include MatrixBase_isUnitary.cpp * Output: \verbinclude MatrixBase_isUnitary.out */ template bool MatrixBase::isUnitary(RealScalar prec) const { typename Derived::Nested nested(derived()); for(int i = 0; i < cols(); ++i) { if(!ei_isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) return false; for(int j = 0; j < i; ++j) if(!ei_isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) return false; } return true; } #endif // EIGEN_DOT_H