diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-10-06 09:27:01 -0400 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-10-06 09:27:01 -0400 |
commit | 24e1d3266a9108f2b0bd348d1ebb09dc324b0eed (patch) | |
tree | 003a9aad956b368f8d996a686c5ca5b888c7bb5b | |
parent | 80ede36b2402e4f3015d8b99843fa6cdc124ca3c (diff) | |
parent | 4cf73660273fa8c1ea9db4e41bc576f93fd0ccc9 (diff) |
merge
-rw-r--r-- | Eigen/src/Core/StableNorm.h | 14 | ||||
-rw-r--r-- | Eigen/src/Core/products/GeneralMatrixVector.h | 35 | ||||
-rw-r--r-- | test/stable_norm.cpp | 15 | ||||
-rw-r--r-- | test/testsuite.cmake | 2 |
4 files changed, 44 insertions, 22 deletions
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index facab9dbd..b4a5d0bfd 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -108,21 +108,15 @@ MatrixBase<Derived>::blueNorm() const iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent rbig = std::numeric_limits<RealScalar>::max(); // largest floating-point number - // Check the basic machine-dependent constants. - if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) - || (it<=4 && ibeta <= 3 ) || it<2) - { - ei_assert(false && "the algorithm cannot be guaranteed on this computer"); - } iexp = -((1-iemin)/2); - b1 = RealScalar(std::pow(double(ibeta),iexp)); // lower boundary of midrange + b1 = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // lower boundary of midrange iexp = (iemax + 1 - it)/2; - b2 = RealScalar(std::pow(double(ibeta),iexp)); // upper boundary of midrange + b2 = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // upper boundary of midrange iexp = (2-iemin)/2; - s1m = RealScalar(std::pow(double(ibeta),iexp)); // scaling factor for lower range + s1m = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // scaling factor for lower range iexp = - ((iemax+it)/2); - s2m = RealScalar(std::pow(double(ibeta),iexp)); // scaling factor for upper range + s2m = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // scaling factor for upper range overfl = rbig*s2m; // overfow boundary for abig eps = RealScalar(std::pow(double(ibeta), 1-it)); diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 57875035a..a18e5ef1d 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -57,8 +57,7 @@ void ei_cache_friendly_product_colmajor_times_vector( if(ConjugateRhs) alpha = ei_conj(alpha); -// std::cerr << "prod " << size << " " << rhs.size() << "\n"; - + typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename ei_packet_traits<Scalar>::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); @@ -69,9 +68,9 @@ void ei_cache_friendly_product_colmajor_times_vector( const int PeelAlignedMask = PacketSize*peels-1; // How many coeffs of the result do we have to skip to be aligned. - // Here we assume data are at least aligned on the base scalar type that is mandatory anyway. - const int alignedStart = ei_alignmentOffset(res,size); - const int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; + // Here we assume data are at least aligned on the base scalar type. + int alignedStart = ei_alignmentOffset(res,size); + int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const int alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; @@ -84,12 +83,18 @@ void ei_cache_friendly_product_colmajor_times_vector( // find how many columns do we have to skip to be aligned with the result (if possible) int skipColumns = 0; - if (PacketSize>1) + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(res)%sizeof(RealScalar)) ) + { + alignedSize = 0; + alignedStart = 0; + } + else if (PacketSize>1) { ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); while (skipColumns<PacketSize && - alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize)) + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize)) ++skipColumns; if (skipColumns==PacketSize) { @@ -263,6 +268,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( ei_conj_helper<ConjugateLhs,ConjugateRhs> cj; + typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename ei_packet_traits<Scalar>::type Packet; const int PacketSize = sizeof(Packet)/sizeof(Scalar); @@ -274,9 +280,10 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( const int size = rhsSize; // How many coeffs of the result do we have to skip to be aligned. - // Here we assume data are at least aligned on the base scalar type that is mandatory anyway. - const int alignedStart = ei_alignmentOffset(rhs, size); - const int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; + // Here we assume data are at least aligned on the base scalar type + // if that's not the case then vectorization is discarded, see below. + int alignedStart = ei_alignmentOffset(rhs, size); + int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0; const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart; const int alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0; @@ -289,7 +296,13 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector( // find how many rows do we have to skip to be aligned with rhs (if possible) int skipRows = 0; - if (PacketSize>1) + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(rhs)%sizeof(RealScalar)) ) + { + alignedSize = 0; + alignedStart = 0; + } + else if (PacketSize>1) { ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize); diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp index 726512ec0..6ce99477d 100644 --- a/test/stable_norm.cpp +++ b/test/stable_norm.cpp @@ -33,6 +33,21 @@ template<typename MatrixType> void stable_norm(const MatrixType& m) typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits<Scalar>::Real RealScalar; + // Check the basic machine-dependent constants. + { + int nbig, ibeta, it, iemin, iemax, iexp; + RealScalar abig, eps; + + ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers + it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa + iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent + iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent + + VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2)) + && "the stable norm algorithm cannot be guaranteed on this computer"); + } + + int rows = m.rows(); int cols = m.cols(); diff --git a/test/testsuite.cmake b/test/testsuite.cmake index 147b82d46..b223345b1 100644 --- a/test/testsuite.cmake +++ b/test/testsuite.cmake @@ -177,7 +177,7 @@ if(WIN32 AND NOT UNIX) SET (CTEST_INITIAL_CACHE " MAKECOMMAND:STRING=nmake -i CMAKE_MAKE_PROGRAM:FILEPATH=nmake - CMAKE_GENERATOR:INTERNAL=NMake Makefiles + CMAKE_GENERATOR:INTERNAL=NMake Makefiles CMAKE_BUILD_TYPE:STRING=Release BUILDNAME:STRING=${EIGEN_BUILD_STRING} SITE:STRING=${EIGEN_SITE} |