aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-06 09:27:01 -0400
committerGravatar Benoit Jacob <jacob.benoit.1@gmail.com>2009-10-06 09:27:01 -0400
commit24e1d3266a9108f2b0bd348d1ebb09dc324b0eed (patch)
tree003a9aad956b368f8d996a686c5ca5b888c7bb5b
parent80ede36b2402e4f3015d8b99843fa6cdc124ca3c (diff)
parent4cf73660273fa8c1ea9db4e41bc576f93fd0ccc9 (diff)
merge
-rw-r--r--Eigen/src/Core/StableNorm.h14
-rw-r--r--Eigen/src/Core/products/GeneralMatrixVector.h35
-rw-r--r--test/stable_norm.cpp15
-rw-r--r--test/testsuite.cmake2
4 files changed, 44 insertions, 22 deletions
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index facab9dbd..b4a5d0bfd 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -108,21 +108,15 @@ MatrixBase<Derived>::blueNorm() const
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = std::numeric_limits<RealScalar>::max(); // largest floating-point number
- // Check the basic machine-dependent constants.
- if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
- || (it<=4 && ibeta <= 3 ) || it<2)
- {
- ei_assert(false && "the algorithm cannot be guaranteed on this computer");
- }
iexp = -((1-iemin)/2);
- b1 = RealScalar(std::pow(double(ibeta),iexp)); // lower boundary of midrange
+ b1 = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
- b2 = RealScalar(std::pow(double(ibeta),iexp)); // upper boundary of midrange
+ b2 = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // upper boundary of midrange
iexp = (2-iemin)/2;
- s1m = RealScalar(std::pow(double(ibeta),iexp)); // scaling factor for lower range
+ s1m = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // scaling factor for lower range
iexp = - ((iemax+it)/2);
- s2m = RealScalar(std::pow(double(ibeta),iexp)); // scaling factor for upper range
+ s2m = RealScalar(std::pow<RealScalar>(ibeta,iexp)); // scaling factor for upper range
overfl = rbig*s2m; // overfow boundary for abig
eps = RealScalar(std::pow(double(ibeta), 1-it));
diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h
index 57875035a..a18e5ef1d 100644
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -57,8 +57,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
if(ConjugateRhs)
alpha = ei_conj(alpha);
-// std::cerr << "prod " << size << " " << rhs.size() << "\n";
-
+ typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
@@ -69,9 +68,9 @@ void ei_cache_friendly_product_colmajor_times_vector(
const int PeelAlignedMask = PacketSize*peels-1;
// How many coeffs of the result do we have to skip to be aligned.
- // Here we assume data are at least aligned on the base scalar type that is mandatory anyway.
- const int alignedStart = ei_alignmentOffset(res,size);
- const int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
+ // Here we assume data are at least aligned on the base scalar type.
+ int alignedStart = ei_alignmentOffset(res,size);
+ int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const int alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
@@ -84,12 +83,18 @@ void ei_cache_friendly_product_colmajor_times_vector(
// find how many columns do we have to skip to be aligned with the result (if possible)
int skipColumns = 0;
- if (PacketSize>1)
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+ if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(res)%sizeof(RealScalar)) )
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ }
+ else if (PacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
while (skipColumns<PacketSize &&
- alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
+ alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%PacketSize))
++skipColumns;
if (skipColumns==PacketSize)
{
@@ -263,6 +268,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
ei_conj_helper<ConjugateLhs,ConjugateRhs> cj;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename ei_packet_traits<Scalar>::type Packet;
const int PacketSize = sizeof(Packet)/sizeof(Scalar);
@@ -274,9 +280,10 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
const int size = rhsSize;
// How many coeffs of the result do we have to skip to be aligned.
- // Here we assume data are at least aligned on the base scalar type that is mandatory anyway.
- const int alignedStart = ei_alignmentOffset(rhs, size);
- const int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
+ // Here we assume data are at least aligned on the base scalar type
+ // if that's not the case then vectorization is discarded, see below.
+ int alignedStart = ei_alignmentOffset(rhs, size);
+ int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
const int alignmentStep = PacketSize>1 ? (PacketSize - lhsStride % PacketSize) & PacketAlignedMask : 0;
@@ -289,7 +296,13 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
// find how many rows do we have to skip to be aligned with rhs (if possible)
int skipRows = 0;
- if (PacketSize>1)
+ // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats)
+ if( (size_t(lhs)%sizeof(RealScalar)) || (size_t(rhs)%sizeof(RealScalar)) )
+ {
+ alignedSize = 0;
+ alignedStart = 0;
+ }
+ else if (PacketSize>1)
{
ei_internal_assert(size_t(lhs+lhsAlignmentOffset)%sizeof(Packet)==0 || size<PacketSize);
diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index 726512ec0..6ce99477d 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -33,6 +33,21 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar;
+ // Check the basic machine-dependent constants.
+ {
+ int nbig, ibeta, it, iemin, iemax, iexp;
+ RealScalar abig, eps;
+
+ ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
+ it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
+ iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
+ iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
+
+ VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2))
+ && "the stable norm algorithm cannot be guaranteed on this computer");
+ }
+
+
int rows = m.rows();
int cols = m.cols();
diff --git a/test/testsuite.cmake b/test/testsuite.cmake
index 147b82d46..b223345b1 100644
--- a/test/testsuite.cmake
+++ b/test/testsuite.cmake
@@ -177,7 +177,7 @@ if(WIN32 AND NOT UNIX)
SET (CTEST_INITIAL_CACHE "
MAKECOMMAND:STRING=nmake -i
CMAKE_MAKE_PROGRAM:FILEPATH=nmake
- CMAKE_GENERATOR:INTERNAL=NMake Makefiles
+ CMAKE_GENERATOR:INTERNAL=NMake Makefiles
CMAKE_BUILD_TYPE:STRING=Release
BUILDNAME:STRING=${EIGEN_BUILD_STRING}
SITE:STRING=${EIGEN_SITE}