diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-01-14 13:26:58 -0800 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2019-01-14 13:26:58 -0800 |
commit | ec7fe83554d15dc3a2cabe9468263bcb09195b99 (patch) | |
tree | 249bfbb8ab98c6be11c541578510544f85e2932e | |
parent | 2c5843dbbbcb5b925e06d75f58cc3bc09f19c3bb (diff) | |
parent | 5a59452aaed01c275f0aad0c4cee86ce95292de6 (diff) |
Merge.
-rw-r--r-- | Eigen/src/Core/GenericPacketMath.h | 2 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/Complex.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/arch/AVX512/PacketMath.h | 4 | ||||
-rw-r--r-- | Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h | 123 | ||||
-rw-r--r-- | Eigen/src/Core/util/ConfigureVectorization.h | 7 | ||||
-rw-r--r-- | Eigen/src/Core/util/Macros.h | 43 | ||||
-rwxr-xr-x | Eigen/src/Core/util/Meta.h | 33 | ||||
-rw-r--r-- | Eigen/src/Geometry/Transform.h | 7 | ||||
-rw-r--r-- | Eigen/src/LU/FullPivLU.h | 10 | ||||
-rw-r--r-- | Eigen/src/LU/PartialPivLU.h | 2 | ||||
-rw-r--r-- | doc/CoeffwiseMathFunctionsTable.dox | 1 | ||||
-rwxr-xr-x | test/geo_transformations.cpp | 61 | ||||
-rw-r--r-- | test/lu.cpp | 4 | ||||
-rw-r--r-- | test/packetmath.cpp | 1 |
14 files changed, 246 insertions, 64 deletions
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index bb3275fe8..04a321b9f 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -216,7 +216,7 @@ pandnot(const Packet& a, const Packet& b) { return a & (~b); } /** \internal \returns ones */ template<typename Packet> EIGEN_DEVICE_FUNC inline Packet -ptrue(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;} +ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} template <typename RealScalar> EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) { diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index f2034a713..7bb2fd630 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -308,18 +308,18 @@ template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex< template<> EIGEN_DEVICE_FUNC inline Packet4cd pgather<std::complex<double>, Packet4cd>(const std::complex<double>* from, Index stride) { return Packet4cd(_mm512_insertf64x4(_mm512_castpd256_pd512( - _mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+0*stride).v), pload<Packet1cd>(from+1*stride).v,1)), - _mm256_insertf128_pd(_mm256_castpd128_pd256(pload<Packet1cd>(from+2*stride).v), pload<Packet1cd>(from+3*stride).v,1), 1)); + _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+0*stride).v), ploadu<Packet1cd>(from+1*stride).v,1)), + _mm256_insertf128_pd(_mm256_castpd128_pd256(ploadu<Packet1cd>(from+2*stride).v), ploadu<Packet1cd>(from+3*stride).v,1), 1)); } template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet4cd>(std::complex<double>* to, const Packet4cd& from, Index stride) { __m512i fromi = _mm512_castpd_si512(from.v); double* tod = (double*)(void*)to; - _mm_store_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) ); - _mm_store_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) ); - _mm_store_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) ); - _mm_store_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) ); + _mm_storeu_pd(tod+0*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,0)) ); + _mm_storeu_pd(tod+2*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,1)) ); + _mm_storeu_pd(tod+4*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,2)) ); + _mm_storeu_pd(tod+6*stride, _mm_castsi128_pd(_mm512_extracti32x4_epi32(fromi,3)) ); } template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet4cd>(const Packet4cd& a) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 1164f24b1..4832f2a3b 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -57,7 +57,7 @@ template<> struct packet_traits<float> : default_packet_traits HasBlend = 0, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, -#if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG +#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, #endif @@ -77,7 +77,7 @@ template<> struct packet_traits<double> : default_packet_traits AlignedOnScalar = 1, size = 8, HasHalfPacket = 1, -#if EIGEN_GNUC_AT_LEAST(5, 3) +#if EIGEN_GNUC_AT_LEAST(5, 3) || (!EIGEN_COMP_GNUC_STRICT) HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, #endif diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 8c6e4f5c7..ce3f0fc68 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -3,7 +3,7 @@ // // Copyright (C) 2007 Julien Pommier // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) -// Copyright (C) 2009-2018 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2009-2019 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -253,15 +253,68 @@ Packet pexp_double(const Packet _x) return pmax(pldexp(x,fx), _x); } -/* The code is the rewriting of the cephes sinf/cosf functions. - Precision is excellent as long as x < 8192 (I did not bother to - take into account the special handling they have for greater values - -- it does not return garbage for arguments over 8192, though, but - the extra precision is missing). +// The following code is inspired by the following stack-overflow answer: +// https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751 +// It has been largely optimized: +// - By-pass calls to frexp. +// - Aligned loads of required 96 bits of 2/pi. This is accomplished by +// (1) balancing the mantissa and exponent to the required bits of 2/pi are +// aligned on 8-bits, and (2) replicating the storage of the bits of 2/pi. +// - Avoid a branch in rounding and extraction of the remaining fractional part. +// Overall, I measured a speed up higher than x2 on x86-64. +inline float trig_reduce_huge (float xf, int *quadrant) +{ + using Eigen::numext::int32_t; + using Eigen::numext::uint32_t; + using Eigen::numext::int64_t; + using Eigen::numext::uint64_t; - Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the - surprising but correct result. -*/ + const double pio2_62 = 3.4061215800865545e-19; // pi/2 * 2^-62 + const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point foramt + + // 192 bits of 2/pi for Payne-Hanek reduction + // Bits are introduced by packet of 8 to enable aligned reads. + static const uint32_t two_over_pi [] = + { + 0x00000028, 0x000028be, 0x0028be60, 0x28be60db, + 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a, + 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4, + 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770, + 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566, + 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410, + 0x10e41000, 0xe4100000 + }; + + uint32_t xi = numext::as_uint(xf); + // Below, -118 = -126 + 8. + // -126 is to get the exponent, + // +8 is to enable alignment of 2/pi's bits on 8 bits. + // This is possible because the fractional part of x as only 24 meaningful bits. + uint32_t e = (xi >> 23) - 118; + // Extract the mantissa and shift it to align it wrt the exponent + xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7); + + uint32_t i = e >> 3; + uint32_t twoopi_1 = two_over_pi[i-1]; + uint32_t twoopi_2 = two_over_pi[i+3]; + uint32_t twoopi_3 = two_over_pi[i+7]; + + // Compute x * 2/pi in 2.62-bit fixed-point format. + uint64_t p; + p = uint64_t(xi) * twoopi_3; + p = uint64_t(xi) * twoopi_2 + (p >> 32); + p = (uint64_t(xi * twoopi_1) << 32) + p; + + // Round to nearest: add 0.5 and extract integral part. + uint64_t q = (p + zero_dot_five) >> 62; + *quadrant = int(q); + // Now it remains to compute "r = x - q*pi/2" with high accuracy, + // since we have p=x/(pi/2) with high accuracy, we can more efficiently compute r as: + // r = (p-q)*pi/2, + // where the product can be be carried out with sufficient accuracy using double precision. + p -= q<<62; + return float(double(int64_t(p)) * pio2_62); +} template<bool ComputeSine,typename Packet> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS @@ -285,17 +338,6 @@ Packet psincos_float(const Packet& _x) PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24) y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi - // Compute the sign to apply to the polynomial. - // sin: sign = second_bit(y_int) xor signbit(_x) - // cos: sign = second_bit(y_int+1) - Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<30>(y_int))) - : preinterpret<Packet>(pshiftleft<30>(padd(y_int,csti_1))); - sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit - - // Get the polynomial selection mask from the second bit of y_int - // We'll calculate both (sin and cos) polynomials and then select from the two. - Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int))); - // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4 // using "Extended precision modular arithmetic" #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) @@ -332,29 +374,36 @@ Packet psincos_float(const Packet& _x) // The coefficients are: 0xbfc90f80, 0xb7354480, 0x2e74b9ee #endif - // We use huge_vals as a temporary for abs(_x) to ensure huge_vals - // is fully initialized for the last pselect(). (prevent compiler warning) - Packet huge_vals = pabs(_x); - Packet huge_mask = pcmp_le(pset1<Packet>(huge_th),huge_vals); - - if(predux_any(huge_mask)) + if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x)))) { const int PacketSize = unpacket_traits<Packet>::size; - #if EIGEN_HAS_CXX11 - alignas(Packet) float vals[PacketSize]; - #else EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize]; - #endif - pstoreu(vals, _x); - for(int k=0; k<PacketSize;++k) { + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float x_cpy[PacketSize]; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) int y_int2[PacketSize]; + pstoreu(vals, pabs(_x)); + pstoreu(x_cpy, x); + pstoreu(y_int2, y_int); + for(int k=0; k<PacketSize;++k) + { float val = vals[k]; - if(numext::abs(val)>=huge_th) { - vals[k] = ComputeSine ? std::sin(val) : std::cos(val); - } + if(val>=huge_th && (numext::isfinite)(val)) + x_cpy[k] = trig_reduce_huge(val,&y_int2[k]); } - huge_vals = ploadu<Packet>(vals); + x = ploadu<Packet>(x_cpy); + y_int = ploadu<PacketI>(y_int2); } + // Compute the sign to apply to the polynomial. + // sin: sign = second_bit(y_int) xor signbit(_x) + // cos: sign = second_bit(y_int+1) + Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(pshiftleft<30>(y_int))) + : preinterpret<Packet>(pshiftleft<30>(padd(y_int,csti_1))); + sign_bit = pand(sign_bit, cst_sign_mask); // clear all but left most bit + + // Get the polynomial selection mask from the second bit of y_int + // We'll calculate both (sin and cos) polynomials and then select from the two. + Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int))); + Packet x2 = pmul(x,x); // Evaluate the cos(x) polynomial. (-Pi/4 <= x <= Pi/4) @@ -383,7 +432,7 @@ Packet psincos_float(const Packet& _x) : pselect(poly_mask,y1,y2); // Update the sign and filter huge inputs - return pselect(huge_mask, huge_vals, pxor(y, sign_bit)); + return pxor(y, sign_bit); } template<typename Packet> diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h index 121476394..b4d423cb0 100644 --- a/Eigen/src/Core/util/ConfigureVectorization.h +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -29,10 +29,15 @@ * * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. + * + * FIXME: this code can be cleaned up once we switch to proper C++11 only. */ #if (defined EIGEN_CUDACC) #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) #define EIGEN_ALIGNOF(x) __alignof(x) +#elif EIGEN_HAS_ALIGNAS + #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n) + #define EIGEN_ALIGNOF(x) alignof(x) #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #define EIGEN_ALIGNOF(x) __alignof(x) @@ -44,7 +49,7 @@ #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #define EIGEN_ALIGNOF(x) __alignof(x) #else - #error Please tell me what is the equivalent of __attribute__((aligned(n))) and __alignof(x) for your compiler + #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler #endif // If the user explicitly disable vectorization, then we also disable alignment diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index c7dba1fc4..982b98b50 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -129,16 +129,21 @@ #define EIGEN_COMP_MSVC_STRICT 0 #endif -/// \internal EIGEN_COMP_IBM set to 1 if the compiler is IBM XL C++ -#if defined(__IBMCPP__) || defined(__xlc__) - #define EIGEN_COMP_IBM 1 +/// \internal EIGEN_COMP_IBM set to xlc version if the compiler is IBM XL C++ +// XLC version +// 3.1 0x0301 +// 4.5 0x0405 +// 5.0 0x0500 +// 12.1 0x0C01 +#if defined(__IBMCPP__) || defined(__xlc__) || defined(__ibmxl__) + #define EIGEN_COMP_IBM __xlC__ #else #define EIGEN_COMP_IBM 0 #endif -/// \internal EIGEN_COMP_PGI set to 1 if the compiler is Portland Group Compiler +/// \internal EIGEN_COMP_PGI set to PGI version if the compiler is Portland Group Compiler #if defined(__PGI) - #define EIGEN_COMP_PGI 1 + #define EIGEN_COMP_PGI (__PGIC__*100+__PGIC_MINOR__) #else #define EIGEN_COMP_PGI 0 #endif @@ -347,9 +352,17 @@ #define EIGEN_OS_WIN_STRICT 0 #endif -/// \internal EIGEN_OS_SUN set to 1 if the OS is SUN +/// \internal EIGEN_OS_SUN set to __SUNPRO_C if the OS is SUN +// compiler solaris __SUNPRO_C +// version studio +// 5.7 10 0x570 +// 5.8 11 0x580 +// 5.9 12 0x590 +// 5.10 12.1 0x5100 +// 5.11 12.2 0x5110 +// 5.12 12.3 0x5120 #if (defined(sun) || defined(__sun)) && !(defined(__SVR4) || defined(__svr4__)) - #define EIGEN_OS_SUN 1 + #define EIGEN_OS_SUN __SUNPRO_C #else #define EIGEN_OS_SUN 0 #endif @@ -546,6 +559,22 @@ #endif #endif +#ifndef EIGEN_HAS_ALIGNAS +#if EIGEN_MAX_CPP_VER>=11 && EIGEN_HAS_CXX11 && \ + ( __has_feature(cxx_alignas) \ + || EIGEN_HAS_CXX14 \ + || (EIGEN_COMP_MSVC >= 1800) \ + || (EIGEN_GNUC_AT_LEAST(4,8)) \ + || (EIGEN_COMP_CLANG>=305) \ + || (EIGEN_COMP_ICC>=1500) \ + || (EIGEN_COMP_PGI>=1500) \ + || (EIGEN_COMP_SUN>=0x5130)) +#define EIGEN_HAS_ALIGNAS 1 +#else +#define EIGEN_HAS_ALIGNAS 0 +#endif +#endif + // Does the compiler support type_traits? // - full support of type traits was added only to GCC 5.1.0. // - 20150626 corresponds to the last release of 4.x libstdc++ diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 1415b3fc1..8fcb18a94 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -636,8 +636,41 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); } #endif +/** \internal extract the bits of the float \a x */ +inline unsigned int as_uint(float x) +{ + unsigned int ret; + std::memcpy(&ret, &x, sizeof(float)); + return ret; +} + } // end namespace numext } // end namespace Eigen +// Define portable (u)int{32,64} types +#if EIGEN_HAS_CXX11 +#include <cstdint> +namespace Eigen { +namespace numext { +typedef std::uint32_t uint32_t; +typedef std::int32_t int32_t; +typedef std::uint64_t uint64_t; +typedef std::int64_t int64_t; +} +} +#else +// Without c++11, all compilers able to compile Eigen also +// provides the C99 stdint.h header file. +#include <stdint.h> +namespace Eigen { +namespace numext { +typedef ::uint32_t uint32_t; +typedef ::int32_t int32_t; +typedef ::uint64_t uint64_t; +typedef ::int64_t int64_t; +} +} +#endif + #endif // EIGEN_META_H diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 75991aaed..3670767aa 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -97,6 +97,9 @@ template<int Mode> struct transform_make_affine; * - #AffineCompact: the transformation is stored as a (Dim)x(Dim+1) matrix. * - #Projective: the transformation is stored as a (Dim+1)^2 matrix * without any assumption. + * - #Isometry: same as #Affine with the additional assumption that + * the linear part represents a rotation. This assumption is exploited + * to speed up some functions such as inverse() and rotation(). * \tparam _Options has the same meaning as in class Matrix. It allows to specify DontAlign and/or RowMajor. * These Options are passed directly to the underlying matrix type. * @@ -252,11 +255,11 @@ protected: public: /** Default constructor without initialization of the meaningful coefficients. - * If Mode==Affine, then the last row is set to [0 ... 0 1] */ + * If Mode==Affine or Mode==Isometry, then the last row is set to [0 ... 0 1] */ EIGEN_DEVICE_FUNC inline Transform() { check_template_params(); - internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix); + internal::transform_make_affine<(int(Mode)==Affine || int(Mode)==Isometry) ? Affine : AffineCompact>::run(m_matrix); } EIGEN_DEVICE_FUNC inline Transform(const Transform& other) diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h index 344ec8926..b4f4bc6ee 100644 --- a/Eigen/src/LU/FullPivLU.h +++ b/Eigen/src/LU/FullPivLU.h @@ -18,6 +18,7 @@ template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> > { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; + typedef int StorageIndex; enum { Flags = 0 }; }; @@ -64,7 +65,6 @@ template<typename _MatrixType> class FullPivLU typedef SolverBase<FullPivLU> Base; EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) - // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime @@ -529,8 +529,8 @@ void FullPivLU<MatrixType>::computeInPlace() m_nonzero_pivots = k; for(Index i = k; i < size; ++i) { - m_rowsTranspositions.coeffRef(i) = i; - m_colsTranspositions.coeffRef(i) = i; + m_rowsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i); + m_colsTranspositions.coeffRef(i) = internal::convert_index<StorageIndex>(i); } break; } @@ -541,8 +541,8 @@ void FullPivLU<MatrixType>::computeInPlace() // Now that we've found the pivot, we need to apply the row/col swaps to // bring it to the location (k,k). - m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner; - m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner; + m_rowsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(row_of_biggest_in_corner); + m_colsTranspositions.coeffRef(k) = internal::convert_index<StorageIndex>(col_of_biggest_in_corner); if(k != row_of_biggest_in_corner) { m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner)); ++number_of_transpositions; diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index bfcd2c95b..ecc0e748f 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -19,6 +19,7 @@ template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> > { typedef MatrixXpr XprKind; typedef SolverStorage StorageKind; + typedef int StorageIndex; typedef traits<_MatrixType> BaseTraits; enum { Flags = BaseTraits::Flags & RowMajorBit, @@ -80,7 +81,6 @@ template<typename _MatrixType> class PartialPivLU typedef _MatrixType MatrixType; typedef SolverBase<PartialPivLU> Base; EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) - // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime diff --git a/doc/CoeffwiseMathFunctionsTable.dox b/doc/CoeffwiseMathFunctionsTable.dox index e14eaf615..080e056e1 100644 --- a/doc/CoeffwiseMathFunctionsTable.dox +++ b/doc/CoeffwiseMathFunctionsTable.dox @@ -321,7 +321,6 @@ This also means that, unless specified, if the function \c std::foo is available <td></td> </tr> <tr> -<tr> <td class="code"> \anchor cwisetable_asinh a.\link ArrayBase::asinh asinh\endlink(); \n diff --git a/test/geo_transformations.cpp b/test/geo_transformations.cpp index bf920696b..25f1d9aa0 100755 --- a/test/geo_transformations.cpp +++ b/test/geo_transformations.cpp @@ -612,6 +612,62 @@ template<typename Scalar, int Dim, int Options> void transform_products() VERIFY_IS_APPROX((ac*p).matrix(), a_m*p_m); } +template<typename Scalar, int Mode, int Options> void transformations_no_scale() +{ + /* this test covers the following files: + Cross.h Quaternion.h, Transform.h + */ + typedef Matrix<Scalar,3,1> Vector3; + typedef Matrix<Scalar,4,1> Vector4; + typedef Quaternion<Scalar> Quaternionx; + typedef AngleAxis<Scalar> AngleAxisx; + typedef Transform<Scalar,3,Mode,Options> Transform3; + typedef Translation<Scalar,3> Translation3; + typedef Matrix<Scalar,4,4> Matrix4; + + Vector3 v0 = Vector3::Random(), + v1 = Vector3::Random(); + + Transform3 t0, t1, t2; + + Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + + Quaternionx q1, q2; + + q1 = AngleAxisx(a, v0.normalized()); + + t0 = Transform3::Identity(); + VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); + + t0.setIdentity(); + t1.setIdentity(); + v1 = Vector3::Ones(); + t0.linear() = q1.toRotationMatrix(); + t0.pretranslate(v0); + t1.linear() = q1.conjugate().toRotationMatrix(); + t1.translate(-v0); + + VERIFY((t0 * t1).matrix().isIdentity(test_precision<Scalar>())); + + t1.fromPositionOrientationScale(v0, q1, v1); + VERIFY_IS_APPROX(t1.matrix(), t0.matrix()); + VERIFY_IS_APPROX(t1*v1, t0*v1); + + // translation * vector + t0.setIdentity(); + t0.translate(v0); + VERIFY_IS_APPROX((t0 * v1).template head<3>(), Translation3(v0) * v1); + + // Conversion to matrix. + Transform3 t3; + t3.linear() = q1.toRotationMatrix(); + t3.translation() = v1; + Matrix4 m3 = t3.matrix(); + VERIFY((m3 * m3.inverse()).isIdentity(test_precision<Scalar>())); + // Verify implicit last row is initialized. + VERIFY_IS_APPROX(Vector4(m3.row(3)), Vector4(0.0, 0.0, 0.0, 1.0)); +} + EIGEN_DECLARE_TEST(geo_transformations) { for(int i = 0; i < g_repeat; i++) { @@ -625,7 +681,7 @@ EIGEN_DECLARE_TEST(geo_transformations) CALL_SUBTEST_3(( transformations<double,Projective,AutoAlign>() )); CALL_SUBTEST_3(( transformations<double,Projective,DontAlign>() )); CALL_SUBTEST_3(( transform_alignment<double>() )); - + CALL_SUBTEST_4(( transformations<float,Affine,RowMajor|AutoAlign>() )); CALL_SUBTEST_4(( non_projective_only<float,Affine,RowMajor>() )); @@ -641,5 +697,8 @@ EIGEN_DECLARE_TEST(geo_transformations) CALL_SUBTEST_8(( transform_associativity<double,2,ColMajor>(Rotation2D<double>(internal::random<double>()*double(EIGEN_PI))) )); CALL_SUBTEST_8(( transform_associativity<double,3,ColMajor>(Quaterniond::UnitRandom()) )); + + CALL_SUBTEST_9(( transformations_no_scale<double,Affine,AutoAlign>() )); + CALL_SUBTEST_9(( transformations_no_scale<double,Isometry,AutoAlign>() )); } } diff --git a/test/lu.cpp b/test/lu.cpp index 24bea784a..46fd60555 100644 --- a/test/lu.cpp +++ b/test/lu.cpp @@ -18,6 +18,8 @@ typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) { template<typename MatrixType> void lu_non_invertible() { + STATIC_CHECK(( internal::is_same<typename FullPivLU<MatrixType>::StorageIndex,int>::value )); + typedef typename MatrixType::RealScalar RealScalar; /* this test covers the following files: LU.h @@ -191,6 +193,8 @@ template<typename MatrixType> void lu_partial_piv() m1.setRandom(); PartialPivLU<MatrixType> plu(m1); + STATIC_CHECK(( internal::is_same<typename PartialPivLU<MatrixType>::StorageIndex,int>::value )); + VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); m3 = MatrixType::Random(size,size); diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 04f93108f..4906f6eb0 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -568,6 +568,7 @@ template<typename Scalar,typename Packet> void packetmath_real() h.store(data2, internal::plog(h.load(data1))); VERIFY((numext::isinf)(data2[0])); } + if(PacketTraits::HasSqrt) { packet_helper<PacketTraits::HasSqrt,Packet> h; data1[0] = Scalar(-1.0f); |