From 0d2a532fc3b25199af03106b6d4ade0f92a30dfc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 13:16:08 -0700 Subject: Created the new EIGEN_TEST_CUDA_CLANG option to compile the CUDA tests using clang instead of nvcc --- cmake/EigenTesting.cmake | 27 +++++++++++++++++++++++---- test/CMakeLists.txt | 11 ++++++++++- unsupported/test/CMakeLists.txt | 4 ++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index d5e3972b5..6f3661921 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -19,10 +19,25 @@ macro(ei_add_test_internal testname testname_with_suffix) endif() if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) - if (${ARGC} GREATER 2) - cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + if(EIGEN_TEST_CUDA_CLANG) + set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) + if(CUDA_64_BIT_DEVICE_CODE) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + else() + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") + endif() + if (${ARGC} GREATER 2) + add_executable(${targetname} ${filename}) + else() + add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + endif() + target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread") else() - cuda_add_executable(${targetname} ${filename}) + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() endif() else() add_executable(${targetname} ${filename}) @@ -316,7 +331,11 @@ macro(ei_testing_print_summary) endif() if(EIGEN_TEST_CUDA) - message(STATUS "CUDA: ON") + if(EIGEN_TEST_CUDA_CLANG) + message(STATUS "CUDA: ON (using clang)") + else() + message(STATUS "CUDA: ON (using nvcc)") + endif() else() message(STATUS "CUDA: OFF") endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 841c4572b..7bed6a45c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ 
-327,8 +327,14 @@ endif() # CUDA unit tests option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) +option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF) + +if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang") + message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.") +endif() + if(EIGEN_TEST_CUDA) - + find_package(CUDA 5.0) if(CUDA_FOUND) @@ -336,6 +342,9 @@ if(CUDA_FOUND) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") + endif() cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index c6a92fe73..b1931d80a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -190,6 +190,10 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin /usr/bin/clang" CACHE STRING "nvcc flags" FORCE) endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") + endif() + set(CUDA_NVCC_FLAGS "-std=c++11 --relaxed-constexpr -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"") cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") -- cgit v1.2.3 From 3394379319eb7e7946662142d1ac6bf733a5ae28 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 13:33:59 -0700 Subject: Fixed the packet_traits for half floats. 
--- Eigen/src/Core/GenericPacketMath.h | 8 ++++---- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 9 +-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 6ff61c18a..001c2ffbf 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -62,7 +62,7 @@ struct default_packet_traits HasRsqrt = 0, HasExp = 0, HasLog = 0, - HasLog10 = 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -71,9 +71,9 @@ struct default_packet_traits HasASin = 0, HasACos = 0, HasATan = 0, - HasSinh = 0, - HasCosh = 0, - HasTanh = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, HasLGamma = 0, HasDiGamma = 0, HasZeta = 0, diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 14f0c9415..dc09c74d1 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -33,14 +33,7 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size=2, HasHalfPacket = 0, - - HasDiv = 1, - HasLog = 1, - HasExp = 1, - HasSqrt = 1, - HasRsqrt = 1, - - HasBlend = 0, + HasDiv = 1 }; }; -- cgit v1.2.3 From 8d22967bd9a7963e72622c8fb17cc5322f938f9f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 14:22:39 -0700 Subject: Initial support for taking the power of fp16 --- Eigen/src/Core/arch/CUDA/Half.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 0a3b301bf..3be7e88d7 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -406,6 +406,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half log(const Eigen::ha template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrt(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); } +template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half pow(const Eigen::half& a, const Eigen::half& b) { + 
return Eigen::half(::powf(float(a), float(b))); +} template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floor(const Eigen::half& a) { return Eigen::half(::floorf(float(a))); } @@ -432,6 +435,9 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half sqrth(const Eigen::half& a) { return Eigen::half(::sqrtf(float(a))); } +static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half powh(const Eigen::half& a, const Eigen::half& b) { + return Eigen::half(::powf(float(a), float(b))); +} static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half floorh(const Eigen::half& a) { return Eigen::half(::floorf(float(a))); } -- cgit v1.2.3 From 995f202cea0677264405496c1ef0c3a8570f7dda Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 14:43:36 -0700 Subject: Disabled the use of half2 on cuda devices of compute capability < 5.3 --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 14 ++------------ Eigen/src/Core/arch/CUDA/TypeCasting.h | 25 ++----------------------- unsupported/test/CMakeLists.txt | 5 +---- 3 files changed, 5 insertions(+), 39 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index dc09c74d1..61d532e4d 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -17,7 +17,8 @@ // we'll use on the host side (SSE, AVX, ...) 
#if defined(__CUDACC__) && defined(EIGEN_USE_GPU) -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 +// Most of the following operations require arch >= 5.3 +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 namespace Eigen { namespace internal { @@ -67,20 +68,12 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(half* to, co template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro(const half* from) { -#if __CUDA_ARCH__ >= 320 return __ldg((const half2*)from); -#else - return __halves2half2(*(from+0), *(from+1)); -#endif } template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro(const half* from) { -#if __CUDA_ARCH__ >= 320 return __halves2half2(__ldg(from+0), __ldg(from+1)); -#else - return __halves2half2(*(from+0), *(from+1)); -#endif } template<> EIGEN_DEVICE_FUNC inline half2 pgather(const half* from, Index stride) { @@ -113,8 +106,6 @@ ptranspose(PacketBlock& kernel) { kernel.packet[1] = __halves2half2(a2, b2); } -// The following operations require arch >= 5.3 -#if __CUDA_ARCH__ >= 530 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const half& a) { return __halves2half2(a, __hadd(a, __float2half(1.0f))); } @@ -190,7 +181,6 @@ template<> EIGEN_DEVICE_FUNC inline half predux_min(const half2& a) { template<> EIGEN_DEVICE_FUNC inline half predux_mul(const half2& a) { return __hmul(__low2half(a), __high2half(a)); } -#endif } // end namespace internal diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h index b2a9724de..396b38eaf 100644 --- a/Eigen/src/Core/arch/CUDA/TypeCasting.h +++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h @@ -71,6 +71,7 @@ struct functor_traits > +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 template <> struct type_casting_traits { @@ -82,22 +83,9 @@ struct type_casting_traits { }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast(const half2& a, const half2& b) { -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 float2 r1 = 
__half22float2(a); float2 r2 = __half22float2(b); return make_float4(r1.x, r1.y, r2.x, r2.y); -#else - half r1; - r1.x = a.x & 0xFFFF; - half r2; - r2.x = (a.x & 0xFFFF0000) >> 16; - half r3; - r3.x = b.x & 0xFFFF; - half r4; - r4.x = (b.x & 0xFFFF0000) >> 16; - return make_float4(static_cast(r1), static_cast(r2), - static_cast(r3), static_cast(r4)); -#endif } template <> @@ -111,19 +99,10 @@ struct type_casting_traits { template<> EIGEN_STRONG_INLINE half2 pcast(const float4& a) { // Simply discard the second half of the input -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 return __float22half2_rn(make_float2(a.x, a.y)); -#else - half r1 = static_cast(a.x); - half r2 = static_cast(a.y); - half2 r; - r.x = 0; - r.x |= r1.x; - r.x |= (static_cast(r2.x) << 16); - return r; -#endif } +#endif #endif } // end namespace internal diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index b1931d80a..c088df1c1 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -210,10 +210,7 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) ei_add_test(cxx11_tensor_random_cuda) endif() - # Operations other that casting of half floats are only supported starting with arch 5.3 - if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52) - ei_add_test(cxx11_tensor_of_float16_cuda) - endif() + ei_add_test(cxx11_tensor_of_float16_cuda) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() -- cgit v1.2.3 From 89a3dc35a339eee62635a133b2c351ce45011419 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 15:56:16 -0700 Subject: Fixed isfinite_impl: NumTraits::highest() and NumTraits::lowest() are finite numbers. 
--- Eigen/src/Core/MathFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index fd73f543b..2f66c7463 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -710,7 +710,7 @@ isfinite_impl(const T& x) using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); #else - return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest(); + return x<=NumTraits<T>::highest() && x>=NumTraits<T>::lowest(); #endif } -- cgit v1.2.3 From 01bd577288bb737ea25485eed1a25d7d3433b01f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 8 Apr 2016 16:40:10 -0700 Subject: Fixed the implementation of Eigen::numext::isfinite, Eigen::numext::isnan, and Eigen::numext::isinf on CUDA devices --- Eigen/src/Core/MathFunctions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 2f66c7463..dd19f080b 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -705,7 +705,7 @@ typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits: isfinite_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isfinite)(x); + return (::isfinite)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isfinite; return isfinite EIGEN_NOT_A_MACRO (x); @@ -720,7 +720,7 @@ typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits: isinf_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isinf)(x); + return (::isinf)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isinf; return isinf EIGEN_NOT_A_MACRO (x); @@ -735,7 +735,7 @@ typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits: isnan_impl(const T& x) { #ifdef __CUDA_ARCH__ - return (isnan)(x); + return (::isnan)(x); #elif EIGEN_USE_STD_FPCLASSIFY using std::isnan; return isnan EIGEN_NOT_A_MACRO (x); -- cgit v1.2.3 From a05a683d8329918c29efd931981fc1ead5b6deea Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 9 Apr
2016 10:49:19 +0200 Subject: bug #1160: fix and relax some lm unit tests by turning failures to warnings --- unsupported/test/levenberg_marquardt.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp index a2bdb99e4..65afa0f78 100644 --- a/unsupported/test/levenberg_marquardt.cpp +++ b/unsupported/test/levenberg_marquardt.cpp @@ -789,7 +789,8 @@ void testNistMGH10(void) MGH10_functor functor; LevenbergMarquardt lm(functor); info = lm.minimize(x); - VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall); + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + // was: VERIFY_IS_EQUAL(info, 1); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); @@ -799,9 +800,13 @@ void testNistMGH10(void) VERIFY_IS_APPROX(x[2], 3.4522363462E+02); // check return value - //VERIFY_IS_EQUAL(info, 1); + + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 284 ); VERIFY_IS_EQUAL(lm.njev(), 249 ); + --g_test_level; + VERIFY(lm.nfev() < 284 * 3/2); + VERIFY(lm.njev() < 249 * 3/2); /* * Second try @@ -809,7 +814,10 @@ void testNistMGH10(void) x<< 0.02, 4000., 250.; // do the computation info = lm.minimize(x); + ++g_test_level; VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + // was: VERIFY_IS_EQUAL(info, 1); + --g_test_level; // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); @@ -819,9 +827,12 @@ void testNistMGH10(void) VERIFY_IS_APPROX(x[2], 3.4522363462E+02); // check return value - //VERIFY_IS_EQUAL(info, 1); + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 126); VERIFY_IS_EQUAL(lm.njev(), 116); + --g_test_level; + VERIFY(lm.nfev() < 126 * 3/2); + VERIFY(lm.njev() < 116 * 3/2); } @@ -896,8 +907,12 @@ void testNistBoxBOD(void) // check return value VERIFY_IS_EQUAL(info, 1); + ++g_test_level; VERIFY_IS_EQUAL(lm.nfev(), 16 ); VERIFY_IS_EQUAL(lm.njev(), 15 ); +
--g_test_level; + VERIFY(lm.nfev() < 16 * 3/2); + VERIFY(lm.njev() < 15 * 3/2); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03); // check x -- cgit v1.2.3 From af2161cdb4ec19fbc44bcf7bca7cae662b6b8085 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 9 Apr 2016 11:14:02 +0200 Subject: bug #1197: fix/relax some LM unit tests --- unsupported/test/NonLinearOptimization.cpp | 16 ++++++++++++---- unsupported/test/levenberg_marquardt.cpp | 19 +++++++++++-------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp index 724ea7b5b..6a5ed057f 100644 --- a/unsupported/test/NonLinearOptimization.cpp +++ b/unsupported/test/NonLinearOptimization.cpp @@ -14,6 +14,9 @@ using std::sqrt; +// tolerance for checking number of iterations +#define LM_EVAL_COUNT_TOL 4/3 + int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag) { /* subroutine fcn for chkder example.
*/ @@ -1023,7 +1026,8 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.njev, 72); // check norm^2 std::cout.precision(30); - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4290986055242372e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + std::cout << lm.fvec.squaredNorm() << "\n"; + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1044,7 +1048,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev, 9); VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430571737783119393e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1354,8 +1358,12 @@ void testNistMGH17(void) // check return value VERIFY_IS_EQUAL(info, 2); - VERIFY(lm.nfev < 650); // 602 - VERIFY(lm.njev < 600); // 545 + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev, 602); // 602 + VERIFY_IS_EQUAL(lm.njev, 545); // 545 + --g_test_level; + VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL); /* * Second try diff --git a/unsupported/test/levenberg_marquardt.cpp b/unsupported/test/levenberg_marquardt.cpp index 65afa0f78..6dc17bd17 100644 --- a/unsupported/test/levenberg_marquardt.cpp +++ b/unsupported/test/levenberg_marquardt.cpp @@ -23,6 +23,9 @@ using std::sqrt; +// tolerance for checking number of iterations +#define LM_EVAL_COUNT_TOL 4/3 + struct lmder_functor : DenseFunctor { lmder_functor(void): DenseFunctor(3,15) {} @@ -631,7 +634,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev(), 79); VERIFY_IS_EQUAL(lm.njev(), 72); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check
x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -652,7 +655,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev(), 9); VERIFY_IS_EQUAL(lm.njev(), 8); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -805,8 +808,8 @@ void testNistMGH10(void) VERIFY_IS_EQUAL(lm.nfev(), 284 ); VERIFY_IS_EQUAL(lm.njev(), 249 ); --g_test_level; - VERIFY(lm.nfev() < 284 * 3/2); - VERIFY(lm.njev() < 249 * 3/2); + VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL); /* * Second try @@ -831,8 +834,8 @@ void testNistMGH10(void) VERIFY_IS_EQUAL(lm.nfev(), 126); VERIFY_IS_EQUAL(lm.njev(), 116); --g_test_level; - VERIFY(lm.nfev() < 126 * 3/2); - VERIFY(lm.njev() < 116 * 3/2); + VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL); } @@ -911,8 +914,8 @@ void testNistBoxBOD(void) VERIFY_IS_EQUAL(lm.nfev(), 16 ); VERIFY_IS_EQUAL(lm.njev(), 15 ); --g_test_level; - VERIFY(lm.nfev() < 16 * 3/2); - VERIFY(lm.njev() < 15 * 3/2); + VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03); // check x -- cgit v1.2.3