diff options
-rw-r--r-- | Eigen/src/Cholesky/LDLT.h | 12 | ||||
-rw-r--r-- | Eigen/src/Core/arch/NEON/PacketMath.h | 4 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 9 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks_fp16_gpu.cu | 2 | ||||
-rw-r--r-- | unsupported/test/CMakeLists.txt | 3 | ||||
-rw-r--r-- | unsupported/test/cxx11_float16.cpp (renamed from unsupported/test/float16.cpp) | 12 |
6 files changed, 29 insertions, 13 deletions
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index d246a459c..90ed32fac 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -281,8 +281,8 @@ template<> struct ldlt_inplace<Lower> if (size <= 1) { transpositions.setIdentity(); - if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; - else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef; else sign = ZeroSign; return true; } @@ -339,12 +339,12 @@ template<> struct ldlt_inplace<Lower> A21 /= realAkk; if (sign == PositiveSemiDef) { - if (realAkk < 0) sign = Indefinite; + if (realAkk < static_cast<RealScalar>(0)) sign = Indefinite; } else if (sign == NegativeSemiDef) { - if (realAkk > 0) sign = Indefinite; + if (realAkk > static_cast<RealScalar>(0)) sign = Indefinite; } else if (sign == ZeroSign) { - if (realAkk > 0) sign = PositiveSemiDef; - else if (realAkk < 0) sign = NegativeSemiDef; + if (realAkk > static_cast<RealScalar>(0)) sign = PositiveSemiDef; + else if (realAkk < static_cast<RealScalar>(0)) sign = NegativeSemiDef; } } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 63a2d9f52..3224c36bd 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -179,6 +179,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co // Clang/ARM wrongly advertises __ARM_FEATURE_FMA even when it's not available, // then implements a slow software scalar fallback calling fmaf()! +// Filed LLVM bug: +// https://llvm.org/bugs/show_bug.cgi?id=27216 #if (defined __ARM_FEATURE_FMA) && !(EIGEN_COMP_CLANG && EIGEN_ARCH_ARM) // See bug 936. // FMA is available on VFPv4 i.e. when compiling with -mfpu=neon-vfpv4. @@ -195,6 +197,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& // -march=armv7-a, that is a very common case. // See e.g. this thread: // http://lists.llvm.org/pipermail/llvm-dev/2013-December/068806.html + // Filed LLVM bug: + // https://llvm.org/bugs/show_bug.cgi?id=27219 Packet4f r = c; asm volatile( "vmla.f32 %q[r], %q[a], %q[b]" diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 16b388abf..90b9bc741 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -46,8 +46,13 @@ template <typename Device, typename T> class BenchmarkSuite { void typeCasting(int num_iters) { eigen_assert(m_ == n_); Eigen::array<TensorIndex, 2> sizes; - sizes[0] = m_; - sizes[1] = k_; + if (sizeof(T) >= sizeof(int)) { + sizes[0] = m_; + sizes[1] = k_; + } else { + sizes[0] = m_ * sizeof(T) / sizeof(int); + sizes[1] = k_ * sizeof(T) / sizeof(int); + } const TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> A((int*)a_, sizes); TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, sizes); diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu index 35c6f7489..d34bd73ca 100644 --- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu +++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu @@ -28,7 +28,7 @@ BM_FuncGPU(shuffling); BM_FuncGPU(padding); BM_FuncGPU(striding); BM_FuncGPU(broadcasting); -//BM_FuncGPU(coeffWiseOp); +BM_FuncGPU(coeffWiseOp); //BM_FuncGPU(algebraicFunc); //BM_FuncGPU(transcendentalFunc); BM_FuncGPU(rowReduction); diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 96652bfcf..c6a92fe73 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -110,13 +110,12 @@ ei_add_test(minres) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) -ei_add_test(float16) - if(EIGEN_TEST_CXX11) # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) + ei_add_test(cxx11_float16) ei_add_test(cxx11_meta) ei_add_test(cxx11_tensor_simple) # ei_add_test(cxx11_tensor_symmetry) diff --git a/unsupported/test/float16.cpp b/unsupported/test/cxx11_float16.cpp index 13f3ddaca..2dc0872d8 100644 --- a/unsupported/test/float16.cpp +++ b/unsupported/test/cxx11_float16.cpp @@ -7,7 +7,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC float16 +#define EIGEN_TEST_FUNC cxx11_float16 #include "main.h" #include <Eigen/src/Core/arch/CUDA/Half.h> @@ -64,9 +64,13 @@ void test_conversion() VERIFY((numext::isnan)(float(half(__half{0xfc01})))); VERIFY((numext::isinf)(float(half(__half{0x7c00})))); VERIFY((numext::isnan)(float(half(__half{0x7c01})))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); +#endif // Exactly same checks as above, just directly on the half representation. VERIFY(!(numext::isinf)(half(__half{0x7bff}))); @@ -75,9 +79,13 @@ void test_conversion() VERIFY((numext::isnan)(half(__half{0xfc01}))); VERIFY((numext::isinf)(half(__half{0x7c00}))); VERIFY((numext::isnan)(half(__half{0x7c01}))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 VERIFY((numext::isnan)(half(0.0 / 0.0))); VERIFY((numext::isinf)(half(1.0 / 0.0))); VERIFY((numext::isinf)(half(-1.0 / 0.0))); +#endif } void test_arithmetic() @@ -138,7 +146,7 @@ void test_functions() VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); } -void test_float16() +void test_cxx11_float16() { CALL_SUBTEST(test_conversion()); CALL_SUBTEST(test_arithmetic()); |