From 78ee3d6261ec6971fcd8fdda6ee040962edde1dc Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Thu, 25 Mar 2021 12:37:25 -0700 Subject: Fix CUDA constexpr issues for numeric_limits. Some CUDA/HIP constants fail on device with `constexpr` since they internally rely on non-constexpr functions, e.g. ``` #define CUDART_INF_F __int_as_float(0x7f800000) ``` This fails for cuda-clang (though it passes with nvcc). These constants are currently used by `device::numeric_limits`. For portability, we need to remove `constexpr` from the affected functions. For C++11 or higher, we should be able to rely on the `std::numeric_limits` versions anyway, since the methods themselves are now `constexpr`, so they should be supported on device (clang/hipcc natively, nvcc with `--expt-relaxed-constexpr`). --- test/gpu_basic.cu | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'test') diff --git a/test/gpu_basic.cu b/test/gpu_basic.cu index b2e657e72..bf8dcacde 100644 --- a/test/gpu_basic.cu +++ b/test/gpu_basic.cu @@ -343,6 +343,21 @@ struct matrix_inverse { } }; +template<typename T> +struct numeric_limits_test { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + EIGEN_UNUSED_VARIABLE(in) + int out_idx = i * 5; + out[out_idx++] = numext::numeric_limits::epsilon(); + out[out_idx++] = (numext::numeric_limits::max)(); + out[out_idx++] = (numext::numeric_limits::min)(); + out[out_idx++] = numext::numeric_limits::infinity(); + out[out_idx++] = numext::numeric_limits::quiet_NaN(); + } +}; + template<typename Type1, typename Type2> bool verifyIsApproxWithInfsNans(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only { @@ -434,6 +449,9 @@ EIGEN_DECLARE_TEST(gpu_basic) CALL_SUBTEST( run_and_compare_to_gpu(complex_operators(), nthreads, cfin, cfout) ); CALL_SUBTEST( test_with_infs_nans(complex_sqrt(), nthreads, cfin, cfout) ); + // numeric_limits + CALL_SUBTEST( test_with_infs_nans(numeric_limits_test(), 1, in, out) ); + #if 
defined(__NVCC__) // FIXME // These subtests compile only with nvcc and fail with HIPCC and clang-cuda -- cgit v1.2.3