diff options
author | Michael Figurnov <mfigurnov@google.com> | 2018-05-31 15:34:53 +0100 |
---|---|---|
committer | Michael Figurnov <mfigurnov@google.com> | 2018-05-31 15:34:53 +0100 |
commit | f216854453887f31ac02ffefb7a7a569dc3fa54d (patch) | |
tree | bf748705db0da48a0dc8c08989184bcb888861fd /unsupported/test | |
parent | 6af1433cb50af7423a1a69afc24c098af9c76bb1 (diff) |
Exponentially scaled modified Bessel functions of order zero and one.
The functions are conventionally called i0e and i1e. The exponentially scaled version is more numerically stable. The standard Bessel functions can be obtained as i0(x) = exp(|x|) i0e(x)
The code is ported from Cephes and tested against SciPy.
Diffstat (limited to 'unsupported/test')
-rw-r--r-- | unsupported/test/cxx11_tensor_cuda.cu | 116 | ||||
-rw-r--r-- | unsupported/test/special_functions.cpp | 40 |
2 files changed, 156 insertions, 0 deletions
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 9584a539f..63d0a345a 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -1208,6 +1208,116 @@ void test_cuda_betainc() cudaFree(d_out); } +template <typename Scalar> +void test_cuda_i0e() +{ + Tensor<Scalar, 1> in_x(21); + Tensor<Scalar, 1> out(21); + Tensor<Scalar, 1> expected_out(21); + out.setZero(); + + Array<Scalar, 1, Dynamic> in_x_array(21); + Array<Scalar, 1, Dynamic> expected_out_array(21); + + in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, + -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected_out_array << 0.0897803118848, 0.0947062952128, 0.100544127361, + 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, + 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, + 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, + 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, + 0.0897803118848; + + for (int i = 0; i < 21; ++i) { + in_x(i) = in_x_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21); + + gpu_out.device(gpu_device) = gpu_in.i0e(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 21; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_i1e() +{ + Tensor<Scalar, 1> in_x(21); + Tensor<Scalar, 1> out(21); + Tensor<Scalar, 1> expected_out(21); + out.setZero(); + + Array<Scalar, 1, Dynamic> in_x_array(21); + Array<Scalar, 1, Dynamic> expected_out_array(21); + + in_x_array << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, + -2.0, 0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected_out_array << -0.0875062221833, -0.092036796872, -0.0973496147565, + -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, + -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, + 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, + 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, + 0.0875062221833; + + for (int i = 0; i < 21; ++i) { + in_x(i) = in_x_array(i); + expected_out(i) = expected_out_array(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in_x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 21); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 21); + + gpu_out.device(gpu_device) = gpu_in.i1e(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 21; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_in); + cudaFree(d_out); +} + void test_cxx11_tensor_cuda() { @@ -1280,5 +1390,11 @@ void test_cxx11_tensor_cuda() CALL_SUBTEST_6(test_cuda_betainc<float>()); CALL_SUBTEST_6(test_cuda_betainc<double>()); + + CALL_SUBTEST_6(test_cuda_i0e<float>()); + CALL_SUBTEST_6(test_cuda_i0e<double>()); + + CALL_SUBTEST_6(test_cuda_i1e<float>()); + CALL_SUBTEST_6(test_cuda_i1e<double>()); #endif } diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp index 057fb3e92..48d0db95e 100644 --- a/unsupported/test/special_functions.cpp +++ b/unsupported/test/special_functions.cpp @@ -335,6 +335,46 @@ template<typename ArrayType> void array_special_functions() ArrayType test = betainc(a, b + one, x) + eps; verify_component_wise(test, expected);); } + + // Test Bessel function i0e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << 0.0897803118848, 0.0947062952128, 0.100544127361, + 0.107615251671, 0.116426221213, 0.127833337163, 0.143431781857, + 0.16665743264, 0.207001921224, 0.308508322554, 1.0, 0.308508322554, + 0.207001921224, 0.16665743264, 0.143431781857, 0.127833337163, + 0.116426221213, 0.107615251671, 0.100544127361, 0.0947062952128, + 0.0897803118848; + + CALL_SUBTEST(res = i0e(x); + verify_component_wise(res, expected);); + } + + // Test Bessel function i1e. Reference results obtained with SciPy. + { + ArrayType x(21); + ArrayType expected(21); + ArrayType res(21); + + x << -20.0, -18.0, -16.0, -14.0, -12.0, -10.0, -8.0, -6.0, -4.0, -2.0, 0.0, + 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0; + + expected << -0.0875062221833, -0.092036796872, -0.0973496147565, + -0.103697667463, -0.11146429929, -0.121262681384, -0.134142493293, + -0.152051459309, -0.178750839502, -0.215269289249, 0.0, 0.215269289249, + 0.178750839502, 0.152051459309, 0.134142493293, 0.121262681384, + 0.11146429929, 0.103697667463, 0.0973496147565, 0.092036796872, + 0.0875062221833; + + CALL_SUBTEST(res = i1e(x); + verify_component_wise(res, expected);); + } #endif } |