diff options
Diffstat (limited to 'unsupported/test/cxx11_tensor_cuda.cu')
-rw-r--r-- | unsupported/test/cxx11_tensor_cuda.cu | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 63d0a345a..f238ed5be 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -1318,6 +1318,184 @@ void test_cuda_i1e()
   cudaFree(d_out);
 }
 
+// Runs the igamma_der_a tensor expression on the GPU and checks it against
+// reference data.
+//
+// The 30 (a, x) input pairs and their expected outputs are hard-coded below.
+// Inputs are copied to device buffers, the expression is evaluated through a
+// GpuDevice, and the result is copied back on the device's stream before
+// every element is compared with VERIFY_IS_APPROX.
+template <typename Scalar>
+void test_cuda_igamma_der_a()
+{
+  const int num_samples = 30;
+
+  Tensor<Scalar, 1> in_x(num_samples);
+  Tensor<Scalar, 1> in_a(num_samples);
+  Tensor<Scalar, 1> out(num_samples);
+  Tensor<Scalar, 1> expected_out(num_samples);
+  out.setZero();
+
+  Array<Scalar, 1, Dynamic> in_a_array(num_samples);
+  Array<Scalar, 1, Dynamic> in_x_array(num_samples);
+  Array<Scalar, 1, Dynamic> expected_out_array(num_samples);
+
+  // See special_functions.cpp for the Python code that generates the test data.
+
+  in_a_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
+      1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
+      100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
+
+  in_x_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
+      1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
+      0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
+      1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
+      7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
+      92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
+      968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
+
+  expected_out_array << -32.7256441441, -36.4394150514, -9.66467612263,
+      -36.4394150514, -36.4394150514, -1.0891900302, -2.66351229645,
+      -2.48666868596, -0.929700494428, -3.56327722764, -0.455320135314,
+      -0.391437214323, -0.491352055991, -0.350454834292, -0.471773162921,
+      -0.104084440522, -0.0723646747909, -0.0992828975532, -0.121638215446,
+      -0.122619605294, -0.0317670267286, -0.0359974812869, -0.0154359225363,
+      -0.0375775365921, -0.00794899153653, -0.00777303219211, -0.00796085782042,
+      -0.0125850719397, -0.00455500206958, -0.00476436993148;
+
+  for (int i = 0; i < num_samples; ++i) {
+    in_x(i) = in_x_array(i);
+    in_a(i) = in_a_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+  Scalar* d_a;
+  Scalar* d_x;
+  Scalar* d_out;
+  VERIFY(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess);
+  VERIFY(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess);
+  VERIFY(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
+
+  VERIFY(cudaMemcpy(d_a, in_a.data(), bytes,
+                    cudaMemcpyHostToDevice) == cudaSuccess);
+  VERIFY(cudaMemcpy(d_x, in_x.data(), bytes,
+                    cudaMemcpyHostToDevice) == cudaSuccess);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_a(d_a, num_samples);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_x(d_x, num_samples);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, num_samples);
+
+  gpu_out.device(gpu_device) = gpu_a.igamma_der_a(gpu_x);
+
+  // The copy-back and synchronization are checked with VERIFY rather than
+  // assert(): inside assert() they would be compiled out under NDEBUG and
+  // `out` would never be filled.
+  VERIFY(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+                         gpu_device.stream()) == cudaSuccess);
+  VERIFY(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < num_samples; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_a);
+  cudaFree(d_x);
+  cudaFree(d_out);
+}
+
+// Runs the gamma_sample_der_alpha tensor expression on the GPU and checks it
+// against reference data.
+//
+// The 30 (alpha, sample) input pairs and their expected outputs are hard-coded
+// below. Inputs are copied to device buffers, the expression is evaluated
+// through a GpuDevice, and the result is copied back on the device's stream
+// before every element is compared with VERIFY_IS_APPROX.
+template <typename Scalar>
+void test_cuda_gamma_sample_der_alpha()
+{
+  const int num_samples = 30;
+
+  Tensor<Scalar, 1> in_alpha(num_samples);
+  Tensor<Scalar, 1> in_sample(num_samples);
+  Tensor<Scalar, 1> out(num_samples);
+  Tensor<Scalar, 1> expected_out(num_samples);
+  out.setZero();
+
+  Array<Scalar, 1, Dynamic> in_alpha_array(num_samples);
+  Array<Scalar, 1, Dynamic> in_sample_array(num_samples);
+  Array<Scalar, 1, Dynamic> expected_out_array(num_samples);
+
+  // See special_functions.cpp for the Python code that generates the test data.
+
+  in_alpha_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0,
+      1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0,
+      100.0, 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
+
+  in_sample_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
+      1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
+      0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
+      1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
+      7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
+      92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
+      968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
+
+  expected_out_array << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
+      1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
+      0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
+      1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
+      0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
+      1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
+      0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
+      1.00106492525, 0.97734200649, 1.02198794179;
+
+  for (int i = 0; i < num_samples; ++i) {
+    in_alpha(i) = in_alpha_array(i);
+    in_sample(i) = in_sample_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_alpha.size() * sizeof(Scalar);
+
+  Scalar* d_alpha;
+  Scalar* d_sample;
+  Scalar* d_out;
+  VERIFY(cudaMalloc((void**)(&d_alpha), bytes) == cudaSuccess);
+  VERIFY(cudaMalloc((void**)(&d_sample), bytes) == cudaSuccess);
+  VERIFY(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess);
+
+  VERIFY(cudaMemcpy(d_alpha, in_alpha.data(), bytes,
+                    cudaMemcpyHostToDevice) == cudaSuccess);
+  VERIFY(cudaMemcpy(d_sample, in_sample.data(), bytes,
+                    cudaMemcpyHostToDevice) == cudaSuccess);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_alpha(d_alpha, num_samples);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_sample(d_sample, num_samples);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, num_samples);
+
+  gpu_out.device(gpu_device) = gpu_alpha.gamma_sample_der_alpha(gpu_sample);
+
+  // The copy-back and synchronization are checked with VERIFY rather than
+  // assert(): inside assert() they would be compiled out under NDEBUG and
+  // `out` would never be filled.
+  VERIFY(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
+                         gpu_device.stream()) == cudaSuccess);
+  VERIFY(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  for (int i = 0; i < num_samples; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_alpha);
+  cudaFree(d_sample);
+  cudaFree(d_out);
+}
 
 void test_cxx11_tensor_cuda()
 {
@@ -1396,5 +1574,11 @@ void test_cxx11_tensor_cuda()
 
   CALL_SUBTEST_6(test_cuda_i1e<float>());
   CALL_SUBTEST_6(test_cuda_i1e<double>());
+
+  CALL_SUBTEST_6(test_cuda_igamma_der_a<float>());
+  CALL_SUBTEST_6(test_cuda_igamma_der_a<double>());
+
+  CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<float>());
+  CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<double>());
 #endif
 }