aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/test/cxx11_tensor_cuda.cu
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/test/cxx11_tensor_cuda.cu')
-rw-r--r--unsupported/test/cxx11_tensor_cuda.cu157
1 files changed, 157 insertions, 0 deletions
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 63d0a345a..f238ed5be 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -1318,6 +1318,157 @@ void test_cuda_i1e()
cudaFree(d_out);
}
+// Evaluates `a.igamma_der_a(x)` through an Eigen::GpuDevice and compares the
+// result element-wise against precomputed reference values.
+//
+// NOTE(review): judging by its name, igamma_der_a is presumably the derivative
+// of the regularized incomplete gamma function with respect to `a` -- confirm
+// against the special-functions documentation.
+//
+// Template parameter:
+//   Scalar - element type of the tensors under test.
+template <typename Scalar>
+void test_cuda_igamma_der_a()
+{
+  // Number of (a, x, expected) triples in the reference data below.
+  const int num_points = 30;
+
+  Tensor<Scalar, 1> in_x(num_points);
+  Tensor<Scalar, 1> in_a(num_points);
+  Tensor<Scalar, 1> out(num_points);
+  Tensor<Scalar, 1> expected_out(num_points);
+  out.setZero();
+
+  // Staging arrays: they only exist so the reference data can be written with
+  // the comma initializer before being copied into the tensors.
+  Array<Scalar, 1, Dynamic> in_a_array(num_points);
+  Array<Scalar, 1, Dynamic> in_x_array(num_points);
+  Array<Scalar, 1, Dynamic> expected_out_array(num_points);
+
+  // See special_functions.cpp for the Python code that generates the test data.
+
+  in_a_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0, 1.0,
+      1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0, 100.0,
+      100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
+
+  in_x_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
+      1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
+      0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
+      1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
+      7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
+      92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
+      968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
+
+  expected_out_array << -32.7256441441, -36.4394150514, -9.66467612263,
+      -36.4394150514, -36.4394150514, -1.0891900302, -2.66351229645,
+      -2.48666868596, -0.929700494428, -3.56327722764, -0.455320135314,
+      -0.391437214323, -0.491352055991, -0.350454834292, -0.471773162921,
+      -0.104084440522, -0.0723646747909, -0.0992828975532, -0.121638215446,
+      -0.122619605294, -0.0317670267286, -0.0359974812869, -0.0154359225363,
+      -0.0375775365921, -0.00794899153653, -0.00777303219211, -0.00796085782042,
+      -0.0125850719397, -0.00455500206958, -0.00476436993148;
+
+  for (int i = 0; i < num_points; ++i) {
+    in_x(i) = in_x_array(i);
+    in_a(i) = in_a_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_x.size() * sizeof(Scalar);
+
+  // Device buffers for the two inputs and the result.
+  Scalar* d_a;
+  Scalar* d_x;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_a), bytes);
+  cudaMalloc((void**)(&d_x), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_a, in_a.data(), bytes, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_a(d_a, num_points);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_x(d_x, num_points);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, num_points);
+
+  gpu_out.device(gpu_device) = gpu_a.igamma_der_a(gpu_x);
+
+  // Issue the copy and the synchronization outside of assert(): when NDEBUG is
+  // defined, assert() discards its argument, which would silently skip both
+  // calls and leave `out` filled with zeros.
+  const cudaError_t copy_status = cudaMemcpyAsync(
+      out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream());
+  assert(copy_status == cudaSuccess);
+  const cudaError_t sync_status = cudaStreamSynchronize(gpu_device.stream());
+  assert(sync_status == cudaSuccess);
+  // Avoid unused-variable warnings when assert() compiles to nothing.
+  (void)copy_status;
+  (void)sync_status;
+
+  for (int i = 0; i < num_points; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_a);
+  cudaFree(d_x);
+  cudaFree(d_out);
+}
+
+// Evaluates `alpha.gamma_sample_der_alpha(sample)` through an Eigen::GpuDevice
+// and compares the result element-wise against precomputed reference values.
+//
+// NOTE(review): judging by its name, gamma_sample_der_alpha is presumably the
+// derivative of a Gamma-distributed sample with respect to the shape parameter
+// alpha -- confirm against the special-functions documentation.
+//
+// Template parameter:
+//   Scalar - element type of the tensors under test.
+template <typename Scalar>
+void test_cuda_gamma_sample_der_alpha()
+{
+  // Number of (alpha, sample, expected) triples in the reference data below.
+  const int num_points = 30;
+
+  Tensor<Scalar, 1> in_alpha(num_points);
+  Tensor<Scalar, 1> in_sample(num_points);
+  Tensor<Scalar, 1> out(num_points);
+  Tensor<Scalar, 1> expected_out(num_points);
+  out.setZero();
+
+  // Staging arrays: they only exist so the reference data can be written with
+  // the comma initializer before being copied into the tensors.
+  Array<Scalar, 1, Dynamic> in_alpha_array(num_points);
+  Array<Scalar, 1, Dynamic> in_sample_array(num_points);
+  Array<Scalar, 1, Dynamic> expected_out_array(num_points);
+
+  // See special_functions.cpp for the Python code that generates the test data.
+
+  in_alpha_array << 0.01, 0.01, 0.01, 0.01, 0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 1.0,
+      1.0, 1.0, 1.0, 1.0, 10.0, 10.0, 10.0, 10.0, 10.0, 100.0, 100.0, 100.0,
+      100.0, 100.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0;
+
+  in_sample_array << 1.25668890405e-26, 1.17549435082e-38, 1.20938905072e-05,
+      1.17549435082e-38, 1.17549435082e-38, 5.66572070696e-16, 0.0132865061065,
+      0.0200034203853, 6.29263709118e-17, 1.37160367764e-06, 0.333412038288,
+      1.18135687766, 0.580629033777, 0.170631439426, 0.786686768458,
+      7.63873279537, 13.1944344379, 11.896042354, 10.5830172417, 10.5020942233,
+      92.8918587747, 95.003720371, 86.3715926467, 96.0330217672, 82.6389930677,
+      968.702906754, 969.463546828, 1001.79726022, 955.047416547, 1044.27458568;
+
+  expected_out_array << 7.42424742367e-23, 1.02004297287e-34, 0.0130155240738,
+      1.02004297287e-34, 1.02004297287e-34, 1.96505168277e-13, 0.525575786243,
+      0.713903991771, 2.32077561808e-14, 0.000179348049886, 0.635500453302,
+      1.27561284917, 0.878125852156, 0.41565819538, 1.03606488534,
+      0.885964824887, 1.16424049334, 1.10764479598, 1.04590810812,
+      1.04193666963, 0.965193152414, 0.976217589464, 0.93008035061,
+      0.98153216096, 0.909196397698, 0.98434963993, 0.984738050206,
+      1.00106492525, 0.97734200649, 1.02198794179;
+
+  for (int i = 0; i < num_points; ++i) {
+    in_alpha(i) = in_alpha_array(i);
+    in_sample(i) = in_sample_array(i);
+    expected_out(i) = expected_out_array(i);
+  }
+
+  std::size_t bytes = in_alpha.size() * sizeof(Scalar);
+
+  // Device buffers for the two inputs and the result.
+  Scalar* d_alpha;
+  Scalar* d_sample;
+  Scalar* d_out;
+  cudaMalloc((void**)(&d_alpha), bytes);
+  cudaMalloc((void**)(&d_sample), bytes);
+  cudaMalloc((void**)(&d_out), bytes);
+
+  cudaMemcpy(d_alpha, in_alpha.data(), bytes, cudaMemcpyHostToDevice);
+  cudaMemcpy(d_sample, in_sample.data(), bytes, cudaMemcpyHostToDevice);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_alpha(d_alpha, num_points);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_sample(d_sample, num_points);
+  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, num_points);
+
+  gpu_out.device(gpu_device) = gpu_alpha.gamma_sample_der_alpha(gpu_sample);
+
+  // Issue the copy and the synchronization outside of assert(): when NDEBUG is
+  // defined, assert() discards its argument, which would silently skip both
+  // calls and leave `out` filled with zeros.
+  const cudaError_t copy_status = cudaMemcpyAsync(
+      out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream());
+  assert(copy_status == cudaSuccess);
+  const cudaError_t sync_status = cudaStreamSynchronize(gpu_device.stream());
+  assert(sync_status == cudaSuccess);
+  // Avoid unused-variable warnings when assert() compiles to nothing.
+  (void)copy_status;
+  (void)sync_status;
+
+  for (int i = 0; i < num_points; ++i) {
+    VERIFY_IS_APPROX(out(i), expected_out(i));
+  }
+
+  cudaFree(d_alpha);
+  cudaFree(d_sample);
+  cudaFree(d_out);
+}
void test_cxx11_tensor_cuda()
{
@@ -1396,5 +1547,11 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST_6(test_cuda_i1e<float>());
CALL_SUBTEST_6(test_cuda_i1e<double>());
+
+ CALL_SUBTEST_6(test_cuda_igamma_der_a<float>());
+ CALL_SUBTEST_6(test_cuda_igamma_der_a<double>());
+
+ CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<float>());
+ CALL_SUBTEST_6(test_cuda_gamma_sample_der_alpha<double>());
#endif
}