author    Eugene Brevdo <ebrevdo@gmail.com>  2015-12-07 15:24:49 -0800
committer Eugene Brevdo <ebrevdo@gmail.com>  2015-12-07 15:24:49 -0800
commit    fa4f933c0fe65eda6a051f978db12210f11f5cdb (patch)
tree      cab17ee4bbffd52a778b00ec40770e7fa4b361cf /unsupported/test/cxx11_tensor_cuda.cpp
parent    7dfe75f445835baff18bbe82ba7253f7563cbdc6 (diff)
Add special functions to Eigen: lgamma, erf, erfc.
Includes CUDA support and unit tests.
Diffstat (limited to 'unsupported/test/cxx11_tensor_cuda.cpp')
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cpp | 139
1 file changed, 139 insertions(+), 0 deletions(-)
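
For orientation before the diff itself: the new functions are exposed as element-wise Tensor methods, which the tests below evaluate on a GpuDevice. Here is a minimal host-side sketch, assuming the same lgamma()/erf()/erfc() methods shown in the diff are also available on the default host device (the sizes and values are illustrative, not taken from the commit):

// Minimal host-side sketch of the API this commit adds; assumes the
// element-wise Tensor methods from the diff below also work on the
// default host device. Sizes and values are illustrative.
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>
#include <cmath>

int main() {
  Eigen::Tensor<float, 2> in(2, 2);
  in.setValues({{0.5f, 1.0f}, {2.0f, 4.0f}});

  // Element-wise special functions, evaluated on the host.
  Eigen::Tensor<float, 2> lg = in.lgamma();
  Eigen::Tensor<float, 2> e  = in.erf();
  Eigen::Tensor<float, 2> ec = in.erfc();

  // Spot-check against the C++11 <cmath> equivalents, mirroring the
  // VERIFY_IS_APPROX checks in the unit tests.
  assert(std::abs(lg(0, 0) - std::lgamma(0.5f)) < 1e-5f);
  assert(std::abs(e(1, 1)  - std::erf(4.0f))    < 1e-5f);
  assert(std::abs(ec(0, 0) - std::erfc(0.5f))   < 1e-5f);
  return 0;
}

The CUDA tests in the diff follow the same shape, but stage data through cudaMalloc/cudaMemcpy, wrap the device pointers in TensorMap, and assign through .device(gpu_device).
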
diff --git a/unsupported/test/cxx11_tensor_cuda.cpp b/unsupported/test/cxx11_tensor_cuda.cpp
index 5ff082a3a..49e1894ab 100644
--- a/unsupported/test/cxx11_tensor_cuda.cpp
+++ b/unsupported/test/cxx11_tensor_cuda.cpp
@@ -507,6 +507,115 @@ static void test_cuda_convolution_3d()
}
}
+
+template <typename Scalar>
+void test_cuda_lgamma(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.lgamma();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
+ }
+ }
+}
+
+template <typename Scalar>
+void test_cuda_erf(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erf();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
+ }
+ }
+}
+
+template <typename Scalar>
+void test_cuda_erfc(const Scalar stddev)
+{
+ Tensor<Scalar, 2> in(72,97);
+ in.setRandom();
+ in *= in.constant(stddev);
+ Tensor<Scalar, 2> out(72,97);
+ out.setZero();
+
+ std::size_t bytes = in.size() * sizeof(Scalar);
+
+ Scalar* d_in;
+ Scalar* d_out;
+ cudaMalloc((void**)(&d_in), bytes);
+ cudaMalloc((void**)(&d_out), bytes);
+
+ cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97);
+ Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97);
+
+ gpu_out.device(gpu_device) = gpu_in.erfc();
+
+ assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 72; ++i) {
+ for (int j = 0; j < 97; ++j) {
+ VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
+ }
+ }
+}
+
void test_cxx11_tensor_cuda()
{
CALL_SUBTEST(test_cuda_elementwise_small());
@@ -522,4 +631,34 @@ void test_cxx11_tensor_cuda()
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
+ CALL_SUBTEST(test_cuda_lgamma<float>(1.0f));
+ CALL_SUBTEST(test_cuda_lgamma<float>(100.0f));
+ CALL_SUBTEST(test_cuda_lgamma<float>(0.01f));
+ CALL_SUBTEST(test_cuda_lgamma<float>(0.001f));
+ CALL_SUBTEST(test_cuda_erf<float>(1.0f));
+ CALL_SUBTEST(test_cuda_erf<float>(100.0f));
+ CALL_SUBTEST(test_cuda_erf<float>(0.01f));
+ CALL_SUBTEST(test_cuda_erf<float>(0.001f));
+ CALL_SUBTEST(test_cuda_erfc<float>(1.0f));
+ // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
+ CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST(test_cuda_erfc<float>(0.01f));
+ CALL_SUBTEST(test_cuda_erfc<float>(0.001f));
+ CALL_SUBTEST(test_cuda_tanh<double>(1.0));
+ CALL_SUBTEST(test_cuda_tanh<double>(100.0));
+ CALL_SUBTEST(test_cuda_tanh<double>(0.01));
+ CALL_SUBTEST(test_cuda_tanh<double>(0.001));
+ CALL_SUBTEST(test_cuda_lgamma<double>(1.0));
+ CALL_SUBTEST(test_cuda_lgamma<double>(100.0));
+ CALL_SUBTEST(test_cuda_lgamma<double>(0.01));
+ CALL_SUBTEST(test_cuda_lgamma<double>(0.001));
+ CALL_SUBTEST(test_cuda_erf<double>(1.0));
+ CALL_SUBTEST(test_cuda_erf<double>(100.0));
+ CALL_SUBTEST(test_cuda_erf<double>(0.01));
+ CALL_SUBTEST(test_cuda_erf<double>(0.001));
+ CALL_SUBTEST(test_cuda_erfc<double>(1.0));
+ // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
+ CALL_SUBTEST(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
+ CALL_SUBTEST(test_cuda_erfc<double>(0.01));
+ CALL_SUBTEST(test_cuda_erfc<double>(0.001));
}
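
On the two commented-out subtests and the "lacks precision" remark: for large positive x, erfc(x) ~ exp(-x^2)/(x*sqrt(pi)), so the true value underflows long before x reaches 100 (roughly x ≈ 10 in single precision and x ≈ 27 in double), leaving a relative-error check like VERIFY_IS_APPROX nothing meaningful to compare against; hence the stddev of 5 used instead. A standalone illustration (not part of the commit):

// Standalone illustration (not part of this commit) of erfc underflow
// for large arguments, using the <cmath> reference implementations.
#include <cmath>
#include <cstdio>

int main() {
  std::printf("erfc(5.0)   (double) = %g\n", std::erfc(5.0));    // ~1.54e-12: representable
  std::printf("erfc(30.0)  (double) = %g\n", std::erfc(30.0));   // true value ~1e-393: underflows to 0
  std::printf("erfc(12.0f) (float)  = %g\n", std::erfc(12.0f));  // true value ~1e-64: underflows to 0
  return 0;
}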