diff options
author | RJ Ryan <rjryan@google.com> | 2016-09-20 07:18:20 -0700 |
---|---|---|
committer | RJ Ryan <rjryan@google.com> | 2016-09-20 07:18:20 -0700 |
commit | b2c6dc48d9189eb96f878aa6028aec245eadde85 (patch) | |
tree | d50f0abc9a8873616bea6c0a8a62c4a07fae7c10 /unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu | |
parent | 8a66ca4b100577e5a38082d47a1ffc0183574046 (diff) |
Add CUDA-specific std::complex<T> specializations for scalar_sum_op, scalar_difference_op, scalar_product_op, and scalar_quotient_op.
Diffstat (limited to 'unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu')
-rw-r--r-- | unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu | 97 |
1 files changed, 97 insertions, 0 deletions
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
#define EIGEN_USE_GPU

#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
#include <cuda_fp16.h>
#endif
#include "main.h"
#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;

// Checks the coefficient-wise +, -, * and / of two std::complex<T> tensors
// evaluated through Eigen::GpuDevice against the same operations computed on
// the host with std::complex<T>.
//
// Both inputs are filled on the device with a constant value, so every
// element of the output is expected to equal the single host-side result.
template<typename T>
void test_cuda_complex_cwise_ops() {
  const int kNumItems = 2;
  std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);

  std::complex<T>* d_in1 = NULL;
  std::complex<T>* d_in2 = NULL;
  std::complex<T>* d_out = NULL;

  // Check every CUDA runtime call: a failed allocation would otherwise only
  // show up later as an unrelated-looking error.
  cudaError_t status = cudaMalloc((void**)(&d_in1), complex_bytes);
  VERIFY(status == cudaSuccess);
  status = cudaMalloc((void**)(&d_in2), complex_bytes);
  VERIFY(status == cudaSuccess);
  status = cudaMalloc((void**)(&d_out), complex_bytes);
  VERIFY(status == cudaSuccess);

  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);

  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
      d_in1, kNumItems);
  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
      d_in2, kNumItems);
  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
      d_out, kNumItems);

  const std::complex<T> a(3.14f, 2.7f);
  const std::complex<T> b(-10.6f, 1.4f);

  // Fill the device buffers with the two operands.
  gpu_in1.device(gpu_device) = gpu_in1.constant(a);
  gpu_in2.device(gpu_device) = gpu_in2.constant(b);

  enum CwiseOp {
    Add,
    Sub,
    Mul,
    Div
  };

  // Host-side landing buffer; sized from kNumItems so it always matches the
  // number of bytes copied back below.
  Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
  for (CwiseOp op : {Add, Sub, Mul, Div}) {
    std::complex<T> expected;
    switch (op) {
      case Add:
        gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
        expected = a + b;
        break;
      case Sub:
        gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
        expected = a - b;
        break;
      case Mul:
        gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
        expected = a * b;
        break;
      case Div:
        gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
        expected = a / b;
        break;
    }

    // The copy and the synchronization must run unconditionally. Placing them
    // inside assert() would drop them entirely when NDEBUG is defined and
    // leave `actual` uninitialized.
    status = cudaMemcpyAsync(actual.data(), d_out, complex_bytes,
                             cudaMemcpyDeviceToHost, gpu_device.stream());
    VERIFY(status == cudaSuccess);
    status = cudaStreamSynchronize(gpu_device.stream());
    VERIFY(status == cudaSuccess);

    for (int i = 0; i < kNumItems; ++i) {
      VERIFY_IS_APPROX(actual(i), expected);
    }
  }

  status = cudaFree(d_in1);
  VERIFY(status == cudaSuccess);
  status = cudaFree(d_in2);
  VERIFY(status == cudaSuccess);
  status = cudaFree(d_out);
  VERIFY(status == cudaSuccess);
}


// Test entry point: runs the complex coefficient-wise checks for both
// single- and double-precision complex scalars.
void test_cxx11_tensor_complex_cwise_ops()
{
  CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
  CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
}