aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/test/cxx11_tensor_argmax_gpu.cu
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2018-06-20 16:44:58 -0400
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2018-06-20 16:44:58 -0400
commit1bb6fa99a31d2dcf5431087d3f238e2dcca03084 (patch)
treee62d41b8d6430849aea4bf97785a54488bf542d4 /unsupported/test/cxx11_tensor_argmax_gpu.cu
parentcfdabbcc8f708c06da2bfa4e924edc25619f013a (diff)
merging the CUDA and HIP implementation for the Tensor directory and the unit tests
Diffstat (limited to 'unsupported/test/cxx11_tensor_argmax_gpu.cu')
-rw-r--r--unsupported/test/cxx11_tensor_argmax_gpu.cu90
1 files changed, 46 insertions, 44 deletions
diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu
index 3d73d491a..541a27865 100644
--- a/unsupported/test/cxx11_tensor_argmax_gpu.cu
+++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu
@@ -9,16 +9,18 @@
#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_FUNC cxx11_tensor_cuda
+#define EIGEN_TEST_FUNC cxx11_tensor_gpu
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
+#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
+
using Eigen::Tensor;
template <int Layout>
-void test_cuda_simple_argmax()
+void test_gpu_simple_argmax()
{
Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97));
Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1));
@@ -34,13 +36,13 @@ void test_cuda_simple_argmax()
double* d_in;
DenseIndex* d_out_max;
DenseIndex* d_out_min;
- cudaMalloc((void**)(&d_in), in_bytes);
- cudaMalloc((void**)(&d_out_max), out_bytes);
- cudaMalloc((void**)(&d_out_min), out_bytes);
+ gpuMalloc((void**)(&d_in), in_bytes);
+ gpuMalloc((void**)(&d_out_max), out_bytes);
+ gpuMalloc((void**)(&d_out_min), out_bytes);
- cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice);
+ gpuMemcpy(d_in, in.data(), in_bytes, gpuMemcpyHostToDevice);
- Eigen::CudaStreamDevice stream;
+ Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97));
@@ -50,20 +52,20 @@ void test_cuda_simple_argmax()
gpu_out_max.device(gpu_device) = gpu_in.argmax();
gpu_out_min.device(gpu_device) = gpu_in.argmin();
- assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+ assert(gpuMemcpyAsync(out_max.data(), d_out_max, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuMemcpyAsync(out_min.data(), d_out_min, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1);
VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0);
- cudaFree(d_in);
- cudaFree(d_out_max);
- cudaFree(d_out_min);
+ gpuFree(d_in);
+ gpuFree(d_out_max);
+ gpuFree(d_out_min);
}
template <int DataLayout>
-void test_cuda_argmax_dim()
+void test_gpu_argmax_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims;
@@ -97,12 +99,12 @@ void test_cuda_argmax_dim()
float* d_in;
DenseIndex* d_out;
- cudaMalloc((void**)(&d_in), in_bytes);
- cudaMalloc((void**)(&d_out), out_bytes);
+ gpuMalloc((void**)(&d_in), in_bytes);
+ gpuMalloc((void**)(&d_out), out_bytes);
- cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+ gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
- Eigen::CudaStreamDevice stream;
+ Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
@@ -110,8 +112,8 @@ void test_cuda_argmax_dim()
gpu_out.device(gpu_device) = gpu_in.argmax(dim);
- assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+ assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(tensor_arg.size(),
size_t(2*3*5*7 / tensor.dimension(dim)));
@@ -134,25 +136,25 @@ void test_cuda_argmax_dim()
}
}
- cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+ gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
gpu_out.device(gpu_device) = gpu_in.argmax(dim);
- assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+ assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
- cudaFree(d_in);
- cudaFree(d_out);
+ gpuFree(d_in);
+ gpuFree(d_out);
}
}
template <int DataLayout>
-void test_cuda_argmin_dim()
+void test_gpu_argmin_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
std::vector<int> dims;
@@ -186,12 +188,12 @@ void test_cuda_argmin_dim()
float* d_in;
DenseIndex* d_out;
- cudaMalloc((void**)(&d_in), in_bytes);
- cudaMalloc((void**)(&d_out), out_bytes);
+ gpuMalloc((void**)(&d_in), in_bytes);
+ gpuMalloc((void**)(&d_out), out_bytes);
- cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+ gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
- Eigen::CudaStreamDevice stream;
+ Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
@@ -199,8 +201,8 @@ void test_cuda_argmin_dim()
gpu_out.device(gpu_device) = gpu_in.argmin(dim);
- assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+ assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(tensor_arg.size(),
2*3*5*7 / tensor.dimension(dim));
@@ -223,29 +225,29 @@ void test_cuda_argmin_dim()
}
}
- cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+ gpuMemcpy(d_in, tensor.data(), in_bytes, gpuMemcpyHostToDevice);
gpu_out.device(gpu_device) = gpu_in.argmin(dim);
- assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
- assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+ assert(gpuMemcpyAsync(tensor_arg.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
+ assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
// Expect max to be in the last index of the reduced dimension
VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
}
- cudaFree(d_in);
- cudaFree(d_out);
+ gpuFree(d_in);
+ gpuFree(d_out);
}
}
-void test_cxx11_tensor_cuda()
+void test_cxx11_tensor_gpu()
{
- CALL_SUBTEST_1(test_cuda_simple_argmax<RowMajor>());
- CALL_SUBTEST_1(test_cuda_simple_argmax<ColMajor>());
- CALL_SUBTEST_2(test_cuda_argmax_dim<RowMajor>());
- CALL_SUBTEST_2(test_cuda_argmax_dim<ColMajor>());
- CALL_SUBTEST_3(test_cuda_argmin_dim<RowMajor>());
- CALL_SUBTEST_3(test_cuda_argmin_dim<ColMajor>());
+ CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>());
+ CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>());
+ CALL_SUBTEST_2(test_gpu_argmax_dim<RowMajor>());
+ CALL_SUBTEST_2(test_gpu_argmax_dim<ColMajor>());
+ CALL_SUBTEST_3(test_gpu_argmin_dim<RowMajor>());
+ CALL_SUBTEST_3(test_gpu_argmin_dim<ColMajor>());
}