From 1bb6fa99a31d2dcf5431087d3f238e2dcca03084 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 20 Jun 2018 16:44:58 -0400 Subject: merging the CUDA and HIP implementation for the Tensor directory and the unit tests --- unsupported/test/cxx11_tensor_scan_gpu.cu | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'unsupported/test/cxx11_tensor_scan_gpu.cu') diff --git a/unsupported/test/cxx11_tensor_scan_gpu.cu b/unsupported/test/cxx11_tensor_scan_gpu.cu index 1d4edef11..51cd3a3cf 100644 --- a/unsupported/test/cxx11_tensor_scan_gpu.cu +++ b/unsupported/test/cxx11_tensor_scan_gpu.cu @@ -9,19 +9,20 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_scan_cuda +#define EIGEN_TEST_FUNC cxx11_tensor_scan_gpu #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU #include "main.h" #include +#include using Eigen::Tensor; typedef Tensor::DimensionPair DimPair; template -void test_cuda_cumsum(int m_size, int k_size, int n_size) +void test_gpu_cumsum(int m_size, int k_size, int n_size) { std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; Tensor t_input(m_size, k_size, n_size); @@ -36,12 +37,12 @@ void test_cuda_cumsum(int m_size, int k_size, int n_size) float* d_t_input; float* d_t_result; - cudaMalloc((void**)(&d_t_input), t_input_bytes); - cudaMalloc((void**)(&d_t_result), t_result_bytes); + gpuMalloc((void**)(&d_t_input), t_input_bytes); + gpuMalloc((void**)(&d_t_result), t_result_bytes); - cudaMemcpy(d_t_input, t_input.data(), t_input_bytes, cudaMemcpyHostToDevice); + gpuMemcpy(d_t_input, t_input.data(), t_input_bytes, gpuMemcpyHostToDevice); - Eigen::CudaStreamDevice stream; + Eigen::GpuStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap > @@ -52,7 +53,7 @@ void test_cuda_cumsum(int m_size, int k_size, int n_size) gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1); t_result = t_input.cumsum(1); - cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + gpuMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, gpuMemcpyDeviceToHost); for (DenseIndex i = 0; i < t_result.size(); i++) { if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) { continue; @@ -65,13 +66,13 @@ void test_cuda_cumsum(int m_size, int k_size, int n_size) assert(false); } - cudaFree((void*)d_t_input); - cudaFree((void*)d_t_result); + gpuFree((void*)d_t_input); + gpuFree((void*)d_t_result); } -void test_cxx11_tensor_scan_cuda() +void test_cxx11_tensor_scan_gpu() { - CALL_SUBTEST_1(test_cuda_cumsum(128, 128, 128)); - CALL_SUBTEST_2(test_cuda_cumsum(128, 128, 128)); + CALL_SUBTEST_1(test_gpu_cumsum(128, 128, 128)); + CALL_SUBTEST_2(test_gpu_cumsum(128, 128, 128)); } -- cgit v1.2.3