From 876f392c396318f33454168db36ed54308e54e0d Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 11 Jul 2018 10:39:54 -0400 Subject: Updates corresponding to the latest round of PR feedback The major changes are 1. Moving CUDA/PacketMath.h to GPU/PacketMath.h 2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h 3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h The above three changes effectively enable the Eigen "Packet" layer for the HIP platform 4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic") 5. Updating the "EIGEN_DEVICE_FUNC" marking in some places The change has been tested on the HIP and CUDA platforms. --- unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h index f009ae855..9966955f7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h @@ -32,8 +32,7 @@ #define gpuGetDeviceCount hipGetDeviceCount #define gpuGetErrorString hipGetErrorString #define gpuGetDeviceProperties hipGetDeviceProperties -// FIXME : use hipStreamDefault instead of 0x00 -#define gpuStreamDefault 0x00 +#define gpuStreamDefault hipStreamDefault #define gpuGetDevice hipGetDevice #define gpuSetDevice hipSetDevice #define gpuMalloc hipMalloc @@ -47,6 +46,7 @@ #define gpuSharedMemConfig hipSharedMemConfig #define gpuDeviceSetSharedMemConfig hipDeviceSetSharedMemConfig #define gpuStreamSynchronize hipStreamSynchronize +#define gpuDeviceSynchronize hipDeviceSynchronize #define gpuMemcpy hipMemcpy #else @@ -73,6 +73,7 @@ #define gpuSharedMemConfig cudaSharedMemConfig #define gpuDeviceSetSharedMemConfig cudaDeviceSetSharedMemConfig #define gpuStreamSynchronize cudaStreamSynchronize 
+#define gpuDeviceSynchronize cudaDeviceSynchronize #define gpuMemcpy cudaMemcpy #endif -- cgit v1.2.3