about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
diff options
context:
space:
mode:
author: Deven Desai <deven.desai.amd@gmail.com> 2018-07-11 10:39:54 -0400
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-07-11 10:39:54 -0400
commit: 876f392c396318f33454168db36ed54308e54e0d (patch)
tree: a727bc91873b5c0aeec05312176a0f39e2cb64d5 /unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
parent: 1fe0b749042320501c59378f2860d9322b0c6e19 (diff)
Updates corresponding to the latest round of PR feedback
The major changes are:
1. Moving CUDA/PacketMath.h to GPU/PacketMath.h
2. Moving CUDA/MathFunctions.h to GPU/MathFunctions.h
3. Moving CUDA/CudaSpecialFunctions.h to GPU/GpuSpecialFunctions.h
The above three changes effectively enable the Eigen "Packet" layer for the HIP platform.
4. Merging the "hip_basic" and "cuda_basic" unit tests into one ("gpu_basic")
5. Updating the "EIGEN_DEVICE_FUNC" marking in some places
The change has been tested on the HIP and CUDA platforms.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h | 5
1 files changed, 3 insertions, 2 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
index f009ae855..9966955f7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h
@@ -32,8 +32,7 @@
#define gpuGetDeviceCount hipGetDeviceCount
#define gpuGetErrorString hipGetErrorString
#define gpuGetDeviceProperties hipGetDeviceProperties
-// FIXME : use hipStreamDefault instead of 0x00
-#define gpuStreamDefault 0x00
+#define gpuStreamDefault hipStreamDefault
#define gpuGetDevice hipGetDevice
#define gpuSetDevice hipSetDevice
#define gpuMalloc hipMalloc
@@ -47,6 +46,7 @@
#define gpuSharedMemConfig hipSharedMemConfig
#define gpuDeviceSetSharedMemConfig hipDeviceSetSharedMemConfig
#define gpuStreamSynchronize hipStreamSynchronize
+#define gpuDeviceSynchronize hipDeviceSynchronize
#define gpuMemcpy hipMemcpy
#else
@@ -73,6 +73,7 @@
#define gpuSharedMemConfig cudaSharedMemConfig
#define gpuDeviceSetSharedMemConfig cudaDeviceSetSharedMemConfig
#define gpuStreamSynchronize cudaStreamSynchronize
+#define gpuDeviceSynchronize cudaDeviceSynchronize
#define gpuMemcpy cudaMemcpy
#endif