about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Core/arch/GPU
diff options
context:
space:
mode:
author: Deven Desai <deven.desai.amd@gmail.com> 2020-03-11 23:06:56 +0000
committer: Deven Desai <deven.desai.amd@gmail.com> 2020-03-12 01:06:24 +0000
commit: 7158ed4e0e34d40cd0f358a3bf69a5c30d8d0f83 (patch)
tree: 6ee1f2ce81b3e442210564b283fdf9e953ff0306 /Eigen/src/Core/arch/GPU
parent: d53ae40f7bcfb948b85b893acf305cdebcba3ba8 (diff)
Fixing HIP breakage caused by the recent commit that introduces Packet4h2 as the Eigen::half packet type
Diffstat (limited to 'Eigen/src/Core/arch/GPU')
-rw-r--r-- Eigen/src/Core/arch/GPU/PacketMath.h | 11
1 files changed, 6 insertions, 5 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index 1f6a562c5..dd4e77d3a 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -481,7 +481,7 @@ ptranspose(PacketBlock<double2,2>& kernel) {
// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning
// its corresponding packet_traits<Eigen::half> must be visible on host.
#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC)) || \
- (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIPCC) && defined(EIGEN_HIP_DEVICE_COMPILE)) || \
+ (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIPCC)) || \
(defined(EIGEN_HAS_CUDA_FP16) && defined(__clang__) && defined(__CUDA__))
typedef ulonglong2 Packet4h2;
@@ -515,11 +515,13 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
-#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIP_DEVICE_COMPILE)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
half2 r;
r.x = from;
r.y = from;
return r;
+#elif defined(EIGEN_HIPCC)
+ return __half2{from,from};
#else
return __half2half2(from);
#endif
@@ -537,7 +539,7 @@ pset1<Packet4h2>(const Eigen::half& from) {
return r;
}
-#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIP_DEVICE_COMPILE) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
+#if defined(EIGEN_CUDA_ARCH) || defined(EIGEN_HIPCC) || (defined(EIGEN_CUDACC) && EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC)
namespace {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) {
@@ -559,7 +561,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
const half2& from) {
-#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIP_DEVICE_COMPILE)
+#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
to[0] = from.x;
to[1] = from.y;
#else
@@ -1056,7 +1058,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2& a) {
#endif
} // namespace
-
template <>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2
pload<Packet4h2>(const Eigen::half* from) {