aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Antonio Sanchez <cantonios@google.com>2020-12-04 15:33:19 -0800
committerGravatar Antonio Sanchez <cantonios@google.com>2020-12-04 16:14:03 -0800
commit5ec4907434742d4555df4aa708b665868b88f3b4 (patch)
tree8daaa2e5b85d649d81ada96be333520c23a09e64 /Eigen
parentf9fac1d5b044afb8104ecb5c1e5183c309bafca4 (diff)
Clean up `#if`s in GPU PacketPath.
Removed redundant checks and redundant code for CUDA/HIP. Note: there are several issues here of calling `__device__` functions from `__host__ __device__` functions, in particular `__low2half`. We do not address that here -- only modifying this file enough to get our current tests to compile. Fixed: #1847
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/GPU/PacketMath.h185
1 files changed, 24 insertions, 161 deletions
diff --git a/Eigen/src/Core/arch/GPU/PacketMath.h b/Eigen/src/Core/arch/GPU/PacketMath.h
index dd4e77d3a..fb32c98ac 100644
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@@ -480,9 +480,7 @@ ptranspose(PacketBlock<double2,2>& kernel) {
// Packet4h2 must be defined in the macro without EIGEN_CUDA_ARCH, meaning
// its corresponding packet_traits<Eigen::half> must be visible on host.
-#if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC)) || \
- (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIPCC)) || \
- (defined(EIGEN_HAS_CUDA_FP16) && defined(__clang__) && defined(__CUDA__))
+#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16)
typedef ulonglong2 Packet4h2;
template<> struct unpacket_traits<Packet4h2> { typedef Eigen::half type; enum {size=8, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4h2 half; };
@@ -515,13 +513,8 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
-#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
- half2 r;
- r.x = from;
- r.y = from;
- return r;
-#elif defined(EIGEN_HIPCC)
- return __half2{from,from};
+#if defined(EIGEN_HIPCC)
+ return half2half2(from);
#else
return __half2half2(from);
#endif
@@ -561,50 +554,33 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstore(Eigen::half* to,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu(Eigen::half* to,
const half2& from) {
-#if !defined(EIGEN_CUDA_ARCH) && !defined(EIGEN_HIPCC)
- to[0] = from.x;
- to[1] = from.y;
-#else
+#if defined(EIGEN_GPU_COMPILE_PHASE)
to[0] = __low2half(from);
to[1] = __high2half(from);
+#else
+ // Unfortunately __low2half and __high2half are only __device__ functions.
+ to[0] = __float2half(__low2float(from));
+ to[1] = __float2half(__high2float(from));
#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_aligned(
const Eigen::half* from) {
-
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
return __ldg((const half2*)from);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 350
- return __ldg((const half2*)from);
#else
return __halves2half2(*(from+0), *(from+1));
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro_unaligned(
const Eigen::half* from) {
-
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
return __halves2half2(__ldg(from+0), __ldg(from+1));
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 350
- return __halves2half2(__ldg(from+0), __ldg(from+1));
#else
return __halves2half2(*(from+0), *(from+1));
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pgather(const Eigen::half* from,
@@ -651,20 +627,12 @@ ptranspose(PacketBlock<half2,2>& kernel) {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plset(const Eigen::half& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __halves2half2(a, __hadd(a, __float2half(1.0f)));
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __halves2half2(a, __hadd(a, __float2half(1.0f)));
#else
float f = __half2float(a) + 1.0f;
return __halves2half2(a, __float2half(f));
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pselect(const half2& mask,
@@ -749,13 +717,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pandnot(const half2& a,
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
const half2& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hadd2(a, b);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hadd2(a, b);
#else
float a1 = __low2float(a);
@@ -766,19 +728,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd(const half2& a,
float r2 = a2 + b2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
const half2& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hsub2(a, b);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hsub2(a, b);
#else
float a1 = __low2float(a);
@@ -789,39 +743,23 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 psub(const half2& a,
float r2 = a2 - b2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pnegate(const half2& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hneg2(a);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hneg2(a);
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return __floats2half2_rn(-a1, -a2);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pconj(const half2& a) { return a; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
const half2& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hmul2(a, b);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hmul2(a, b);
#else
float a1 = __low2float(a);
@@ -832,20 +770,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul(const half2& a,
float r2 = a2 * b2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
const half2& b,
const half2& c) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hfma2(a, b, c);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hfma2(a, b, c);
#else
float a1 = __low2float(a);
@@ -858,18 +788,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& a,
float r2 = a2 * b2 + c2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
const half2& b) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
return __h2div(a, b);
-
#else // EIGEN_CUDA_ARCH
-
float a1 = __low2float(a);
float a2 = __high2float(a);
float b1 = __low2float(b);
@@ -877,7 +802,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a,
float r1 = a1 / b1;
float r2 = a2 / b2;
return __floats2half2_rn(r1, r2);
-
#endif
}
@@ -904,33 +828,17 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax(const half2& a,
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux(const half2& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hadd(__low2half(a), __high2half(a));
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hadd(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(__float2half(a1 + a2));
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- __half first = __low2half(a);
- __half second = __high2half(a);
- return __hgt(first, second) ? first : second;
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
__half first = __low2half(a);
__half second = __high2half(a);
return __hgt(first, second) ? first : second;
@@ -939,20 +847,10 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_max(const half2& a) {
float a2 = __high2float(a);
return a1 > a2 ? __low2half(a) : __high2half(a);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- __half first = __low2half(a);
- __half second = __high2half(a);
- return __hlt(first, second) ? first : second;
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
__half first = __low2half(a);
__half second = __high2half(a);
return __hlt(first, second) ? first : second;
@@ -961,26 +859,16 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_min(const half2& a) {
float a2 = __high2float(a);
return a1 < a2 ? __low2half(a) : __high2half(a);
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(const half2& a) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hmul(__low2half(a), __high2half(a));
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hmul(__low2half(a), __high2half(a));
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
return Eigen::half(__float2half(a1 * a2));
#endif
-
-#endif
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) {
@@ -1108,14 +996,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<Eigen::half>(
template <>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet4h2
ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- Packet4h2 r;
- r = __ldg((const Packet4h2*)from);
- return r;
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 350
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 350
Packet4h2 r;
r = __ldg((const Packet4h2*)from);
return r;
@@ -1128,8 +1009,6 @@ ploadt_ro<Packet4h2, Aligned>(const Eigen::half* from) {
r_alias[3] = ploadt_ro_aligned(from + 6);
return r;
#endif
-
-#endif
}
template <>
@@ -1294,7 +1173,7 @@ ptranspose(PacketBlock<Packet4h2,8>& kernel) {
ptranspose_half(f_row0[1], f_row1[1]);
ptranspose_half(f_row2[0], f_row3[0]);
ptranspose_half(f_row2[1], f_row3[1]);
-
+
}
template <>
@@ -1685,13 +1564,7 @@ prsqrt<Packet4h2>(const Packet4h2& a) {
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
const half2& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hadd2(a, b);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hadd2(a, b);
#else
float a1 = __low2float(a);
@@ -1702,20 +1575,12 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 padd<half2>(const half2& a,
float r2 = a2 + b2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
const half2& b) {
-#if defined(EIGEN_HIP_DEVICE_COMPILE)
-
- return __hmul2(a, b);
-
-#else // EIGEN_CUDA_ARCH
-
-#if EIGEN_CUDA_ARCH >= 530
+#if defined(EIGEN_HIP_DEVICE_COMPILE) || EIGEN_CUDA_ARCH >= 530
return __hmul2(a, b);
#else
float a1 = __low2float(a);
@@ -1726,8 +1591,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmul<half2>(const half2& a,
float r2 = a2 * b2;
return __floats2half2_rn(r1, r2);
#endif
-
-#endif
}
template<>