about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen
diff options
context:
space:
mode:
author: Antonio Sanchez <cantonios@google.com> 2021-03-08 21:06:28 -0800
committer: Antonio Sanchez <cantonios@google.com> 2021-03-08 21:06:28 -0800
commit: 853a5c4b843a3f1de5de2a25429eefd62dbd153a (patch)
tree: d7c0d21527edd924a1cd23e5300d3d6d00adbea7 /Eigen
parent: 94327dbfba1a8c39149ddcb262f6939ade4f5910 (diff)
Fix ambiguous call to CUDA __half constructor.
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/src/Core/arch/Default/Half.h 12
1 files changed, 8 insertions, 4 deletions
diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h
index 3eb91a091..eb3030aa7 100644
--- a/Eigen/src/Core/arch/Default/Half.h
+++ b/Eigen/src/Core/arch/Default/Half.h
@@ -848,19 +848,23 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::half>(c
#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 90000
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_sync(unsigned mask, Eigen::half var, int srcLane, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_sync(mask, static_cast<__half>(var), srcLane, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_sync(mask, h, srcLane, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_up_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_up_sync(mask, static_cast<__half>(var), delta, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_up_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_down_sync(unsigned mask, Eigen::half var, unsigned int delta, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_down_sync(mask, static_cast<__half>(var), delta, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_down_sync(mask, h, delta, width));
}
__device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor_sync(unsigned mask, Eigen::half var, int laneMask, int width=warpSize) {
- return static_cast<Eigen::half>(__shfl_xor_sync(mask, static_cast<__half>(var), laneMask, width));
+ const __half h = var;
+ return static_cast<Eigen::half>(__shfl_xor_sync(mask, h, laneMask, width));
}
#else // HIP or CUDA SDK < 9.0