diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-03-31 10:55:03 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-03-31 10:55:03 -0700 |
commit | c36ab1990247a5b60bcad564759e8903f30fbab5 (patch) | |
tree | 5a757092e592083486be35802dbecfa2c75afcf1 | |
parent | b575fb1d02f7a98c94a576284fbcd4ff85970120 (diff) |
Added __ldg primitive for fp16.
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index dc7119c06..a2a2bac37 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -423,7 +423,14 @@ using ::ceil;
 __device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
   return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
 }
+#endif
+// ldg() has an overload for __half, but we also need one for Eigen::half.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+  return Eigen::internal::raw_uint16_to_half(
+    __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
 #endif