aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-03-31 10:55:03 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-03-31 10:55:03 -0700
commitc36ab1990247a5b60bcad564759e8903f30fbab5 (patch)
tree5a757092e592083486be35802dbecfa2c75afcf1 /Eigen
parentb575fb1d02f7a98c94a576284fbcd4ff85970120 (diff)
Added __ldg primitive for fp16.
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/CUDA/Half.h7
1 files changed, 7 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index dc7119c06..a2a2bac37 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -423,7 +423,14 @@ using ::ceil;
__device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
}
+#endif
+// __ldg() has an overload for __half, but we also need one for Eigen::half: read the raw bits through the unsigned short overload and pass them to raw_uint16_to_half.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320
+static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
+ return Eigen::internal::raw_uint16_to_half(
+ __ldg(reinterpret_cast<const unsigned short*>(ptr)));
+}
#endif