From c36ab1990247a5b60bcad564759e8903f30fbab5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 31 Mar 2016 10:55:03 -0700 Subject: Added __ldg primitive for fp16. --- Eigen/src/Core/arch/CUDA/Half.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Eigen') diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index dc7119c06..a2a2bac37 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -423,7 +423,14 @@ using ::ceil; __device__ inline Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) { return static_cast(__shfl_xor(static_cast(var), laneMask, width)); } +#endif +// ldg() has an overload for __half, but we also need one for Eigen::half. +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 320 +static inline EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { + return Eigen::internal::raw_uint16_to_half( + __ldg(reinterpret_cast(ptr))); +} #endif -- cgit v1.2.3