diff options
-rw-r--r-- | Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 12 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 4 |
2 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index be0e2bdf2..138881996 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -212,8 +212,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2&
   float a2 = __high2float(a);
   float b1 = __low2float(b);
   float b2 = __high2float(b);
-  half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
-  half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
+  __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
+  __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
   return __halves2half2(r1, r2);
 }
 
@@ -222,8 +222,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
   float a2 = __high2float(a);
   float b1 = __low2float(b);
   float b2 = __high2float(b);
-  half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
-  half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
+  __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
+  __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
   return __halves2half2(r1, r2);
 }
 
@@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
 #else
   float a1 = __low2float(a);
   float a2 = __high2float(a);
-  return half(__float2half_rn(a1 + a2));
+  return half(internal::raw_uint16_to_half(__float2half_rn(a1 + a2)));
 #endif
 }
 
@@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
 #else
   float a1 = __low2float(a);
   float a2 = __high2float(a);
-  return half(__float2half_rn(a1 * a2));
+  return half(internal::raw_uint16_to_half(__float2half_rn(a1 * a2)));
 #endif
 }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index ae4ce3c90..31b361c83 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -129,6 +129,10 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 double loadConstant(const double* address) {
   return __ldg(address);
 }
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+Eigen::half loadConstant(const Eigen::half* address) {
+  return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x)));
+}
 #endif
 
 }