about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- Eigen/src/Core/arch/CUDA/PacketMathHalf.h 12
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h 4
2 files changed, 10 insertions, 6 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index be0e2bdf2..138881996 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -212,8 +212,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmin<half2>(const half2&
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
- half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
- half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
+ __half r1 = a1 < b1 ? __low2half(a) : __low2half(b);
+ __half r2 = a2 < b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2);
}
@@ -222,8 +222,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmax<half2>(const half2&
float a2 = __high2float(a);
float b1 = __low2float(b);
float b2 = __high2float(b);
- half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
- half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
+ __half r1 = a1 > b1 ? __low2half(a) : __low2half(b);
+ __half r2 = a2 > b2 ? __high2half(a) : __high2half(b);
return __halves2half2(r1, r2);
}
@@ -233,7 +233,7 @@ template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
- return half(__float2half_rn(a1 + a2));
+ return half(internal::raw_uint16_to_half(__float2half_rn(a1 + a2)));
#endif
}
@@ -267,7 +267,7 @@ template<> EIGEN_DEVICE_FUNC inline half predux_mul<half2>(const half2& a) {
#else
float a1 = __low2float(a);
float a2 = __high2float(a);
- return half(__float2half_rn(a1 * a2));
+ return half(internal::raw_uint16_to_half(__float2half_rn(a1 * a2)));
#endif
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index ae4ce3c90..31b361c83 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -129,6 +129,10 @@ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double loadConstant(const double* address) {
return __ldg(address);
}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+Eigen::half loadConstant(const Eigen::half* address) {
+ return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x)));
+}
#endif
}