aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-06 17:11:31 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-06 17:11:31 -0700
commit: 532fdf24cb8e0ec0ee546a8ba57fc3d75f138e9f (patch)
tree: 2899042bd90850a0ba2a1499997fd7634d3f1eb6 /Eigen
parent: 165150e89677bf1006ee8d3a66891744f228206d (diff)
Added support for hardware conversion between fp16 and full floats whenever
possible.
Diffstat (limited to 'Eigen')
-rw-r--r-- Eigen/Core 5
-rw-r--r-- Eigen/src/Core/arch/CUDA/Half.h 10
2 files changed, 15 insertions, 0 deletions
diff --git a/Eigen/Core b/Eigen/Core
index e44819383..1e62f3ec1 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -204,6 +204,11 @@
#endif
#endif
+#if defined(__F16C__)
+ // We can use the optimized fp16 to float and float to fp16 conversion routines
+ #define EIGEN_HAS_FP16_C
+#endif
+
#if defined __CUDACC__
#define EIGEN_VECTORIZE_CUDA
#include <vector_types.h>
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 916812b61..0638dab5c 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -273,6 +273,12 @@ union FP32 {
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float2half(ff);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ __half h;
+ h.x = _cvtss_sh(ff, 0);
+ return h;
+
#else
FP32 f; f.f = ff;
@@ -321,6 +327,10 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff)
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ return _cvtsh_ss(h.x);
+
#else
const FP32 magic = { 113 << 23 };
const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift