diff options
author | 2016-04-06 17:11:31 -0700 | |
---|---|---|
committer | 2016-04-06 17:11:31 -0700 | |
commit | 532fdf24cb8e0ec0ee546a8ba57fc3d75f138e9f (patch) | |
tree | 2899042bd90850a0ba2a1499997fd7634d3f1eb6 /Eigen | |
parent | 165150e89677bf1006ee8d3a66891744f228206d (diff) |
Added support for hardware conversion between fp16 and full floats whenever
possible.
Diffstat (limited to 'Eigen')
-rw-r--r-- | Eigen/Core | 5 | ||||
-rw-r--r-- | Eigen/src/Core/arch/CUDA/Half.h | 10 |
2 files changed, 15 insertions, 0 deletions
diff --git a/Eigen/Core b/Eigen/Core
index e44819383..1e62f3ec1 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -204,6 +204,11 @@
 #endif
 #endif
 
+#if defined(__F16C__)
+ // We can use the optimized fp16 to float and float to fp16 conversion routines
+ #define EIGEN_HAS_FP16_C
+#endif
+
 #if defined __CUDACC__
 #define EIGEN_VECTORIZE_CUDA
 #include <vector_types.h>
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 916812b61..0638dab5c 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -273,6 +273,12 @@ union FP32 {
 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
 return __float2half(ff);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ __half h;
+ h.x = _cvtss_sh(ff, 0);
+ return h;
+
 #else
 FP32 f; f.f = ff;
 
@@ -321,6 +327,10 @@ static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff)
 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
 return __half2float(h);
+
+#elif defined(EIGEN_HAS_FP16_C)
+ return _cvtsh_ss(h.x);
+
 #else
 const FP32 magic = { 113 << 23 };
 const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift