diff options
author | Sami Kama <sami.kama.git@gmail.com> | 2020-03-10 20:28:43 +0000 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2020-03-10 20:28:43 +0000 |
commit | b733b8b680885c0fcdfddea5423171468609b5a6 (patch) | |
tree | 1174a4651bbdbe979a8bd33e97edf4011c8cc7e4 /Eigen/src/Core/arch/GPU/TypeCasting.h | |
parent | a45d28256d020a4e871267c9bf00206fe9d2265e (diff) |
Remove duplicate pset1 for half and add some comments about why we need to expose pmul/add/div/min/max on host
Diffstat (limited to 'Eigen/src/Core/arch/GPU/TypeCasting.h')
-rw-r--r-- | Eigen/src/Core/arch/GPU/TypeCasting.h | 33 |
1 files changed, 29 insertions, 4 deletions
```diff
diff --git a/Eigen/src/Core/arch/GPU/TypeCasting.h b/Eigen/src/Core/arch/GPU/TypeCasting.h
index c278f3fe8..754546225 100644
--- a/Eigen/src/Core/arch/GPU/TypeCasting.h
+++ b/Eigen/src/Core/arch/GPU/TypeCasting.h
@@ -17,12 +17,13 @@ namespace internal {
 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
   (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
 
+
 template <>
 struct type_casting_traits<Eigen::half, float> {
   enum {
     VectorizedCast = 1,
-    SrcCoeffRatio = 2,
-    TgtCoeffRatio = 1
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 2
   };
 };
 
@@ -32,15 +33,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(con
   return make_float4(r1.x, r1.y, r2.x, r2.y);
 }
 
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
+  Packet4h2 r;
+  half2* r_alias=reinterpret_cast<half2*>(&r);
+  r_alias[0]=__floats2half2_rn(a.x,a.y);
+  r_alias[1]=__floats2half2_rn(a.z,a.w);
+  r_alias[2]=__floats2half2_rn(b.x,b.y);
+  r_alias[3]=__floats2half2_rn(b.z,b.w);
+  return r;
+}
+
 template <>
 struct type_casting_traits<float, Eigen::half> {
   enum {
     VectorizedCast = 1,
-    SrcCoeffRatio = 1,
-    TgtCoeffRatio = 2
+    SrcCoeffRatio = 2,
+    TgtCoeffRatio = 1
   };
 };
 
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
+  // Simply discard the second half of the input
+  float4 r;
+  const half2* a_alias=reinterpret_cast<const half2*>(&a);
+  float2 r1 = __half22float2(a_alias[0]);
+  float2 r2 = __half22float2(a_alias[1]);
+  r.x=static_cast<float>(r1.x);
+  r.y=static_cast<float>(r1.y);
+  r.z=static_cast<float>(r2.x);
+  r.w=static_cast<float>(r2.y);
+  return r;
+}
+
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
   // Simply discard the second half of the input
   return __floats2half2_rn(a.x, a.y);
```