about summary refs log tree commit diff homepage
path: root/Eigen/src/Core/arch/CUDA/TypeCasting.h
diff options
context:
space:
mode:
Diffstat (limited to 'Eigen/src/Core/arch/CUDA/TypeCasting.h')
-rw-r--r-- Eigen/src/Core/arch/CUDA/TypeCasting.h | 25
1 files changed, 2 insertions, 23 deletions
diff --git a/Eigen/src/Core/arch/CUDA/TypeCasting.h b/Eigen/src/Core/arch/CUDA/TypeCasting.h
index b2a9724de..396b38eaf 100644
--- a/Eigen/src/Core/arch/CUDA/TypeCasting.h
+++ b/Eigen/src/Core/arch/CUDA/TypeCasting.h
@@ -71,6 +71,7 @@ struct functor_traits<scalar_cast_op<half, float> >
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
template <>
struct type_casting_traits<half, float> {
@@ -82,22 +83,9 @@ struct type_casting_traits<half, float> {
};
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
float2 r1 = __half22float2(a);
float2 r2 = __half22float2(b);
return make_float4(r1.x, r1.y, r2.x, r2.y);
-#else
- half r1;
- r1.x = a.x & 0xFFFF;
- half r2;
- r2.x = (a.x & 0xFFFF0000) >> 16;
- half r3;
- r3.x = b.x & 0xFFFF;
- half r4;
- r4.x = (b.x & 0xFFFF0000) >> 16;
- return make_float4(static_cast<float>(r1), static_cast<float>(r2),
- static_cast<float>(r3), static_cast<float>(r4));
-#endif
}
template <>
@@ -111,20 +99,11 @@ struct type_casting_traits<float, half> {
template<> EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
// Simply discard the second half of the input
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
return __float22half2_rn(make_float2(a.x, a.y));
-#else
- half r1 = static_cast<half>(a.x);
- half r2 = static_cast<half>(a.y);
- half2 r;
- r.x = 0;
- r.x |= r1.x;
- r.x |= (static_cast<unsigned int>(r2.x) << 16);
- return r;
-#endif
}
#endif
+#endif
} // end namespace internal