aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-12-21 08:42:58 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2015-12-21 08:42:58 -0800
commit51be91f15e745fbcc7b1b3584b2d0b947a500272 (patch)
tree2cf8a0ad095909c85fa2eb30210a4f351798e6fe /Eigen
parent6d777e1bc7d31023ad78c84777847896ab31927d (diff)
Added support for CUDA architectures that don't support 3.5 capabilities
Diffstat (limited to 'Eigen')
-rw-r--r--Eigen/src/Core/arch/CUDA/PacketMath.h18
1 files changed, 16 insertions, 2 deletions
diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h
index cb1b547e0..4495b3741 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMath.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMath.h
@@ -183,25 +183,39 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pstoreu<double>(double* to
to[1] = from.y;
}
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Aligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350  // device code built for arch >= 350: use __ldg
return __ldg((const float4*)from);
+#else  // __CUDA_ARCH__ undefined or < 350: read the four floats with plain loads
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif  // __CUDA_ARCH__ >= 350
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Aligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350  // device code built for arch >= 350: use __ldg
return __ldg((const double2*)from);
+#else  // __CUDA_ARCH__ undefined or < 350: read the two doubles with plain loads
+ return make_double2(from[0], from[1]);  // must build a double2 to match the return type
+#endif  // __CUDA_ARCH__ >= 350
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float4 ploadt_ro<float4, Unaligned>(const float* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350  // device code built for arch >= 350: per-element __ldg
return make_float4(__ldg(from+0), __ldg(from+1), __ldg(from+2), __ldg(from+3));
+#else  // __CUDA_ARCH__ undefined or < 350: read the four floats with plain loads
+ return make_float4(from[0], from[1], from[2], from[3]);
+#endif  // __CUDA_ARCH__ >= 350
}
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double2 ploadt_ro<double2, Unaligned>(const double* from) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350  // device code built for arch >= 350: per-element __ldg
return make_double2(__ldg(from+0), __ldg(from+1));
-}
+#else  // __CUDA_ARCH__ undefined or < 350: read the two doubles with plain loads
+ return make_double2(from[0], from[1]);  // must build a double2 to match the return type
#endif
+}
template<> EIGEN_DEVICE_FUNC inline float4 pgather<float, float4>(const float* from, Index stride) {
return make_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);