From 81f9e968fd390b003bda9b1373fefd2c4426c453 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Jul 2015 13:32:38 -0700 Subject: Only attempt to use the texture path on GPUs when it's supported by CUDA --- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 30 ++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index a38af84d5..36718e26f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -109,6 +109,24 @@ struct TensorEvaluator const Device& m_device; }; +namespace { +template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T loadConstant(const T* address) { + return *address; +} +// Use the texture cache on CUDA devices whenever possible +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float loadConstant(const float* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double loadConstant(const double* address) { + return __ldg(address); +} +#endif +} + // Default evaluator for rvalues template @@ -150,11 +168,7 @@ struct TensorEvaluator EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { eigen_assert(m_data); -#ifdef __CUDA_ARCH__ - return __ldg(m_data+index); -#else - return m_data[index]; -#endif + return loadConstant(m_data+index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -167,11 +181,7 @@ struct TensorEvaluator eigen_assert(m_data); const Index index = (static_cast(Layout) == static_cast(ColMajor)) ? 
m_dims.IndexOfColMajor(coords) : m_dims.IndexOfRowMajor(coords); -#ifdef __CUDA_ARCH__ - return __ldg(m_data+index); -#else - return m_data[index]; -#endif + return loadConstant(m_data+index); } EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } -- cgit v1.2.3