about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
diff options
context:
space:
mode:
author Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-06 13:32:38 -0700
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2015-07-06 13:32:38 -0700
commit 81f9e968fd390b003bda9b1373fefd2c4426c453 (patch)
tree bc4c67461a7758df5829cd2b1224d34fb6118b20 /unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
parent 864318e508b46535ce8f97abb5324a30386355d1 (diff)
Only attempt to use the texture path on GPUs when it's supported by CUDA
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h 30
1 files changed, 20 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index a38af84d5..36718e26f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -109,6 +109,24 @@ struct TensorEvaluator
const Device& m_device;
};
+namespace {  // NOTE(review): unnamed namespace inside a header (.h) - every including translation unit gets its own copy of these helpers; confirm that is intended.
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T loadConstant(const T* address) {  // Generic version: reads the value with a plain dereference of address.
+ return *address;
+}
+// Use the texture cache on CUDA devices whenever possible: the float and double specializations below exist only when __CUDA_ARCH__ is defined and >= 350, and read through __ldg; every other type or target uses the generic version above.
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float loadConstant(const float* address) {  // float specialization: reads the value through __ldg.
+ return __ldg(address);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double loadConstant(const double* address) {  // double specialization: reads the value through __ldg.
+ return __ldg(address);
+}
+#endif
+}  // end of unnamed namespace
+
// Default evaluator for rvalues
template<typename Derived, typename Device>
@@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data);
-#ifdef __CUDA_ARCH__
- return __ldg(m_data+index);
-#else
- return m_data[index];
-#endif
+ return loadConstant(m_data+index);  // Reads the element at m_data + index via loadConstant, which replaces the removed __CUDA_ARCH__ branch above.
}
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
eigen_assert(m_data);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
-#ifdef __CUDA_ARCH__
- return __ldg(m_data+index);
-#else
- return m_data[index];
-#endif
+ return loadConstant(m_data+index);
}
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }