aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h30
1 files changed, 20 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index a38af84d5..36718e26f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -109,6 +109,24 @@ struct TensorEvaluator
const Device& m_device;
};
+namespace {
+template <typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T loadConstant(const T* address) {
+ return *address;
+}
+// Use the texture cache on CUDA devices whenever possible
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float loadConstant(const float* address) {
+ return __ldg(address);
+}
+template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double loadConstant(const double* address) {
+ return __ldg(address);
+}
+#endif
+}
+
// Default evaluator for rvalues
template<typename Derived, typename Device>
@@ -150,11 +168,7 @@ struct TensorEvaluator<const Derived, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
eigen_assert(m_data);
-#ifdef __CUDA_ARCH__
- return __ldg(m_data+index);
-#else
- return m_data[index];
-#endif
+ return loadConstant(m_data+index);
}
template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -167,11 +181,7 @@ struct TensorEvaluator<const Derived, Device>
eigen_assert(m_data);
const Index index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_dims.IndexOfColMajor(coords)
: m_dims.IndexOfRowMajor(coords);
-#ifdef __CUDA_ARCH__
- return __ldg(m_data+index);
-#else
- return m_data[index];
-#endif
+ return loadConstant(m_data+index);
}
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }