aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-04-14 21:25:06 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-04-14 21:25:06 -0700
commita62e9246562970f384f316c66d17c6ed4bd2a55a (patch)
treea03410839d3852c7194720518232809fe195f98d
parent18e6f67426139e1bba34c49ed0935b7cc1e1f379 (diff)
Added ability to access the cache sizes from the tensor devices
-rw-r--r--Eigen/src/Core/products/GeneralBlockPanelKernel.h10
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h20
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h9
3 files changed, 39 insertions, 0 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 54e118395..267ac1de9 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -2224,6 +2224,16 @@ inline std::ptrdiff_t l2CacheSize()
return l2;
}
+/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size paramete\
+rs.
+* \sa setCpuCacheSize */
+inline std::ptrdiff_t l3CacheSize()
+{
+ std::ptrdiff_t l1, l2, l3;
+ internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
+ return l3;
+}
+
/** Set the cpu L1 and L2 cache sizes (in bytes).
* These values are use to adjust the size of the blocks
* for the algorithms working per blocks.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index 267f6f8e3..9d141395b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -44,6 +44,26 @@ struct DefaultDevice {
#endif
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+ // Running on the host CPU
+ return l1CacheSize();
+#else
+ // Running on a CUDA device, return the amount of shared memory available.
+ return 48*1024;
+#endif
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+ // Running single threaded on the host CPU
+ return l3CacheSize();
+#else
+ // Running on a CUDA device
+ return firstLevelCacheSize();
+#endif
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
#ifndef __CUDA_ARCH__
// Running single threaded on the host CPU
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 6da16985f..41918eb19 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -128,6 +128,15 @@ struct ThreadPoolDevice {
return num_threads_;
}
+ EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+ return l1CacheSize();
+ }
+
+ EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+ // The l3 cache size is shared between all the cores.
+ return l3CacheSize() / num_threads_;
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
// Should return an enum that encodes the ISA supported by the CPU
return 1;