Added ability to access the cache sizes from the tensor devices

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-14 21:25:06 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-14 21:25:06 -0700
commit: a62e9246562970f384f316c66d17c6ed4bd2a55a (patch)
tree: a03410839d3852c7194720518232809fe195f98d
parent: 18e6f67426139e1bba34c49ed0935b7cc1e1f379 (diff)
3 files changed, 39 insertions, 0 deletions
diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 54e118395..267ac1de9 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -2224,6 +2224,16 @@ inline std::ptrdiff_t l2CacheSize()
   return l2;
 }
 
+/** \returns the currently set level 3 cpu cache size (in bytes) used to estimate the ideal blocking size
+  * parameters.
+  * \sa setCpuCacheSize */
+inline std::ptrdiff_t l3CacheSize()
+{
+  std::ptrdiff_t l1, l2, l3;
+  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);  // l1 and l2 are passed only as out-arguments; they are not used afterwards
+  return l3;
+}
+
 /** Set the cpu L1 and L2 cache sizes (in bytes).
   * These values are use to adjust the size of the blocks
   * for the algorithms working per blocks.
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index 267f6f8e3..9d141395b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -44,6 +44,26 @@ struct DefaultDevice {
 #endif
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+    // Running on the host CPU (__CUDA_ARCH__ is not defined): report the value of l1CacheSize().
+    return l1CacheSize();
+#else
+    // Running on a CUDA device: return the amount of shared memory available, hard-coded here as 48*1024 bytes.
+    return 48*1024;
+#endif
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+    // Running single threaded on the host CPU: report the whole value of l3CacheSize(), with no per-thread split.
+    return l3CacheSize();
+#else
+    // Running on a CUDA device: no separate last-level value is used here, so reuse firstLevelCacheSize().
+    return firstLevelCacheSize();
+#endif
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
 #ifndef __CUDA_ARCH__
     // Running single threaded on the host CPU
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 6da16985f..41918eb19 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -128,6 +128,15 @@ struct ThreadPoolDevice {
     return num_threads_;
   }
 
+  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+    return l1CacheSize();  // forwarded as-is; unlike lastLevelCacheSize(), not divided by num_threads_
+  }
+
+  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+    // The l3 cache size is shared between all the cores, so report an equal share per thread (assumes num_threads_ > 0 -- TODO confirm).
+    return l3CacheSize() / num_threads_;
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
     // Should return an enum that encodes the ISA supported by the CPU
     return 1;
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-14 21:25:06 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-14 21:25:06 -0700
commit	a62e9246562970f384f316c66d17c6ed4bd2a55a (patch)
tree	a03410839d3852c7194720518232809fe195f98d
parent	18e6f67426139e1bba34c49ed0935b7cc1e1f379 (diff)