Added ability to access the cache sizes from the tensor devices

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-14 21:25:06 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-04-14 21:25:06 -0700
commit: a62e9246562970f384f316c66d17c6ed4bd2a55a (patch)
tree: a03410839d3852c7194720518232809fe195f98d /unsupported/Eigen/CXX11/src
parent: 18e6f67426139e1bba34c49ed0935b7cc1e1f379 (diff)
2 files changed, 29 insertions, 0 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index 267f6f8e3..9d141395b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -44,6 +44,26 @@ struct DefaultDevice {
 #endif
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+    // Host compilation (__CUDA_ARCH__ is not defined): report the value of l1CacheSize().
+    return l1CacheSize();
+#else
+    // Compiled for a CUDA device: report a fixed 48KB as the amount of shared memory available.
+    return 48*1024;
+#endif
+  }
+
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+#ifndef __CUDA_ARCH__
+    // Running single threaded on the host CPU: report the value of l3CacheSize() undivided.
+    return l3CacheSize();
+#else
+    // Compiled for a CUDA device: reuse the value reported by firstLevelCacheSize().
+    return firstLevelCacheSize();
+#endif
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
 #ifndef __CUDA_ARCH__
     // Running single threaded on the host CPU
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 6da16985f..41918eb19 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -128,6 +128,15 @@ struct ThreadPoolDevice {
     return num_threads_;
   }
 
+  EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
+    return l1CacheSize();  // Not divided by num_threads_, unlike lastLevelCacheSize().
+  }
+
+  EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const {
+    // The l3 cache is shared between all the cores, so report an equal share per thread.
+    return l3CacheSize() / num_threads_;  // NOTE(review): assumes num_threads_ > 0 -- confirm.
+  }
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const {
     // Should return an enum that encodes the ISA supported by the CPU
     return 1;
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-14 21:25:06 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-04-14 21:25:06 -0700
commit	a62e9246562970f384f316c66d17c6ed4bd2a55a (patch)
tree	a03410839d3852c7194720518232809fe195f98d /unsupported/Eigen/CXX11/src
parent	18e6f67426139e1bba34c49ed0935b7cc1e1f379 (diff)