diff options
author | 2015-11-18 16:41:37 -0800 | |
---|---|---|
committer | 2015-11-18 16:41:37 -0800 | |
commit | f7918e1dcd5b0c1f8114f488fc35a63a81e94535 (patch) | |
tree | 801a5280729f2ea53cf6a16c29a63f352b65a0de /third_party/eigen3/unsupported | |
parent | ab34d55ce7618e52069a2e1c9e51aac5a1ea81c3 (diff) |
TensorFlow: Removal of large assets and small other fixes.
Changes:
- Remove all large assets from the repository, including the other 50MiB
model protobuf and a lot of images in our g3doc directory. We will
maintain these assets externally for now. g3doc images may be
broken for a little bit, but the website will be fine, which
is the important resource. By @vrv and @petewarden. Updates
READMES to reflect the external model resources.
- Fix to saver's latest_checkpoint function by Zhifeng
- Made protos visibility public by @vrv
- Updates to docs by @mrry, Andy
- Embed tensorboard resource for summary icon by Daniel
- More updates to backwards compat by @josh11b
Base CL: 108194981
Diffstat (limited to 'third_party/eigen3/unsupported')
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h | 22 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 16 |
2 files changed, 20 insertions, 18 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h index b6eeb73832..a62682c728 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h @@ -760,11 +760,15 @@ struct GpuDevice { GpuDevice() : stream_(perftools::gputools::MachineManager::singleton()->stream_for_device(0)), allocator_(nullptr), - stream_exec_(stream_->parent()) {} + stream_exec_(stream_->parent()), + device_descr_(&(stream_exec_->GetDeviceDescription())) {} GpuDevice(perftools::gputools::Stream* stream, const Allocator* alloc = nullptr) - : stream_(stream), allocator_(alloc), stream_exec_(stream_->parent()) { } + : stream_(stream), + allocator_(alloc), + stream_exec_(stream_->parent()), + device_descr_(&(stream_exec_->GetDeviceDescription())) {} EIGEN_STRONG_INLINE perftools::gputools::Stream* stream() const { return stream_; @@ -873,28 +877,25 @@ struct GpuDevice { stream_->BlockHostUntilDone(); } - // A gpu::DeviceDescription is cached inside a StreamExecutor, so these calls - // aren't expensive/wasteful. 
EIGEN_DEVICE_FUNC inline int getNumCudaMultiProcessors() const { - return stream_exec_->GetDeviceDescription().core_count(); + return device_descr_->core_count(); } EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerBlock() const { - return stream_exec_->GetDeviceDescription().threads_per_block_limit(); + return device_descr_->threads_per_block_limit(); } EIGEN_DEVICE_FUNC inline int maxCudaThreadsPerMultiProcessor() const { - return stream_exec_->GetDeviceDescription().threads_per_core_limit(); + return device_descr_->threads_per_core_limit(); } EIGEN_DEVICE_FUNC inline int sharedMemPerBlock() const { - return stream_exec_->GetDeviceDescription().shared_memory_per_block(); + return device_descr_->shared_memory_per_block(); } EIGEN_DEVICE_FUNC inline int majorDeviceVersion() const { int major, minor; - if (stream_exec_->GetDeviceDescription().cuda_compute_capability(&major, - &minor)) { + if (device_descr_->cuda_compute_capability(&major, &minor)) { return major; } else { return 0; @@ -906,6 +907,7 @@ struct GpuDevice { private: perftools::gputools::Stream* stream_; perftools::gputools::StreamExecutor* stream_exec_; + const perftools::gputools::DeviceDescription* device_descr_; const Allocator* allocator_; }; diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h index 3e90b08c99..6d63b23b2f 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -115,7 +115,7 @@ namespace { } -template <typename T> +template <typename T, bool div_gt_one = false> struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -166,7 +166,7 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. 
template <> -class TensorIntDivisor<int32_t> { +class TensorIntDivisor<int32_t, true> { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -225,15 +225,15 @@ private: }; -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { +template <typename T, bool div_gt_one> +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { return divisor.divide(numerator); } #else // Reverse to the old code since gcudacc doesn't support the code above. -template <typename T> +template <typename T, bool div_gt_one = false> struct TensorIntDivisor { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { @@ -285,7 +285,7 @@ struct TensorIntDivisor { // Optimized version for signed 32 bit integers. // Derived from Hacker's Delight. template <> -class TensorIntDivisor<int> { +class TensorIntDivisor<int, true> { public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { magic = 0; @@ -344,8 +344,8 @@ private: }; -template <typename T> -static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T>& divisor) { +template <typename T, bool div_gt_one> +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor<T, div_gt_one>& divisor) { return divisor.divide(numerator); } |