diff options
-rw-r--r-- | eigen.BUILD | 2 | ||||
-rw-r--r-- | tensorflow/contrib/cmake/external/eigen.cmake | 4 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_device.cc | 50 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_device.h | 1 | ||||
-rw-r--r-- | tensorflow/workspace.bzl | 4 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/Cholesky | 2 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/Core | 2 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/Eigenvalues | 2 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/LU | 2 | ||||
-rw-r--r-- | third_party/eigen3/Eigen/QR | 2 | ||||
-rw-r--r-- | third_party/eigen3/unsupported/Eigen/CXX11/Tensor | 2 |
11 files changed, 46 insertions, 27 deletions
diff --git a/eigen.BUILD b/eigen.BUILD index e32f3aab49..e8a95be51a 100644 --- a/eigen.BUILD +++ b/eigen.BUILD @@ -1,6 +1,6 @@ package(default_visibility = ["//visibility:public"]) -archive_dir = "eigen-eigen-0c0b79ecd74c" +archive_dir = "eigen-eigen-62a2305d5734" cc_library( name = "eigen", diff --git a/tensorflow/contrib/cmake/external/eigen.cmake b/tensorflow/contrib/cmake/external/eigen.cmake index d3075ab9d2..4fdd66feda 100644 --- a/tensorflow/contrib/cmake/external/eigen.cmake +++ b/tensorflow/contrib/cmake/external/eigen.cmake @@ -7,7 +7,7 @@ include (ExternalProject) -set(eigen_archive_hash "0c0b79ecd74c") +set(eigen_archive_hash "62a2305d5734") set(eigen_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR} @@ -16,7 +16,7 @@ set(eigen_INCLUDE_DIRS ${tensorflow_source_dir}/third_party/eigen3 ) set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz) -set(eigen_HASH SHA256=b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b) +set(eigen_HASH SHA256=d5da5c60f7225bc2f104f3494323b929e68e3a188ccf01dcee61df32ff536888) set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen) set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index c82ae858b0..87e0040ff2 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -123,19 +123,20 @@ class EigenAllocator : public ::Eigen::Allocator { #else class EigenCudaStreamDevice : public ::Eigen::StreamInterface { public: - EigenCudaStreamDevice() : scratch_(nullptr) { Eigen::initializeDeviceProp(); } + EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) { + Eigen::initializeDeviceProp(); + } ~EigenCudaStreamDevice() { - if (scratch_) { - deallocate(scratch_); - } } void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream, - int gpu_id, ::tensorflow::Allocator* alloc) { + int gpu_id, ::tensorflow::Allocator* alloc, char* scratch) { if (LogMemory::IsEnabled()) { operation_ = context->op_kernel().name() + "/EigenAllocator"; step_id_ = context->step_id(); } - assert(!scratch_); + scratch_ = scratch; + semaphore_ = + reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize); stream_ = cuda_stream; allocator_ = alloc; device_prop_ = &Eigen::m_deviceProperties[gpu_id]; @@ -172,12 +173,15 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface { // Return a pointer to a per stream scratchpad of 1024 bytes residing // in global memory. void* scratchpad() const { - if (scratch_ == nullptr) { - scratch_ = allocate(1024); - } return scratch_; } + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. + unsigned int* semaphore() const { return semaphore_; } + private: struct AsyncFreeData { AsyncFreeData(::tensorflow::Allocator* a, void* p, const string& o, @@ -205,7 +209,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface { const cudaStream_t* stream_; // Not owned. const cudaDeviceProp* device_prop_; // Not owned. ::tensorflow::Allocator* allocator_; // Not owned. - mutable void* scratch_; + mutable char* scratch_; + mutable unsigned int* semaphore_; TF_DISALLOW_COPY_AND_ASSIGN(EigenCudaStreamDevice); }; @@ -262,6 +267,16 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name, streams_.push_back({stream, host_to_device_stream, device_to_host_stream, device_to_device_stream}); + perftools::gputools::DeviceMemory<char> mem = + executor_->AllocateArray<char>(Eigen::kCudaScratchSize + + sizeof(unsigned int)); + scratch_.push_back(static_cast<char*>(mem.opaque())); + bool ok = executor_->SynchronousMemZero( + &mem, Eigen::kCudaScratchSize + sizeof(unsigned int)); + if (!ok) { + LOG(FATAL) << "Failed to initialize device " << gpu_id; + } + device_contexts_.push_back( new GPUDeviceContext(i, stream, host_to_device_stream, device_to_host_stream, device_to_device_stream)); @@ -486,9 +501,10 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice { public: ConcretePerOpGpuDevice() : device_(nullptr) {} void Reinitialize(OpKernelContext* context, gpu::Stream* stream, - Allocator* base_allocator, ::tensorflow::EventMgr* em) { + Allocator* base_allocator, ::tensorflow::EventMgr* em, + char* scratch) { allocator_.Reinitialize(context, stream, base_allocator, em); - device_.Reinitialize(stream, &allocator_); + device_.Reinitialize(stream, &allocator_, scratch); } const Eigen::GpuDevice& device() const override { return device_; } @@ -503,8 +519,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice { ConcretePerOpGpuDevice() : device_(&stream_device_) {} void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream, - int gpu_id, Allocator* base_allocator) { - stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator); + int gpu_id, Allocator* base_allocator, char* scratch) { + stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator, + scratch); } const Eigen::GpuDevice& device() const override { return device_; } @@ -524,11 +541,12 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context, DCHECK(concrete_device); #if defined(__GCUDACC__) || defined(__GCUDACC_HOST__) concrete_device->Reinitialize(context, streams_[stream_id].compute, allocator, - em_.get()); + em_.get(), scratch_[stream_id]); #else const cudaStream_t* cuda_stream = reinterpret_cast<const cudaStream_t*>( streams_[stream_id].compute->implementation()->CudaStreamMemberHack()); - concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator); + concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator, + scratch_[stream_id]); #endif } diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 6e8cb7fbfc..4ac9c4021d 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -91,6 +91,7 @@ class BaseGPUDevice : public LocalDevice { gpu::Stream* device_to_device; }; gtl::InlinedVector<StreamGroup, 4> streams_; + gtl::InlinedVector<char*, 4> scratch_; std::vector<GPUDeviceContext*> device_contexts_; GpuDeviceInfo* gpu_device_info_ = nullptr; mutex trace_mu_; diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d9cfb85fc3..938f3a6d62 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -6,8 +6,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): native.new_http_archive( name = "eigen_archive", - url = "https://bitbucket.org/eigen/eigen/get/0c0b79ecd74c.tar.gz", - sha256 = "b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b", + url = "https://bitbucket.org/eigen/eigen/get/62a2305d5734.tar.gz", + sha256 = "d5da5c60f7225bc2f104f3494323b929e68e3a188ccf01dcee61df32ff536888", build_file = path_prefix + "eigen.BUILD", ) diff --git a/third_party/eigen3/Eigen/Cholesky b/third_party/eigen3/Eigen/Cholesky index 7415ae4d0d..858ece3f9c 100644 --- a/third_party/eigen3/Eigen/Cholesky +++ b/third_party/eigen3/Eigen/Cholesky @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/Eigen/Cholesky" +#include "eigen-eigen-62a2305d5734/Eigen/Cholesky" diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core index 787e1c076e..380cf7124b 100644 --- a/third_party/eigen3/Eigen/Core +++ b/third_party/eigen3/Eigen/Core @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/Eigen/Core" +#include "eigen-eigen-62a2305d5734/Eigen/Core" diff --git a/third_party/eigen3/Eigen/Eigenvalues b/third_party/eigen3/Eigen/Eigenvalues index b6e1b81eb5..3f5cf7a31e 100644 --- a/third_party/eigen3/Eigen/Eigenvalues +++ b/third_party/eigen3/Eigen/Eigenvalues @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/Eigen/Eigenvalues" +#include "eigen-eigen-62a2305d5734/Eigen/Eigenvalues" diff --git a/third_party/eigen3/Eigen/LU b/third_party/eigen3/Eigen/LU index a0782af040..8f6680ffd9 100644 --- a/third_party/eigen3/Eigen/LU +++ b/third_party/eigen3/Eigen/LU @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/Eigen/LU" +#include "eigen-eigen-62a2305d5734/Eigen/LU" diff --git a/third_party/eigen3/Eigen/QR b/third_party/eigen3/Eigen/QR index 0a9bee2898..5e62a1f278 100644 --- a/third_party/eigen3/Eigen/QR +++ b/third_party/eigen3/Eigen/QR @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/Eigen/QR" +#include "eigen-eigen-62a2305d5734/Eigen/QR" diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor index 5228bcda62..15fe748231 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor +++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor @@ -1 +1 @@ -#include "eigen-eigen-0c0b79ecd74c/unsupported/Eigen/CXX11/Tensor" +#include "eigen-eigen-62a2305d5734/unsupported/Eigen/CXX11/Tensor" |