about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- eigen.BUILD 2
-rw-r--r-- tensorflow/contrib/cmake/external/eigen.cmake 4
-rw-r--r-- tensorflow/core/common_runtime/gpu/gpu_device.cc 50
-rw-r--r-- tensorflow/core/common_runtime/gpu/gpu_device.h 1
-rw-r--r-- tensorflow/workspace.bzl 4
-rw-r--r-- third_party/eigen3/Eigen/Cholesky 2
-rw-r--r-- third_party/eigen3/Eigen/Core 2
-rw-r--r-- third_party/eigen3/Eigen/Eigenvalues 2
-rw-r--r-- third_party/eigen3/Eigen/LU 2
-rw-r--r-- third_party/eigen3/Eigen/QR 2
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/Tensor 2
11 files changed, 46 insertions, 27 deletions
diff --git a/eigen.BUILD b/eigen.BUILD
index e32f3aab49..e8a95be51a 100644
--- a/eigen.BUILD
+++ b/eigen.BUILD
@@ -1,6 +1,6 @@
package(default_visibility = ["//visibility:public"])
-archive_dir = "eigen-eigen-0c0b79ecd74c"
+archive_dir = "eigen-eigen-62a2305d5734"
cc_library(
name = "eigen",
diff --git a/tensorflow/contrib/cmake/external/eigen.cmake b/tensorflow/contrib/cmake/external/eigen.cmake
index d3075ab9d2..4fdd66feda 100644
--- a/tensorflow/contrib/cmake/external/eigen.cmake
+++ b/tensorflow/contrib/cmake/external/eigen.cmake
@@ -7,7 +7,7 @@
include (ExternalProject)
-set(eigen_archive_hash "0c0b79ecd74c")
+set(eigen_archive_hash "62a2305d5734")
set(eigen_INCLUDE_DIRS
${CMAKE_CURRENT_BINARY_DIR}
@@ -16,7 +16,7 @@ set(eigen_INCLUDE_DIRS
${tensorflow_source_dir}/third_party/eigen3
)
set(eigen_URL https://bitbucket.org/eigen/eigen/get/${eigen_archive_hash}.tar.gz)
-set(eigen_HASH SHA256=b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b)
+set(eigen_HASH SHA256=d5da5c60f7225bc2f104f3494323b929e68e3a188ccf01dcee61df32ff536888)
set(eigen_BUILD ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen)
set(eigen_INSTALL ${CMAKE_CURRENT_BINARY_DIR}/eigen/install)
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index c82ae858b0..87e0040ff2 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -123,19 +123,20 @@ class EigenAllocator : public ::Eigen::Allocator {
#else
class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
public:
- EigenCudaStreamDevice() : scratch_(nullptr) { Eigen::initializeDeviceProp(); }
+ EigenCudaStreamDevice() : scratch_(nullptr), semaphore_(nullptr) {
+ Eigen::initializeDeviceProp();
+ }
~EigenCudaStreamDevice() {
- if (scratch_) {
- deallocate(scratch_);
- }
}
void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
- int gpu_id, ::tensorflow::Allocator* alloc) {
+ int gpu_id, ::tensorflow::Allocator* alloc, char* scratch) {
if (LogMemory::IsEnabled()) {
operation_ = context->op_kernel().name() + "/EigenAllocator";
step_id_ = context->step_id();
}
- assert(!scratch_);
+ scratch_ = scratch;
+ semaphore_ =
+ reinterpret_cast<unsigned int*>(scratch + Eigen::kCudaScratchSize);
stream_ = cuda_stream;
allocator_ = alloc;
device_prop_ = &Eigen::m_deviceProperties[gpu_id];
@@ -172,12 +173,15 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
// Return a pointer to a per stream scratchpad of 1024 bytes residing
// in global memory.
void* scratchpad() const {
- if (scratch_ == nullptr) {
- scratch_ = allocate(1024);
- }
return scratch_;
}
+ // Return a semaphore. The semaphore is initialized to 0, and each kernel
+ // using it is responsible for resetting it to 0 upon completion, to
+ // maintain the invariant that the semaphore is always equal to 0 at the
+ // start of each kernel.
+ unsigned int* semaphore() const { return semaphore_; }
+
private:
struct AsyncFreeData {
AsyncFreeData(::tensorflow::Allocator* a, void* p, const string& o,
@@ -205,7 +209,8 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface {
const cudaStream_t* stream_; // Not owned.
const cudaDeviceProp* device_prop_; // Not owned.
::tensorflow::Allocator* allocator_; // Not owned.
- mutable void* scratch_;
+ mutable char* scratch_;
+ mutable unsigned int* semaphore_;
TF_DISALLOW_COPY_AND_ASSIGN(EigenCudaStreamDevice);
};
@@ -262,6 +267,16 @@ BaseGPUDevice::BaseGPUDevice(const SessionOptions& options, const string& name,
streams_.push_back({stream, host_to_device_stream, device_to_host_stream,
device_to_device_stream});
+ perftools::gputools::DeviceMemory<char> mem =
+ executor_->AllocateArray<char>(Eigen::kCudaScratchSize +
+ sizeof(unsigned int));
+ scratch_.push_back(static_cast<char*>(mem.opaque()));
+ bool ok = executor_->SynchronousMemZero(
+ &mem, Eigen::kCudaScratchSize + sizeof(unsigned int));
+ if (!ok) {
+ LOG(FATAL) << "Failed to initialize device " << gpu_id;
+ }
+
device_contexts_.push_back(
new GPUDeviceContext(i, stream, host_to_device_stream,
device_to_host_stream, device_to_device_stream));
@@ -486,9 +501,10 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
public:
ConcretePerOpGpuDevice() : device_(nullptr) {}
void Reinitialize(OpKernelContext* context, gpu::Stream* stream,
- Allocator* base_allocator, ::tensorflow::EventMgr* em) {
+ Allocator* base_allocator, ::tensorflow::EventMgr* em,
+ char* scratch) {
allocator_.Reinitialize(context, stream, base_allocator, em);
- device_.Reinitialize(stream, &allocator_);
+ device_.Reinitialize(stream, &allocator_, scratch);
}
const Eigen::GpuDevice& device() const override { return device_; }
@@ -503,8 +519,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice {
ConcretePerOpGpuDevice() : device_(&stream_device_) {}
void Reinitialize(OpKernelContext* context, const cudaStream_t* cuda_stream,
- int gpu_id, Allocator* base_allocator) {
- stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator);
+ int gpu_id, Allocator* base_allocator, char* scratch) {
+ stream_device_.Reinitialize(context, cuda_stream, gpu_id, base_allocator,
+ scratch);
}
const Eigen::GpuDevice& device() const override { return device_; }
@@ -524,11 +541,12 @@ void BaseGPUDevice::ReinitializeDevice(OpKernelContext* context,
DCHECK(concrete_device);
#if defined(__GCUDACC__) || defined(__GCUDACC_HOST__)
concrete_device->Reinitialize(context, streams_[stream_id].compute, allocator,
- em_.get());
+ em_.get(), scratch_[stream_id]);
#else
const cudaStream_t* cuda_stream = reinterpret_cast<const cudaStream_t*>(
streams_[stream_id].compute->implementation()->CudaStreamMemberHack());
- concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator);
+ concrete_device->Reinitialize(context, cuda_stream, gpu_id_, allocator,
+ scratch_[stream_id]);
#endif
}
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
index 6e8cb7fbfc..4ac9c4021d 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -91,6 +91,7 @@ class BaseGPUDevice : public LocalDevice {
gpu::Stream* device_to_device;
};
gtl::InlinedVector<StreamGroup, 4> streams_;
+ gtl::InlinedVector<char*, 4> scratch_;
std::vector<GPUDeviceContext*> device_contexts_;
GpuDeviceInfo* gpu_device_info_ = nullptr;
mutex trace_mu_;
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index d9cfb85fc3..938f3a6d62 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -6,8 +6,8 @@
def tf_workspace(path_prefix = "", tf_repo_name = ""):
native.new_http_archive(
name = "eigen_archive",
- url = "https://bitbucket.org/eigen/eigen/get/0c0b79ecd74c.tar.gz",
- sha256 = "b4b5884b03bd4bae114d02b36e2435ad1504ed8e51431d16c876b6f6a365882b",
+ url = "https://bitbucket.org/eigen/eigen/get/62a2305d5734.tar.gz",
+ sha256 = "d5da5c60f7225bc2f104f3494323b929e68e3a188ccf01dcee61df32ff536888",
build_file = path_prefix + "eigen.BUILD",
)
diff --git a/third_party/eigen3/Eigen/Cholesky b/third_party/eigen3/Eigen/Cholesky
index 7415ae4d0d..858ece3f9c 100644
--- a/third_party/eigen3/Eigen/Cholesky
+++ b/third_party/eigen3/Eigen/Cholesky
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/Eigen/Cholesky"
+#include "eigen-eigen-62a2305d5734/Eigen/Cholesky"
diff --git a/third_party/eigen3/Eigen/Core b/third_party/eigen3/Eigen/Core
index 787e1c076e..380cf7124b 100644
--- a/third_party/eigen3/Eigen/Core
+++ b/third_party/eigen3/Eigen/Core
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/Eigen/Core"
+#include "eigen-eigen-62a2305d5734/Eigen/Core"
diff --git a/third_party/eigen3/Eigen/Eigenvalues b/third_party/eigen3/Eigen/Eigenvalues
index b6e1b81eb5..3f5cf7a31e 100644
--- a/third_party/eigen3/Eigen/Eigenvalues
+++ b/third_party/eigen3/Eigen/Eigenvalues
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/Eigen/Eigenvalues"
+#include "eigen-eigen-62a2305d5734/Eigen/Eigenvalues"
diff --git a/third_party/eigen3/Eigen/LU b/third_party/eigen3/Eigen/LU
index a0782af040..8f6680ffd9 100644
--- a/third_party/eigen3/Eigen/LU
+++ b/third_party/eigen3/Eigen/LU
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/Eigen/LU"
+#include "eigen-eigen-62a2305d5734/Eigen/LU"
diff --git a/third_party/eigen3/Eigen/QR b/third_party/eigen3/Eigen/QR
index 0a9bee2898..5e62a1f278 100644
--- a/third_party/eigen3/Eigen/QR
+++ b/third_party/eigen3/Eigen/QR
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/Eigen/QR"
+#include "eigen-eigen-62a2305d5734/Eigen/QR"
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
index 5228bcda62..15fe748231 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
@@ -1 +1 @@
-#include "eigen-eigen-0c0b79ecd74c/unsupported/Eigen/CXX11/Tensor"
+#include "eigen-eigen-62a2305d5734/unsupported/Eigen/CXX11/Tensor"