-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc | 12
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h  |  2
-rw-r--r--  tensorflow/core/framework/allocator.h                   | 29
-rw-r--r--  tensorflow/core/framework/op_kernel.h                   | 38
-rw-r--r--  tensorflow/core/framework/tensor.cc                     | 17
-rw-r--r--  tensorflow/core/kernels/conv_ops_gpu.h                  |  5
-rw-r--r--  tensorflow/core/public/tensor.h                         |  8
7 files changed, 97 insertions, 14 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index a031d2f1e4..ee2d5a869c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -104,6 +104,18 @@ void* GPUBFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
       kMaxMillisToWait, unused_alignment, num_bytes);
 }
 
+void* GPUBFCAllocator::AllocateRaw(
+    size_t unused_alignment, size_t num_bytes,
+    const AllocationAttributes& allocation_attr) {
+  if (allocation_attr.no_retry_on_failure) {
+    // Return immediately upon the first failure if this is for allocating an
+    // optional scratch space.
+    return AllocateRawInternal(unused_alignment, num_bytes, true);
+  } else {
+    return AllocateRaw(unused_alignment, num_bytes);
+  }
+}
+
 void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                            size_t num_bytes,
                                            bool dump_log_on_failure) {
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 925fe8aa21..c2edf76dc0 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -49,6 +49,8 @@ class GPUBFCAllocator : public VisitableAllocator {
   string Name() override { return "gpu_bfc"; }
 
   void* AllocateRaw(size_t alignment, size_t num_bytes) override;
+  void* AllocateRaw(size_t alignment, size_t num_bytes,
+                    const AllocationAttributes& allocation_attr) override;
   void DeallocateRaw(void* ptr) override;
 
   void AddAllocVisitor(Visitor visitor) override;
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 548c9d54d2..41bbb08b3f 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -26,6 +26,16 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Attributes for a single allocation call. Different calls to the same
+// allocator could potentially have different allocation attributes.
+struct AllocationAttributes {
+  // If the first attempt to allocate the memory fails, the allocation
+  // should return immediately without retrying.
+  // An example use case is optional scratch spaces where a failure
+  // has only performance impact.
+  bool no_retry_on_failure = false;
+};
+
 // Allocator is an abstract interface for allocating and deallocating
 // device memory.
 class Allocator {
@@ -41,6 +51,17 @@ class Allocator {
   // REQUIRES: "alignment" is a power of 2.
   virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0;
 
+  // Return an uninitialized block of memory that is "num_bytes" bytes
+  // in size with specified allocation attributes. The returned pointer is
+  // guaranteed to be aligned to a multiple of "alignment" bytes.
+  // REQUIRES: "alignment" is a power of 2.
+  virtual void* AllocateRaw(size_t alignment, size_t num_bytes,
+                            const AllocationAttributes& allocation_attr) {
+    // The default behavior is to use the implementation without any allocation
+    // attributes.
+    return AllocateRaw(alignment, num_bytes);
+  }
+
   // Deallocate a block of memory pointer to by "ptr"
   // REQUIRES: "ptr" was previously returned by a call to AllocateRaw
   virtual void DeallocateRaw(void* ptr) = 0;
@@ -50,6 +71,12 @@ class Allocator {
   // tensor has too many elements to represent in a single allocation.
   template <typename T>
   T* Allocate(size_t num_elements) {
+    return Allocate<T>(num_elements, AllocationAttributes());
+  }
+
+  template <typename T>
+  T* Allocate(size_t num_elements,
+              const AllocationAttributes& allocation_attr) {
     // TODO(jeff): Do we need to allow clients to pass in alignment
     // requirements?
 
@@ -58,7 +85,7 @@ class Allocator {
     }
 
     void* p = AllocateRaw(32 /* align to 32 byte boundary */,
-                          sizeof(T) * num_elements);
+                          sizeof(T) * num_elements, allocation_attr);
     return reinterpret_cast<T*>(p);
   }
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 362202ac65..e61ddd0e2e 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -647,7 +647,13 @@ class OpKernelContext {
   // may retain references to the temporary tensors after the Op's
   // Compute method has run. See comment above.
   Status allocate_temp(DataType type, const TensorShape& shape,
-                       Tensor* out_temp, AllocatorAttributes attr);
+                       Tensor* out_temp, AllocatorAttributes allocator_attr,
+                       const AllocationAttributes& allocation_attr);
+  Status allocate_temp(DataType type, const TensorShape& shape,
+                       Tensor* out_temp, AllocatorAttributes allocator_attr) {
+    return allocate_temp(type, shape, out_temp, allocator_attr,
+                         AllocationAttributes());
+  }
   Status allocate_temp(DataType type, const TensorShape& shape,
                        Tensor* out_temp) {
     return allocate_temp(type, shape, out_temp, AllocatorAttributes());
@@ -851,7 +857,15 @@ class OpKernelContext {
 
   // Internal common method used when allocating tensor memory
   Status allocate_tensor(DataType type, const TensorShape& shape,
-                         Tensor* out_tensor, AllocatorAttributes attr);
+                         Tensor* out_tensor,
+                         AllocatorAttributes allocator_attr) {
+    return allocate_tensor(type, shape, out_tensor, allocator_attr,
+                           AllocationAttributes());
+  }
+
+  Status allocate_tensor(DataType type, const TensorShape& shape,
+                         Tensor* out_tensor, AllocatorAttributes allocator_attr,
+                         const AllocationAttributes& allocation_attr);
 
   // This is called by PersistentTensor::AccessTensor whenever the
   // wrapped tensor is retrieved, to ensure the runtime knows that the
@@ -1085,12 +1099,11 @@ inline Status OpKernelContext::allocate_output(int index,
   return allocate_output(index, shape, output, attr);
 }
 
-inline Status OpKernelContext::allocate_tensor(DataType type,
-                                               const TensorShape& shape,
-                                               Tensor* out_tensor,
-                                               AllocatorAttributes attr) {
+inline Status OpKernelContext::allocate_tensor(
+    DataType type, const TensorShape& shape, Tensor* out_tensor,
+    AllocatorAttributes attr, const AllocationAttributes& allocation_attr) {
   Allocator* a = get_allocator(attr);
-  Tensor new_tensor(a, type, shape);
+  Tensor new_tensor(a, type, shape, allocation_attr);
   if (!new_tensor.IsInitialized() && shape.num_elements() > 0) {
     return errors::ResourceExhausted("OOM when allocating tensor with shape",
@@ -1121,11 +1134,12 @@ inline Status OpKernelContext::allocate_output(int index,
   return s;
 }
 
-inline Status OpKernelContext::allocate_temp(DataType type,
-                                             const TensorShape& shape,
-                                             Tensor* out_temp,
-                                             AllocatorAttributes attr) {
-  Status s = allocate_tensor(type, shape, out_temp, attr);
+inline Status OpKernelContext::allocate_temp(
+    DataType type, const TensorShape& shape, Tensor* out_temp,
+    AllocatorAttributes allocator_attr,
+    const AllocationAttributes& allocation_attr) {
+  Status s =
+      allocate_tensor(type, shape, out_temp, allocator_attr, allocation_attr);
   if (s.ok()) {
     if (params_.device->SaveTemporaryTensors()) {
       // keep a reference to the underlying memory around
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index f14efdc913..9f573d2056 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -51,6 +51,7 @@ template <typename T>
 class Buffer : public TensorBuffer {
  public:
   Buffer(Allocator* a, int64 n);
+  Buffer(Allocator* a, int64 n, const AllocationAttributes& allocation_attr);
 
   void* data() const override { return data_; }
   size_t size() const override { return sizeof(T) * elem_; }
@@ -277,6 +278,13 @@ Buffer<T>::Buffer(Allocator* a, int64 n)
 }
 
 template <typename T>
+Buffer<T>::Buffer(Allocator* a, int64 n,
+                  const AllocationAttributes& allocation_attr)
+    : alloc_(a), data_(a->Allocate<T>(n, allocation_attr)), elem_(n) {
+  if (data_) Helper<T>::RunCtor(data_, elem_);
+}
+
+template <typename T>
 Buffer<T>::~Buffer() {
   if (data_) {
     Helper<T>::RunDtor(data_, elem_);
@@ -409,6 +417,15 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape)
   }
 }
 
+Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape,
+               const AllocationAttributes& allocation_attr)
+    : type_(type), shape_(shape), buf_(nullptr) {
+  CHECK_NOTNULL(a);
+  if (shape_.num_elements() > 0) {
+    CASES(type, buf_ = new Buffer<T>(a, shape.num_elements(), allocation_attr));
+  }
+}
+
 Tensor::Tensor(DataType type, const TensorShape& shape)
     : Tensor(cpu_allocator(), type, shape) {}
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index bcdc1c3510..8f131f7b81 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -57,8 +57,11 @@ class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator {
   AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override {
     Tensor temporary_memory;
 
+    AllocationAttributes allocation_attr;
+    allocation_attr.no_retry_on_failure = true;
     Status allocation_status(context_->allocate_temp(
-        DT_UINT8, TensorShape({byte_size}), &temporary_memory));
+        DT_UINT8, TensorShape({byte_size}), &temporary_memory,
+        AllocatorAttributes(), allocation_attr));
     if (!allocation_status.ok()) {
       LOG(WARNING) << allocation_status;
       return perftools::gputools::port::StatusOr<
diff --git a/tensorflow/core/public/tensor.h b/tensorflow/core/public/tensor.h
index c613831a8d..cbed45363d 100644
--- a/tensorflow/core/public/tensor.h
+++ b/tensorflow/core/public/tensor.h
@@ -54,6 +54,14 @@ class Tensor {
   /// `a` must outlive the lifetime of this Tensor.
   Tensor(Allocator* a, DataType type, const TensorShape& shape);
 
+  /// \brief Creates a tensor with the input `type` and `shape`, using the
+  /// allocator `a` and the specified "allocation_attr" to allocate the
+  /// underlying buffer.
+  ///
+  /// `a` must outlive the lifetime of this Tensor.
+  Tensor(Allocator* a, DataType type, const TensorShape& shape,
+         const AllocationAttributes& allocation_attr);
+
   /// Creates an uninitialized Tensor of the given data type.
   explicit Tensor(DataType type);
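
A minimal sketch of the caller-side pattern the conv_ops_gpu.h hunk establishes: a kernel that wants optional scratch space sets no_retry_on_failure so a failed allocation costs only performance instead of stalling on the allocator's retry path. This assumes it runs from an OpKernel::Compute() with an OpKernelContext* ctx; the helper name AllocateOptionalScratch and the fallback flag are illustrative, not part of this commit.

// Sketch: requesting optional scratch space from inside a kernel.
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"

namespace tensorflow {

void AllocateOptionalScratch(OpKernelContext* ctx, int64 scratch_bytes,
                             Tensor* scratch, bool* scratch_ok) {
  AllocationAttributes allocation_attr;
  allocation_attr.no_retry_on_failure = true;  // fail fast, do not wait
  Status s = ctx->allocate_temp(DT_UINT8, TensorShape({scratch_bytes}),
                                scratch, AllocatorAttributes(),
                                allocation_attr);
  if (!s.ok()) {
    // The scratch space is optional, so a failure only costs performance:
    // log it and let the caller pick a slower, scratch-free code path.
    LOG(WARNING) << s;
  }
  *scratch_ok = s.ok();
}

}  // namespace tensorflow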
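The new Tensor constructor can also be used directly. With no_retry_on_failure set, a failed single attempt leaves the underlying Buffer's data pointer null, so the tensor reports !IsInitialized() (the same check allocate_tensor() above turns into a ResourceExhausted status) rather than blocking until memory frees up. A sketch, assuming a device Allocator* a and a size num_bytes are in scope; both names are illustrative.

// Sketch: optional allocation through the new Tensor constructor.
AllocationAttributes allocation_attr;
allocation_attr.no_retry_on_failure = true;
Tensor scratch(a, DT_UINT8, TensorShape({num_bytes}), allocation_attr);
if (!scratch.IsInitialized()) {
  // Optional buffer unavailable; continue without it.
}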