-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc | 12
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h  |  2
-rw-r--r--  tensorflow/core/framework/allocator.h                   | 29
-rw-r--r--  tensorflow/core/framework/op_kernel.h                   | 38
-rw-r--r--  tensorflow/core/framework/tensor.cc                     | 17
-rw-r--r--  tensorflow/core/kernels/conv_ops_gpu.h                  |  5
-rw-r--r--  tensorflow/core/public/tensor.h                         |  8
7 files changed, 97 insertions, 14 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
index a031d2f1e4..ee2d5a869c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc
@@ -104,6 +104,18 @@ void* GPUBFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes) {
kMaxMillisToWait, unused_alignment, num_bytes);
}
+void* GPUBFCAllocator::AllocateRaw(
+ size_t unused_alignment, size_t num_bytes,
+ const AllocationAttributes& allocation_attr) {
+ if (allocation_attr.no_retry_on_failure) {
+ // Return immediately upon the first failure if this is for allocating an
+ // optional scratch space.
+ return AllocateRawInternal(unused_alignment, num_bytes, true);
+ } else {
+ return AllocateRaw(unused_alignment, num_bytes);
+ }
+}
+
void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment,
size_t num_bytes,
bool dump_log_on_failure) {
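In other words, when no_retry_on_failure is set the new overload skips the retry path taken by the plain AllocateRaw (which, per the surrounding context, can wait up to kMaxMillisToWait for memory to be freed) and calls AllocateRawInternal directly, so a failed first attempt is reported to the caller immediately instead of blocking.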
diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
index 925fe8aa21..c2edf76dc0 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h
@@ -49,6 +49,8 @@ class GPUBFCAllocator : public VisitableAllocator {
string Name() override { return "gpu_bfc"; }
void* AllocateRaw(size_t alignment, size_t num_bytes) override;
+ void* AllocateRaw(size_t alignment, size_t num_bytes,
+ const AllocationAttributes& allocation_attr) override;
void DeallocateRaw(void* ptr) override;
void AddAllocVisitor(Visitor visitor) override;
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 548c9d54d2..41bbb08b3f 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -26,6 +26,16 @@ limitations under the License.
namespace tensorflow {
+// Attributes for a single allocation call. Different calls to the same
+// allocator could potentially have different allocation attributes.
+struct AllocationAttributes {
+ // If the first attempt to allocate the memory fails, the allocation
+ // should return immediately without retrying.
+ // An example use case is optional scratch spaces where a failure
+ // has only performance impact.
+ bool no_retry_on_failure = false;
+};
+
// Allocator is an abstract interface for allocating and deallocating
// device memory.
class Allocator {
@@ -41,6 +51,17 @@ class Allocator {
// REQUIRES: "alignment" is a power of 2.
virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0;
+ // Return an uninitialized block of memory that is "num_bytes" bytes
+ // in size with specified allocation attributes. The returned pointer is
+ // guaranteed to be aligned to a multiple of "alignment" bytes.
+ // REQUIRES: "alignment" is a power of 2.
+ virtual void* AllocateRaw(size_t alignment, size_t num_bytes,
+ const AllocationAttributes& allocation_attr) {
+ // The default behavior is to use the implementation without any allocation
+ // attributes.
+ return AllocateRaw(alignment, num_bytes);
+ }
+
  // Deallocate a block of memory pointed to by "ptr"
// REQUIRES: "ptr" was previously returned by a call to AllocateRaw
virtual void DeallocateRaw(void* ptr) = 0;
@@ -50,6 +71,12 @@ class Allocator {
// tensor has too many elements to represent in a single allocation.
template <typename T>
T* Allocate(size_t num_elements) {
+ return Allocate<T>(num_elements, AllocationAttributes());
+ }
+
+ template <typename T>
+ T* Allocate(size_t num_elements,
+ const AllocationAttributes& allocation_attr) {
// TODO(jeff): Do we need to allow clients to pass in alignment
// requirements?
@@ -58,7 +85,7 @@ class Allocator {
}
void* p = AllocateRaw(32 /* align to 32 byte boundary */,
- sizeof(T) * num_elements);
+ sizeof(T) * num_elements, allocation_attr);
return reinterpret_cast<T*>(p);
}
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 362202ac65..e61ddd0e2e 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -647,7 +647,13 @@ class OpKernelContext {
// may retain references to the temporary tensors after the Op's
// Compute method has run. See comment above.
Status allocate_temp(DataType type, const TensorShape& shape,
- Tensor* out_temp, AllocatorAttributes attr);
+ Tensor* out_temp, AllocatorAttributes allocator_attr,
+ const AllocationAttributes& allocation_attr);
+ Status allocate_temp(DataType type, const TensorShape& shape,
+ Tensor* out_temp, AllocatorAttributes allocator_attr) {
+ return allocate_temp(type, shape, out_temp, allocator_attr,
+ AllocationAttributes());
+ }
Status allocate_temp(DataType type, const TensorShape& shape,
Tensor* out_temp) {
return allocate_temp(type, shape, out_temp, AllocatorAttributes());
@@ -851,7 +857,15 @@ class OpKernelContext {
// Internal common method used when allocating tensor memory
Status allocate_tensor(DataType type, const TensorShape& shape,
- Tensor* out_tensor, AllocatorAttributes attr);
+ Tensor* out_tensor,
+ AllocatorAttributes allocator_attr) {
+ return allocate_tensor(type, shape, out_tensor, allocator_attr,
+ AllocationAttributes());
+ }
+
+ Status allocate_tensor(DataType type, const TensorShape& shape,
+ Tensor* out_tensor, AllocatorAttributes allocator_attr,
+ const AllocationAttributes& allocation_attr);
// This is called by PersistentTensor::AccessTensor whenever the
// wrapped tensor is retrieved, to ensure the runtime knows that the
@@ -1085,12 +1099,11 @@ inline Status OpKernelContext::allocate_output(int index,
return allocate_output(index, shape, output, attr);
}
-inline Status OpKernelContext::allocate_tensor(DataType type,
- const TensorShape& shape,
- Tensor* out_tensor,
- AllocatorAttributes attr) {
+inline Status OpKernelContext::allocate_tensor(
+ DataType type, const TensorShape& shape, Tensor* out_tensor,
+ AllocatorAttributes attr, const AllocationAttributes& allocation_attr) {
Allocator* a = get_allocator(attr);
- Tensor new_tensor(a, type, shape);
+ Tensor new_tensor(a, type, shape, allocation_attr);
if (!new_tensor.IsInitialized() && shape.num_elements() > 0) {
return errors::ResourceExhausted("OOM when allocating tensor with shape",
@@ -1121,11 +1134,12 @@ inline Status OpKernelContext::allocate_output(int index,
return s;
}
-inline Status OpKernelContext::allocate_temp(DataType type,
- const TensorShape& shape,
- Tensor* out_temp,
- AllocatorAttributes attr) {
- Status s = allocate_tensor(type, shape, out_temp, attr);
+inline Status OpKernelContext::allocate_temp(
+ DataType type, const TensorShape& shape, Tensor* out_temp,
+ AllocatorAttributes allocator_attr,
+ const AllocationAttributes& allocation_attr) {
+ Status s =
+ allocate_tensor(type, shape, out_temp, allocator_attr, allocation_attr);
if (s.ok()) {
if (params_.device->SaveTemporaryTensors()) {
// keep a reference to the underlying memory around
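From a kernel author's perspective, the new allocate_temp overload is the entry point for all of this. A minimal sketch of a Compute method requesting an optional scratch tensor, assuming only the interface above; the kernel class and the shape are made up for illustration:

    #include "tensorflow/core/framework/op_kernel.h"

    namespace tensorflow {

    class ExampleOp : public OpKernel {  // hypothetical kernel
     public:
      explicit ExampleOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

      void Compute(OpKernelContext* context) override {
        AllocationAttributes allocation_attr;
        allocation_attr.no_retry_on_failure = true;

        Tensor scratch;
        Status s = context->allocate_temp(DT_FLOAT, TensorShape({1024}),
                                          &scratch, AllocatorAttributes(),
                                          allocation_attr);
        if (!s.ok()) {
          // Scratch space is optional: take a slower code path that does not
          // need it instead of failing the op.
        }
        // ... rest of the kernel ...
      }
    };

    }  // namespace tensorflow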
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index f14efdc913..9f573d2056 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -51,6 +51,7 @@ template <typename T>
class Buffer : public TensorBuffer {
public:
Buffer(Allocator* a, int64 n);
+ Buffer(Allocator* a, int64 n, const AllocationAttributes& allocation_attr);
void* data() const override { return data_; }
size_t size() const override { return sizeof(T) * elem_; }
@@ -277,6 +278,13 @@ Buffer<T>::Buffer(Allocator* a, int64 n)
}
template <typename T>
+Buffer<T>::Buffer(Allocator* a, int64 n,
+ const AllocationAttributes& allocation_attr)
+ : alloc_(a), data_(a->Allocate<T>(n, allocation_attr)), elem_(n) {
+ if (data_) Helper<T>::RunCtor(data_, elem_);
+}
+
+template <typename T>
Buffer<T>::~Buffer() {
if (data_) {
Helper<T>::RunDtor(data_, elem_);
@@ -409,6 +417,15 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape)
}
}
+Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape,
+ const AllocationAttributes& allocation_attr)
+ : type_(type), shape_(shape), buf_(nullptr) {
+ CHECK_NOTNULL(a);
+ if (shape_.num_elements() > 0) {
+ CASES(type, buf_ = new Buffer<T>(a, shape.num_elements(), allocation_attr));
+ }
+}
+
Tensor::Tensor(DataType type, const TensorShape& shape)
: Tensor(cpu_allocator(), type, shape) {}
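Note that both new constructors tolerate a failed allocation: Buffer<T> only runs the element constructors when data_ is non-null, and the resulting Tensor simply stays uninitialized, which is exactly the condition allocate_tensor checks via IsInitialized() in op_kernel.h above.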
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index bcdc1c3510..8f131f7b81 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -57,8 +57,11 @@ class CudnnScratchAllocator : public perftools::gputools::ScratchAllocator {
AllocateBytes(perftools::gputools::Stream* stream, int64 byte_size) override {
Tensor temporary_memory;
+ AllocationAttributes allocation_attr;
+ allocation_attr.no_retry_on_failure = true;
Status allocation_status(context_->allocate_temp(
- DT_UINT8, TensorShape({byte_size}), &temporary_memory));
+ DT_UINT8, TensorShape({byte_size}), &temporary_memory,
+ AllocatorAttributes(), allocation_attr));
if (!allocation_status.ok()) {
LOG(WARNING) << allocation_status;
return perftools::gputools::port::StatusOr<
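This is the motivating use case from the allocator.h comment: the cuDNN scratch buffer is optional, so with no_retry_on_failure set a failed allocation is merely logged as a warning and surfaced through the StatusOr return, leaving the caller free to pick a convolution algorithm that needs less (or no) scratch space rather than stalling in the allocator's retry path.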
diff --git a/tensorflow/core/public/tensor.h b/tensorflow/core/public/tensor.h
index c613831a8d..cbed45363d 100644
--- a/tensorflow/core/public/tensor.h
+++ b/tensorflow/core/public/tensor.h
@@ -54,6 +54,14 @@ class Tensor {
/// `a` must outlive the lifetime of this Tensor.
Tensor(Allocator* a, DataType type, const TensorShape& shape);
+ /// \brief Creates a tensor with the input `type` and `shape`, using the
+ /// allocator `a` and the specified "allocation_attr" to allocate the
+ /// underlying buffer.
+ ///
+ /// `a` must outlive the lifetime of this Tensor.
+ Tensor(Allocator* a, DataType type, const TensorShape& shape,
+ const AllocationAttributes& allocation_attr);
+
/// Creates an uninitialized Tensor of the given data type.
explicit Tensor(DataType type);
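Finally, a minimal sketch of the new public constructor in use. cpu_allocator() and the shape are only for illustration; in practice GPUBFCAllocator is the allocator that actually overrides the attribute-aware AllocateRaw, and other allocators simply ignore the attribute via the default forwarding shown earlier:

    #include "tensorflow/core/framework/allocator.h"
    #include "tensorflow/core/public/tensor.h"

    namespace tensorflow {

    // Illustrative only: builds a tensor whose backing memory is optional.
    Tensor MakeOptionalBuffer(int64 num_elements) {
      AllocationAttributes attr;
      attr.no_retry_on_failure = true;

      // Uses the constructor declared above; if the single allocation attempt
      // fails, the tensor is left uninitialized rather than retried.
      Tensor t(cpu_allocator(), DT_FLOAT, TensorShape({num_elements}), attr);
      if (!t.IsInitialized()) {
        // Fall back to a path that does not need this buffer.
      }
      return t;
    }

    }  // namespace tensorflow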