diff options
author | 2017-03-14 15:31:58 -0800 | |
---|---|---|
committer | 2017-03-14 16:44:40 -0700 | |
commit | e11c2c2841153f662e57ecf00d6e4813d19bd515 (patch) | |
tree | 8ed0820443703793d5ce7ed44f6511ca72b7291a | |
parent | fafd5b24223c4e07e3bbb7267750a06a80e37d95 (diff) |
Record allocated sizes for tensors instead of actual tensor sizes.
Subtract back the temp memory for the reduction op because its temp memory becomes the output memory.
Change: 150130275
-rw-r--r-- | tensorflow/core/framework/op_kernel.h | 2 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor.cc | 12 | ||||
-rw-r--r-- | tensorflow/core/framework/tensor.h | 3 | ||||
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_common.h | 8 | ||||
-rw-r--r-- | tensorflow/core/kernels/typed_queue.h | 6 | ||||
-rw-r--r-- | tensorflow/core/kernels/variable_ops.h | 5 |
6 files changed, 30 insertions, 6 deletions
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 4951319fd8..7c26b86429 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -207,7 +207,7 @@ class PersistentTensor { int64 NumElements() const { return tensor_.NumElements(); } - int64 TotalBytes() const { return tensor_.TotalBytes(); } + int64 AllocatedBytes() const { return tensor_.AllocatedBytes(); } private: Tensor tensor_; diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 68c6817448..62ced69719 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -730,6 +730,18 @@ size_t Tensor::TotalBytes() const { return 0; // Makes compiler happy. } +size_t Tensor::AllocatedBytes() const { + TensorDescription tensor_description; + FillDescription(&tensor_description); + if (tensor_description.has_allocation_description() && + tensor_description.allocation_description().allocated_bytes() > 0) { + return tensor_description.allocation_description().allocated_bytes(); + } else { + // Fall back to TotalBytes() if the allocator doesn't have its size. + return TotalBytes(); + } +} + bool Tensor::CanUseDMA() const { CASES(dtype(), return is_simple_type<T>::value); return false; // Makes compiler happy. diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index d9b22525c4..2d5e70cf76 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -144,6 +144,9 @@ class Tensor { /// Returns the estimated memory usage of this tensor. size_t TotalBytes() const; + // Returns the size of sallocated memory for this tensor. + size_t AllocatedBytes() const; + /// Returns true iff this tensor is aligned. 
bool IsAligned() const { #if EIGEN_MAX_ALIGN_BYTES == 0 diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 0d309c2185..0cd9c255bc 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -235,6 +235,14 @@ class ReductionOp : public OpKernel { if (!out.CopyFrom(tmp_out, helper.out_shape())) { ctx->SetStatus(errors::Internal("Error during reduction copy.")); } + if (ctx->track_allocations()) { + // The temporary memory becomes the output memory. + if (ctx->allocate_on_host(alloc_attr)) { + ctx->record_host_temp_memory_size(-out.AllocatedBytes()); + } else { + ctx->record_device_temp_memory_size(-out.AllocatedBytes()); + } + } ctx->set_output(0, out); } diff --git a/tensorflow/core/kernels/typed_queue.h b/tensorflow/core/kernels/typed_queue.h index 193b1f5034..0d608d9b87 100644 --- a/tensorflow/core/kernels/typed_queue.h +++ b/tensorflow/core/kernels/typed_queue.h @@ -84,7 +84,7 @@ int64 SizeOf(const std::deque<PersistentTensor>& sq) { if (sq.empty()) { return 0; } - return sq.size() * sq.front().TotalBytes(); + return sq.size() * sq.front().AllocatedBytes(); } template <> @@ -92,7 +92,7 @@ int64 SizeOf(const std::vector<PersistentTensor>& sq) { if (sq.empty()) { return 0; } - return sq.size() * sq.front().TotalBytes(); + return sq.size() * sq.front().AllocatedBytes(); } using TensorPair = std::pair<int64, PersistentTensor>; @@ -102,7 +102,7 @@ int64 SizeOf(const std::priority_queue<TensorPair, U, V>& sq) { if (sq.empty()) { return 0; } - return sq.size() * (sizeof(TensorPair) + sq.top().second.TotalBytes()); + return sq.size() * (sizeof(TensorPair) + sq.top().second.AllocatedBytes()); } } // namespace diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h index 642bff055f..8c173a4ba3 100644 --- a/tensorflow/core/kernels/variable_ops.h +++ b/tensorflow/core/kernels/variable_ops.h @@ -157,10 +157,11 @@ class 
DestroyTemporaryVariableOp : public OpKernel { context->step_container()->name(), var_name_)); if (context->track_allocations()) { if (context->allocate_on_host(AllocatorAttributes())) { - context->record_host_persistent_memory_allocation(-tmpvar.TotalBytes()); + context->record_host_persistent_memory_allocation( + -tmpvar.AllocatedBytes()); } else { context->record_device_persistent_memory_allocation( - -tmpvar.TotalBytes()); + -tmpvar.AllocatedBytes()); } } } |