aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Yuefeng Zhou <yuefengz@google.com>2017-03-14 15:31:58 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-03-14 16:44:40 -0700
commite11c2c2841153f662e57ecf00d6e4813d19bd515 (patch)
tree8ed0820443703793d5ce7ed44f6511ca72b7291a
parentfafd5b24223c4e07e3bbb7267750a06a80e37d95 (diff)
Record allocated sizes for tensors instead of actual tensor sizes.
Subtract back temp memory for reduction op because its temp memory becomes output memory. Change: 150130275
-rw-r--r--tensorflow/core/framework/op_kernel.h2
-rw-r--r--tensorflow/core/framework/tensor.cc12
-rw-r--r--tensorflow/core/framework/tensor.h3
-rw-r--r--tensorflow/core/kernels/reduction_ops_common.h8
-rw-r--r--tensorflow/core/kernels/typed_queue.h6
-rw-r--r--tensorflow/core/kernels/variable_ops.h5
6 files changed, 30 insertions, 6 deletions
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 4951319fd8..7c26b86429 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -207,7 +207,7 @@ class PersistentTensor {
int64 NumElements() const { return tensor_.NumElements(); }
- int64 TotalBytes() const { return tensor_.TotalBytes(); }
+ int64 AllocatedBytes() const { return tensor_.AllocatedBytes(); }
private:
Tensor tensor_;
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 68c6817448..62ced69719 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -730,6 +730,18 @@ size_t Tensor::TotalBytes() const {
return 0; // Makes compiler happy.
}
+size_t Tensor::AllocatedBytes() const {
+ TensorDescription tensor_description;
+ FillDescription(&tensor_description);
+ if (tensor_description.has_allocation_description() &&
+ tensor_description.allocation_description().allocated_bytes() > 0) {
+ return tensor_description.allocation_description().allocated_bytes();
+ } else {
+ // Fall back to TotalBytes() when the description carries no positive allocated_bytes (e.g. the allocator doesn't have its size).
+ return TotalBytes();
+ }
+}
+
bool Tensor::CanUseDMA() const {
CASES(dtype(), return is_simple_type<T>::value);
return false; // Makes compiler happy.
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index d9b22525c4..2d5e70cf76 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -144,6 +144,9 @@ class Tensor {
/// Returns the estimated memory usage of this tensor.
size_t TotalBytes() const;
+ // Returns the size of allocated memory for this tensor.
+ size_t AllocatedBytes() const;
+
/// Returns true iff this tensor is aligned.
bool IsAligned() const {
#if EIGEN_MAX_ALIGN_BYTES == 0
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 0d309c2185..0cd9c255bc 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -235,6 +235,14 @@ class ReductionOp : public OpKernel {
if (!out.CopyFrom(tmp_out, helper.out_shape())) {
ctx->SetStatus(errors::Internal("Error during reduction copy."));
}
+ if (ctx->track_allocations()) {
+ // The temporary memory becomes the output memory.
+ if (ctx->allocate_on_host(alloc_attr)) {
+ ctx->record_host_temp_memory_size(-out.AllocatedBytes());
+ } else {
+ ctx->record_device_temp_memory_size(-out.AllocatedBytes());
+ }
+ }
ctx->set_output(0, out);
}
diff --git a/tensorflow/core/kernels/typed_queue.h b/tensorflow/core/kernels/typed_queue.h
index 193b1f5034..0d608d9b87 100644
--- a/tensorflow/core/kernels/typed_queue.h
+++ b/tensorflow/core/kernels/typed_queue.h
@@ -84,7 +84,7 @@ int64 SizeOf(const std::deque<PersistentTensor>& sq) {
if (sq.empty()) {
return 0;
}
- return sq.size() * sq.front().TotalBytes();
+ return sq.size() * sq.front().AllocatedBytes();
}
template <>
@@ -92,7 +92,7 @@ int64 SizeOf(const std::vector<PersistentTensor>& sq) {
if (sq.empty()) {
return 0;
}
- return sq.size() * sq.front().TotalBytes();
+ return sq.size() * sq.front().AllocatedBytes();
}
using TensorPair = std::pair<int64, PersistentTensor>;
@@ -102,7 +102,7 @@ int64 SizeOf(const std::priority_queue<TensorPair, U, V>& sq) {
if (sq.empty()) {
return 0;
}
- return sq.size() * (sizeof(TensorPair) + sq.top().second.TotalBytes());
+ return sq.size() * (sizeof(TensorPair) + sq.top().second.AllocatedBytes());
}
} // namespace
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 642bff055f..8c173a4ba3 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -157,10 +157,11 @@ class DestroyTemporaryVariableOp : public OpKernel {
context->step_container()->name(), var_name_));
if (context->track_allocations()) {
if (context->allocate_on_host(AllocatorAttributes())) {
- context->record_host_persistent_memory_allocation(-tmpvar.TotalBytes());
+ context->record_host_persistent_memory_allocation(
+ -tmpvar.AllocatedBytes());
} else {
context->record_device_persistent_memory_allocation(
- -tmpvar.TotalBytes());
+ -tmpvar.AllocatedBytes());
}
}
}