Record allocated sizes for tensors instead of actual tensor sizes.

Subtract back temp memory for reduction op because its temp memory becomes output memory. Change: 150130275
author: Yuefeng Zhou <yuefengz@google.com> 2017-03-14 15:31:58 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-03-14 16:44:40 -0700
commit: e11c2c2841153f662e57ecf00d6e4813d19bd515 (patch)
tree: 8ed0820443703793d5ce7ed44f6511ca72b7291a
parent: fafd5b24223c4e07e3bbb7267750a06a80e37d95 (diff)
6 files changed, 30 insertions, 6 deletions
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 4951319fd8..7c26b86429 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -207,7 +207,7 @@ class PersistentTensor {
 
   int64 NumElements() const { return tensor_.NumElements(); }
 
-  int64 TotalBytes() const { return tensor_.TotalBytes(); }
+  int64 AllocatedBytes() const { return tensor_.AllocatedBytes(); }
 
  private:
   Tensor tensor_;
diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc
index 68c6817448..62ced69719 100644
--- a/tensorflow/core/framework/tensor.cc
+++ b/tensorflow/core/framework/tensor.cc
@@ -730,6 +730,18 @@ size_t Tensor::TotalBytes() const {
   return 0;  // Makes compiler happy.
 }
 
+size_t Tensor::AllocatedBytes() const {
+  TensorDescription tensor_description;
+  FillDescription(&tensor_description);
+  if (tensor_description.has_allocation_description() &&
+      tensor_description.allocation_description().allocated_bytes() > 0) {
+    return tensor_description.allocation_description().allocated_bytes();
+  } else {
+    // Fall back to TotalBytes() if the allocator doesn't have its size.
+    return TotalBytes();
+  }
+}
+
 bool Tensor::CanUseDMA() const {
   CASES(dtype(), return is_simple_type<T>::value);
   return false;  // Makes compiler happy.
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index d9b22525c4..2d5e70cf76 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -144,6 +144,9 @@ class Tensor {
   /// Returns the estimated memory usage of this tensor.
   size_t TotalBytes() const;
 
+  // Returns the size of allocated memory for this tensor.
+  size_t AllocatedBytes() const;
+
   /// Returns true iff this tensor is aligned.
   bool IsAligned() const {
 #if EIGEN_MAX_ALIGN_BYTES == 0
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 0d309c2185..0cd9c255bc 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -235,6 +235,14 @@ class ReductionOp : public OpKernel {
     if (!out.CopyFrom(tmp_out, helper.out_shape())) {
       ctx->SetStatus(errors::Internal("Error during reduction copy."));
     }
+    if (ctx->track_allocations()) {
+      // The temporary memory becomes the output memory.
+      if (ctx->allocate_on_host(alloc_attr)) {
+        ctx->record_host_temp_memory_size(-out.AllocatedBytes());
+      } else {
+        ctx->record_device_temp_memory_size(-out.AllocatedBytes());
+      }
+    }
     ctx->set_output(0, out);
   }
 
diff --git a/tensorflow/core/kernels/typed_queue.h b/tensorflow/core/kernels/typed_queue.h
index 193b1f5034..0d608d9b87 100644
--- a/tensorflow/core/kernels/typed_queue.h
+++ b/tensorflow/core/kernels/typed_queue.h
@@ -84,7 +84,7 @@ int64 SizeOf(const std::deque<PersistentTensor>& sq) {
   if (sq.empty()) {
     return 0;
   }
-  return sq.size() * sq.front().TotalBytes();
+  return sq.size() * sq.front().AllocatedBytes();
 }
 
 template <>
@@ -92,7 +92,7 @@ int64 SizeOf(const std::vector<PersistentTensor>& sq) {
   if (sq.empty()) {
     return 0;
   }
-  return sq.size() * sq.front().TotalBytes();
+  return sq.size() * sq.front().AllocatedBytes();
 }
 
 using TensorPair = std::pair<int64, PersistentTensor>;
@@ -102,7 +102,7 @@ int64 SizeOf(const std::priority_queue<TensorPair, U, V>& sq) {
   if (sq.empty()) {
     return 0;
   }
-  return sq.size() * (sizeof(TensorPair) + sq.top().second.TotalBytes());
+  return sq.size() * (sizeof(TensorPair) + sq.top().second.AllocatedBytes());
 }
 
 }  // namespace
diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h
index 642bff055f..8c173a4ba3 100644
--- a/tensorflow/core/kernels/variable_ops.h
+++ b/tensorflow/core/kernels/variable_ops.h
@@ -157,10 +157,11 @@ class DestroyTemporaryVariableOp : public OpKernel {
                                 context->step_container()->name(), var_name_));
     if (context->track_allocations()) {
       if (context->allocate_on_host(AllocatorAttributes())) {
-        context->record_host_persistent_memory_allocation(-tmpvar.TotalBytes());
+        context->record_host_persistent_memory_allocation(
+            -tmpvar.AllocatedBytes());
       } else {
         context->record_device_persistent_memory_allocation(
-            -tmpvar.TotalBytes());
+            -tmpvar.AllocatedBytes());
       }
     }
   }
author	Yuefeng Zhou <yuefengz@google.com>	2017-03-14 15:31:58 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-03-14 16:44:40 -0700
commit	e11c2c2841153f662e57ecf00d6e4813d19bd515 (patch)
tree	8ed0820443703793d5ce7ed44f6511ca72b7291a
parent	fafd5b24223c4e07e3bbb7267750a06a80e37d95 (diff)