[TF:XLA] Improve the accounting for subcomputations in the heap simulator.

Subtract the size of the aliased buffers from the subcomputation estimate instead of from the current computation's estimate. This way, the memory estimate for the current computation is more accurate. For the newly added test, the heap simulation at head (before this change) calculates 48 bytes instead of the correct 64 bytes.

PiperOrigin-RevId: 215653047
author: Dimitris Vardoulakis <dimvar@google.com> 2018-10-03 16:47:49 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-10-03 16:56:47 -0700
commit: 13941241e984e4a4296891f4e61a9ed5b3107b22 (patch)
tree: 187a641531c02b98eff5f3338bf36615cdb20185 /tensorflow/compiler/xla/service/heap_simulator.cc
parent: aeb044c9784d30a25c0d15fa31f479001be55052 (diff)
1 files changed, 15 insertions, 19 deletions
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index b343305554..9220865867 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -240,6 +240,7 @@ Status HeapSimulator::RunComputation(
 
     // Make sure each buffer get reused at most once.
     flat_hash_set<const BufferValue*> reused_buffers;
+    int64 alloc_size_by_instruction = 0;
     for (const BufferValue* buffer : buffers_defined_by_instruction) {
       if (IgnoreBuffer(buffer)) {
         continue;
@@ -272,14 +273,15 @@ Status HeapSimulator::RunComputation(
 
       if (!shared) {
         VLOG(3) << "  Allocating: " << buffer->ToString();
+        alloc_size_by_instruction += size_fn_(*buffer);
         Alloc(buffer, instruction);
       }
     }
     // Account for the memory used by subcomputations when estimating the
     // current heap size.
     if (memory_by_computation_ != nullptr) {
-      algorithm_->AccountForSubcomputationMemory(instruction,
-                                                 *memory_by_computation_);
+      algorithm_->AccountForSubcomputationMemory(
+          instruction, alloc_size_by_instruction, *memory_by_computation_);
     }
 
     // If all computations in the module have been scheduled, we can save memory
@@ -385,10 +387,8 @@ void HeapSimulator::Alloc(const BufferValue* buffer,
 
   allocated_buffers_.insert(buffer);
   const int64 size = size_fn_(*buffer);
-  const HloInstruction* instruction_to_calc_aliasing =
-      memory_by_computation_ == nullptr ? nullptr : instruction;
-  algorithm_->Alloc(buffer, size, instruction_to_calc_aliasing);
-  no_fragmentation_stats_->Alloc(buffer, size, instruction_to_calc_aliasing);
+  algorithm_->Alloc(buffer, size);
+  no_fragmentation_stats_->Alloc(buffer, size);
   FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction,
                  nullptr);
 }
@@ -526,20 +526,8 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size) {
   }
 }
 
-void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
-                                     const HloInstruction* instruction) {
-  // The output buffer of while/call/conditional is always aliased with the
-  // output buffer of the root instruction in the body. Don't double count.
-  if (instruction == nullptr ||
-      (instruction->opcode() != HloOpcode::kWhile &&
-       instruction->opcode() != HloOpcode::kCall &&
-       instruction->opcode() != HloOpcode::kConditional)) {
-    Alloc(buffer, size);
-  }
-}
-
 void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
-    const HloInstruction* instruction,
+    const HloInstruction* instruction, int64 alloc_size_by_instruction,
     const absl::flat_hash_map<const HloComputation*, int64>&
         memory_by_computation) {
   // We only count the memory usage of the largest subcomputation, instead of
@@ -554,6 +542,14 @@ void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
       }
     }
   }
+  if (max_subcomputation_bytes > 0 &&
+      (instruction->opcode() == HloOpcode::kWhile ||
+       instruction->opcode() == HloOpcode::kCall ||
+       instruction->opcode() == HloOpcode::kConditional)) {
+    // The output buffer of while/call/conditional is always aliased with the
+    // output buffer of the root instruction in the body. Don't double count.
+    max_subcomputation_bytes -= alloc_size_by_instruction;
+  }
   max_heap_size_ =
       std::max(max_heap_size_, current_heap_size_ + max_subcomputation_bytes);
 }
author	Dimitris Vardoulakis <dimvar@google.com>	2018-10-03 16:47:49 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-10-03 16:56:47 -0700
commit	13941241e984e4a4296891f4e61a9ed5b3107b22 (patch)
tree	187a641531c02b98eff5f3338bf36615cdb20185 /tensorflow/compiler/xla/service/heap_simulator.cc
parent	aeb044c9784d30a25c0d15fa31f479001be55052 (diff)