about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/heap_simulator.cc
diff options
context:
space:
mode:
author: Dimitris Vardoulakis <dimvar@google.com> 2018-10-03 16:47:49 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-10-03 16:56:47 -0700
commit: 13941241e984e4a4296891f4e61a9ed5b3107b22 (patch)
tree: 187a641531c02b98eff5f3338bf36615cdb20185 /tensorflow/compiler/xla/service/heap_simulator.cc
parent: aeb044c9784d30a25c0d15fa31f479001be55052 (diff)
[TF:XLA] Improve the accounting for subcomputations in the heap simulator.
Subtract the size of the aliased buffers from the subcomputation estimate instead of from the current computation. This way, the memory estimate for the current computation is more accurate. For the newly added test, the heap simulation at head (i.e., before this change) calculates 48 bytes instead of the correct 64 bytes.

PiperOrigin-RevId: 215653047
Diffstat (limited to 'tensorflow/compiler/xla/service/heap_simulator.cc')
-rw-r--r-- tensorflow/compiler/xla/service/heap_simulator.cc | 34
1 file changed, 15 insertions, 19 deletions
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index b343305554..9220865867 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -240,6 +240,7 @@ Status HeapSimulator::RunComputation(
// Make sure each buffer get reused at most once.
flat_hash_set<const BufferValue*> reused_buffers;
+ int64 alloc_size_by_instruction = 0;
for (const BufferValue* buffer : buffers_defined_by_instruction) {
if (IgnoreBuffer(buffer)) {
continue;
@@ -272,14 +273,15 @@ Status HeapSimulator::RunComputation(
if (!shared) {
VLOG(3) << " Allocating: " << buffer->ToString();
+ alloc_size_by_instruction += size_fn_(*buffer);
Alloc(buffer, instruction);
}
}
// Account for the memory used by subcomputations when estimating the
// current heap size.
if (memory_by_computation_ != nullptr) {
- algorithm_->AccountForSubcomputationMemory(instruction,
- *memory_by_computation_);
+ algorithm_->AccountForSubcomputationMemory(
+ instruction, alloc_size_by_instruction, *memory_by_computation_);
}
// If all computations in the module have been scheduled, we can save memory
@@ -385,10 +387,8 @@ void HeapSimulator::Alloc(const BufferValue* buffer,
allocated_buffers_.insert(buffer);
const int64 size = size_fn_(*buffer);
- const HloInstruction* instruction_to_calc_aliasing =
- memory_by_computation_ == nullptr ? nullptr : instruction;
- algorithm_->Alloc(buffer, size, instruction_to_calc_aliasing);
- no_fragmentation_stats_->Alloc(buffer, size, instruction_to_calc_aliasing);
+ algorithm_->Alloc(buffer, size);
+ no_fragmentation_stats_->Alloc(buffer, size);
FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction,
nullptr);
}
@@ -526,20 +526,8 @@ void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size) {
}
}
-void NoFragmentationStatsHeap::Alloc(const BufferValue* buffer, int64 size,
- const HloInstruction* instruction) {
- // The output buffer of while/call/conditional is always aliased with the
- // output buffer of the root instruction in the body. Don't double count.
- if (instruction == nullptr ||
- (instruction->opcode() != HloOpcode::kWhile &&
- instruction->opcode() != HloOpcode::kCall &&
- instruction->opcode() != HloOpcode::kConditional)) {
- Alloc(buffer, size);
- }
-}
-
void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
- const HloInstruction* instruction,
+ const HloInstruction* instruction, int64 alloc_size_by_instruction,
const absl::flat_hash_map<const HloComputation*, int64>&
memory_by_computation) {
// We only count the memory usage of the largest subcomputation, instead of
@@ -554,6 +542,14 @@ void NoFragmentationStatsHeap::AccountForSubcomputationMemory(
}
}
}
+ if (max_subcomputation_bytes > 0 &&
+ (instruction->opcode() == HloOpcode::kWhile ||
+ instruction->opcode() == HloOpcode::kCall ||
+ instruction->opcode() == HloOpcode::kConditional)) {
+ // The output buffer of while/call/conditional is always aliased with the
+ // output buffer of the root instruction in the body. Don't double count.
+ max_subcomputation_bytes -= alloc_size_by_instruction;
+ }
max_heap_size_ =
std::max(max_heap_size_, current_heap_size_ + max_subcomputation_bytes);
}