author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-05-25 18:19:02 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-05-25 18:22:34 -0700
commit 3e767e9db0e0a00a509354ec18462841ea4d40f2 (patch)
tree   b4eaf4f8aadab0a4ffec95e4ce67fe3548353b65 /tensorflow/compiler/xla/service/heap_simulator.cc
parent ca0d25c2d7f7dea6f35e3dea20c8a755fd58c637 (diff)
Add debug protos that serialize HLO graph information.
Also add flags to dump this data in JSON format for each backend. This is useful for upcoming debugging tools.

PiperOrigin-RevId: 157178357
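The trace added here is an ordinary protobuf message, so the JSON dump mentioned above needs no custom serializer. A minimal sketch, assuming only the standard protobuf JSON util and a collected trace named debug_trace (the per-backend flag plumbing lives outside this file):

  #include <string>
  #include "google/protobuf/util/json_util.h"

  // Serialize the collected heap-simulator trace to JSON for debugging tools.
  // The return status of MessageToJsonString is ignored in this sketch.
  std::string DebugTraceToJson(const xla::HeapSimulatorTrace& debug_trace) {
    std::string json;
    google::protobuf::util::MessageToJsonString(debug_trace, &json);
    return json;
  }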
Diffstat (limited to 'tensorflow/compiler/xla/service/heap_simulator.cc')
-rw-r--r--  tensorflow/compiler/xla/service/heap_simulator.cc  90
1 file changed, 62 insertions(+), 28 deletions(-)
diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc
index 645c68e043..86f62accd3 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.cc
+++ b/tensorflow/compiler/xla/service/heap_simulator.cc
@@ -58,13 +58,13 @@ StatusOr<HeapSimulator::Result> HeapSimulator::Run(
const TuplePointsToAnalysis& points_to_analysis,
const LogicalBuffer::SizeFunction& size_fn,
const FlatSet<const LogicalBuffer*>* buffers_to_assign) {
- HeapSimulator heap(std::move(algorithm), size_fn, buffers_to_assign);
+ HeapSimulator heap(std::move(algorithm), size_fn, buffers_to_assign,
+ &module_sequence);
const HloComputation* entry_computation = module.entry_computation();
const std::vector<const HloInstruction*>& instruction_sequence =
FindOrDie(module_sequence, entry_computation);
- TF_RETURN_IF_ERROR(heap.RunComputation(*entry_computation,
- instruction_sequence,
- points_to_analysis, &module_sequence));
+ TF_RETURN_IF_ERROR(heap.RunComputation(
+ *entry_computation, instruction_sequence, points_to_analysis));
return heap.Finish();
}
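A hedged usage sketch of the whole-module overload above; its leading parameters are cut off by the hunk header, so the argument order shown here is an assumption based on the visible tail of the signature:

  // Hypothetical call site: simulating the whole module (module_sequence is
  // non-null), so kCall/kWhile sub-computations are recursed into and the
  // resulting debug trace has whole_module_simulation set to true.
  TF_ASSIGN_OR_RETURN(
      HeapSimulator::Result result,
      HeapSimulator::Run(MakeUnique<NoFragmentationStatsHeap>(), module,
                         module_sequence, points_to_analysis, size_fn,
                         /*buffers_to_assign=*/nullptr));

The single-computation overload below behaves the same way except that it passes a null module_sequence, so sub-computations are never visited.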
@@ -75,22 +75,19 @@ StatusOr<HeapSimulator::Result> HeapSimulator::Run(
const TuplePointsToAnalysis& points_to_analysis,
const LogicalBuffer::SizeFunction& size_fn,
const FlatSet<const LogicalBuffer*>* buffers_to_assign) {
- HeapSimulator heap(std::move(algorithm), size_fn, buffers_to_assign);
+ HeapSimulator heap(std::move(algorithm), size_fn, buffers_to_assign,
+ /*module_sequence=*/nullptr);
TF_RETURN_IF_ERROR(heap.RunComputation(computation, instruction_sequence,
- points_to_analysis,
- /*module_sequence=*/nullptr));
+ points_to_analysis));
return heap.Finish();
}
// Runs a heap simulation for the given 'computation', assuming the given
-// 'instruction_sequence'. If 'module_sequence' is non-null, it is used to find
-// kCall and kWhile sub-computations, and the heap simulation for those
-// sub-computations will be run recursively.
+// 'instruction_sequence'.
Status HeapSimulator::RunComputation(
const HloComputation& computation,
const std::vector<const HloInstruction*>& instruction_sequence,
- const TuplePointsToAnalysis& points_to_analysis,
- const SequentialHloOrdering::HloModuleSequence* module_sequence) {
+ const TuplePointsToAnalysis& points_to_analysis) {
// The goal here is to minimize memory usage, assuming the given sequential
// ordering of instructions. The strategy is to walk through the instruction
// sequence, calling Alloc and Free on the underlying heap algorithm. The
@@ -191,14 +188,14 @@ Status HeapSimulator::RunComputation(
CanShareOperandBufferWithUser(
operand_buffer->instruction(), operand_buffer->index(),
buffer->instruction(), buffer->index(), points_to_analysis)) {
- ShareBuffer(buffer, operand_buffer);
+ ShareBuffer(buffer, operand_buffer, instruction);
shared = true;
break;
}
}
if (!shared) {
- Alloc(buffer);
+ Alloc(buffer, instruction);
}
}
@@ -210,16 +207,15 @@ Status HeapSimulator::RunComputation(
// The order that the sub-computations are simulated does not affect
// correctness; since the whole module is sequential, we know that the
// sub-computations will never be run concurrently.
- if (module_sequence != nullptr) {
+ if (module_sequence_ != nullptr) {
if (instruction->opcode() == HloOpcode::kCall ||
instruction->opcode() == HloOpcode::kWhile) {
for (const HloComputation* called_computation :
instruction->called_computations()) {
const std::vector<const HloInstruction*>& called_sequence =
- FindOrDie(*module_sequence, called_computation);
- TF_RETURN_IF_ERROR(RunComputation(*called_computation,
- called_sequence, points_to_analysis,
- module_sequence));
+ FindOrDie(*module_sequence_, called_computation);
+ TF_RETURN_IF_ERROR(RunComputation(
+ *called_computation, called_sequence, points_to_analysis));
}
}
@@ -231,10 +227,10 @@ Status HeapSimulator::RunComputation(
// Free buffers that are no longer live. This is the earliest point that we
// can de-allocate; right after the last use of the buffer.
for (const LogicalBuffer* buffer : dead_buffers_to_free) {
- Free(buffer);
+ Free(buffer, instruction);
}
for (const LogicalBuffer* buffer : operand_buffers_to_free) {
- Free(buffer);
+ Free(buffer, instruction);
}
}
@@ -245,7 +241,7 @@ Status HeapSimulator::RunComputation(
const FlatSet<const HloInstruction*>& pending = buffer_pending.second;
CHECK_EQ(pending.size(), 1) << *buffer;
CHECK(*pending.begin() == nullptr) << *buffer;
- Free(buffer);
+ Free(buffer, root);
}
return Status::OK();
@@ -254,11 +250,15 @@ Status HeapSimulator::RunComputation(
HeapSimulator::HeapSimulator(
std::unique_ptr<HeapAlgorithm> algorithm,
const LogicalBuffer::SizeFunction& size_fn,
- const FlatSet<const LogicalBuffer*>* buffers_to_assign)
+ const FlatSet<const LogicalBuffer*>* buffers_to_assign,
+ const SequentialHloOrdering::HloModuleSequence* module_sequence)
: no_fragmentation_stats_(MakeUnique<NoFragmentationStatsHeap>()),
algorithm_(std::move(algorithm)),
size_fn_(size_fn),
- buffers_to_assign_(buffers_to_assign) {}
+ buffers_to_assign_(buffers_to_assign),
+ module_sequence_(module_sequence) {
+ debug_trace_.set_whole_module_simulation(module_sequence_ != nullptr);
+}
HeapSimulator::~HeapSimulator() {}
@@ -273,7 +273,8 @@ bool HeapSimulator::IgnoreBuffer(const LogicalBuffer* buffer) const {
}
// Alloc always calls the underlying heap algorithm.
-void HeapSimulator::Alloc(const LogicalBuffer* buffer) {
+void HeapSimulator::Alloc(const LogicalBuffer* buffer,
+ const HloInstruction* instruction) {
CHECK(allocated_buffers_.count(buffer) == 0)
<< "Alloc called on allocated buffer: " << *buffer;
CHECK(freed_buffers_.count(buffer) == 0)
@@ -283,13 +284,17 @@ void HeapSimulator::Alloc(const LogicalBuffer* buffer) {
const int64 size = size_fn_(*buffer);
algorithm_->Alloc(buffer, size);
no_fragmentation_stats_->Alloc(buffer, size);
+
+ FillDebugTrace(HeapSimulatorTrace::Event::ALLOC, buffer, instruction,
+ nullptr);
}
// Free calls the underlying algorithm for non-shared buffers, and for shared
// buffers whose group liveness has expired. Shared group liveness is tracked
// by maintaining a refcount; the Free call on the last buffer in the group
// causes Free to be called on the underlying algorithm.
-void HeapSimulator::Free(const LogicalBuffer* buffer) {
+void HeapSimulator::Free(const LogicalBuffer* buffer,
+ const HloInstruction* instruction) {
auto shared_it = shared_buffers_.find(buffer);
if (shared_it != shared_buffers_.end()) {
std::shared_ptr<SharedGroup> group = shared_it->second;
@@ -311,6 +316,8 @@ void HeapSimulator::Free(const LogicalBuffer* buffer) {
const int64 size = size_fn_(*buffer);
algorithm_->Free(buffer, size);
no_fragmentation_stats_->Free(buffer, size);
+
+ FillDebugTrace(HeapSimulatorTrace::Event::FREE, buffer, instruction, nullptr);
}
// ShareBuffer associates buffers with their SharedGroup in shared_buffers_.
@@ -318,7 +325,8 @@ void HeapSimulator::Free(const LogicalBuffer* buffer) {
// Alloc. The 'shared' buffer must be a previously allocated or shared buffer.
// Both 'buffer' and 'shared' will be associated with the same SharedGroup.
void HeapSimulator::ShareBuffer(const LogicalBuffer* buffer,
- const LogicalBuffer* shared) {
+ const LogicalBuffer* shared,
+ const HloInstruction* instruction) {
CHECK_LE(size_fn_(*buffer), size_fn_(*shared))
<< "ShareBuffer oversized buffer" << *buffer << " shared: " << *shared;
CHECK(allocated_buffers_.count(buffer) == 0)
@@ -328,11 +336,13 @@ void HeapSimulator::ShareBuffer(const LogicalBuffer* buffer,
CHECK(freed_buffers_.count(shared) == 0)
<< "ShareBuffer called on freed shared buffer: " << *shared;
+ const LogicalBuffer* canonical = nullptr;
auto shared_it = shared_buffers_.find(shared);
if (shared_it != shared_buffers_.end()) {
// The 'shared' buffer already has a group; it might be the canonical, but
// also might not be. Just add 'buffer' to the existing group.
std::shared_ptr<SharedGroup> group = shared_it->second;
+ canonical = group->canonical;
++group->refcount;
shared_buffers_.emplace(buffer, group);
} else {
@@ -341,11 +351,15 @@ void HeapSimulator::ShareBuffer(const LogicalBuffer* buffer,
CHECK(allocated_buffers_.count(shared) > 0)
<< "ShareBuffer called on non-allocated shared buffer: " << *shared;
auto group = std::make_shared<SharedGroup>();
- group->canonical = shared;
+ canonical = shared;
+ group->canonical = canonical;
group->refcount = 2;
shared_buffers_.emplace(buffer, group);
shared_buffers_.emplace(shared, group);
}
+
+ FillDebugTrace(HeapSimulatorTrace::Event::SHARE_WITH, buffer, instruction,
+ canonical);
}
HeapSimulator::Result HeapSimulator::Finish() {
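Taken together, ShareBuffer and Free implement the refcounted share groups described in the comments above. A self-contained sketch of just that rule, using stand-in types rather than XLA's actual SharedGroup bookkeeping (the CHECKs on allocation state are omitted):

  #include <cstdint>
  #include <map>
  #include <memory>

  struct Buffer { int64_t id; };
  struct SharedGroup { const Buffer* canonical; int64_t refcount; };

  // Maps every member of a share group to its group, as shared_buffers_ does.
  std::map<const Buffer*, std::shared_ptr<SharedGroup>> shared_buffers;

  // Share: 'buffer' joins the group that 'shared' belongs to, creating a
  // two-member group if 'shared' was a plain allocated buffer.
  void Share(const Buffer* buffer, const Buffer* shared) {
    std::shared_ptr<SharedGroup> group;
    auto it = shared_buffers.find(shared);
    if (it != shared_buffers.end()) {
      group = it->second;  // 'shared' is already in a group; just join it.
    } else {
      group = std::make_shared<SharedGroup>();
      group->canonical = shared;
      group->refcount = 1;
      shared_buffers.emplace(shared, group);
    }
    ++group->refcount;
    shared_buffers.emplace(buffer, group);
  }

  // Free: returns the buffer the underlying algorithm should free, or nullptr
  // while other members of the group are still live.
  const Buffer* BufferToFree(const Buffer* buffer) {
    auto it = shared_buffers.find(buffer);
    if (it == shared_buffers.end()) return buffer;  // non-shared buffer
    std::shared_ptr<SharedGroup> group = it->second;
    shared_buffers.erase(it);
    if (--group->refcount > 0) return nullptr;  // group liveness not expired
    return group->canonical;  // last member frees the canonical buffer
  }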
@@ -378,9 +392,29 @@ HeapSimulator::Result HeapSimulator::Finish() {
const Result no_frag_result = no_fragmentation_stats_->Finish();
result.fragmentation_size = result.heap_size - no_frag_result.heap_size;
+ // Copy the debug trace we collected to the final result.
+ result.debug_trace.Swap(&debug_trace_);
+
return result;
}
+void HeapSimulator::FillDebugTrace(HeapSimulatorTrace::Event::Kind kind,
+ const LogicalBuffer* buffer,
+ const HloInstruction* instruction,
+ const LogicalBuffer* share_with_canonical) {
+ HeapSimulatorTrace::Event* event = debug_trace_.add_events();
+ event->set_kind(kind);
+ event->set_buffer_id(buffer->id());
+ event->set_computation_name(instruction->parent()->name());
+ event->set_instruction_name(instruction->name());
+ if (kind == HeapSimulatorTrace::Event::SHARE_WITH) {
+ CHECK(share_with_canonical != nullptr);
+ event->set_share_with_canonical_id(share_with_canonical->id());
+ } else {
+ CHECK(share_with_canonical == nullptr);
+ }
+}
+
void NoFragmentationStatsHeap::Alloc(const LogicalBuffer* buffer, int64 size) {
current_heap_size_ += size;
if (current_heap_size_ > max_heap_size_) {
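Finish() above hands the collected events back in result.debug_trace. A hedged sketch of consuming them, assuming only accessors that mirror the setters used in FillDebugTrace (Kind_Name is the standard protoc-generated enum helper):

  // Print one line per heap event, e.g. for a debugging tool replaying the
  // simulation. 'result' is a HeapSimulator::Result returned by Finish().
  for (const HeapSimulatorTrace::Event& event : result.debug_trace.events()) {
    std::string line = tensorflow::strings::StrCat(
        HeapSimulatorTrace::Event::Kind_Name(event.kind()), " buffer ",
        event.buffer_id(), " at ", event.computation_name(), "/",
        event.instruction_name());
    if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) {
      tensorflow::strings::StrAppend(&line, " shares canonical buffer ",
                                     event.share_with_canonical_id());
    }
    LOG(INFO) << line;
  }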