aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-06-22 03:36:06 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-06-22 03:39:37 -0700
commit: 4613ceab3bf55e98dc529dad62c385c6a0b6ea7e (patch)
tree: 16b6a6ba613120aff9aa59edad6c3641b02632e4
parent: ebf8d59e23bb84a245332bc60fd93f0f27fc6a35 (diff)
Improve performance of HloDCE
Previously, we performed a DFS traversal to find all dead roots in the graph, but the same result can be achieved just by looking at the user count, because we don't remove any instruction with control predecessors or successors (due to HloComputation::IsRemovable).
PiperOrigin-RevId: 201663343
-rw-r--r-- tensorflow/compiler/xla/service/hlo_dce.cc 11
1 file changed, 2 insertions, 9 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc
index 8aa26bf520..7d35e251ca 100644
--- a/tensorflow/compiler/xla/service/hlo_dce.cc
+++ b/tensorflow/compiler/xla/service/hlo_dce.cc
@@ -41,20 +41,13 @@ StatusOr<bool> HloDCE::Run(HloModule* module) {
XLA_VLOG_LINES(2, module->ToString());
for (auto* computation : module->MakeComputationPostOrder()) {
- std::unordered_set<HloInstruction*> live_instructions;
- TF_RETURN_IF_ERROR(computation->root_instruction()->Accept(
- [&live_instructions](HloInstruction* instruction) {
- live_instructions.insert(instruction);
- return Status::OK();
- }));
-
// Remove any dead roots and their dead transitive operands. Collect them
// into a separate list first to avoid problems with iterating through the
// computation's instruction while simultaneously removing instructions.
std::vector<HloInstruction*> dead_roots;
for (auto* instruction : computation->instructions()) {
- if (instruction->user_count() == 0 &&
- live_instructions.count(instruction) == 0 &&
+ if (instruction != computation->root_instruction() &&
+ instruction->user_count() == 0 &&
computation->IsRemovable(instruction) &&
!instruction->HasSideEffect()) {
dead_roots.push_back(instruction);