aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-06-28 17:32:25 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-06-28 17:34:58 -0700
commit3e15ac3dd22d58e45b7d6db17dedbb189d789891 (patch)
tree264b4d2399c61c2a707a6d013e77f3f8ce4c01b4
parent6dc9977e1dffc9558835cf4d0a62b61b6c85cc19 (diff)
[TF:XLA] Copy elision does not need to know about existing copies.
It already detects layout-changing copies and those are already left unchanged by copy elision. Special case copies are also skipped because they are tagged separately (SetCopyElisionAllowed) PiperOrigin-RevId: 202574858
-rw-r--r--tensorflow/compiler/xla/service/copy_insertion.cc26
-rw-r--r--tensorflow/compiler/xla/service/copy_insertion.h8
-rw-r--r--tensorflow/compiler/xla/service/copy_insertion_test.cc33
-rw-r--r--tensorflow/compiler/xla/service/hlo_rematerialization.cc2
4 files changed, 33 insertions, 36 deletions
diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc
index b0ad433d8d..ab3d846403 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion.cc
@@ -1093,8 +1093,7 @@ void MaybeDumpModule(const string& message, const HloModule& module) {
} // namespace
Status RemoveUnnecessaryCopies(
- const HloOrdering& ordering,
- const tensorflow::gtl::FlatSet<int>& copies_to_exclude, HloModule* module,
+ const HloOrdering& ordering, HloModule* module,
const HloDataflowAnalysis::FusionCanShareBufferFunction&
fusion_can_share_buffer) {
MaybeDumpModule("after adding copies to resolve interference", *module);
@@ -1108,7 +1107,6 @@ Status RemoveUnnecessaryCopies(
for (HloComputation* computation : module->computations()) {
for (HloInstruction* instruction : computation->instructions()) {
if (instruction->opcode() == HloOpcode::kCopy &&
- !ContainsKey(copies_to_exclude, instruction->unique_id()) &&
instruction->CopyElisionAllowed()) {
TF_RETURN_IF_ERROR(copy_remover.TryElideCopy(instruction).status());
}
@@ -1152,16 +1150,13 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
"Call graph must be flattened before copy insertion.");
}
- // Gather Ids of existing kCopy instructions in the module. We avoid removing
- // these copies (except via DCE in TupleSimplifier) because they may have been
- // added for reasons not considered by copy insertion (eg, layout assignment).
- // Instruction id is used instead of HloInstruction* because the pointer
- // values may be recycled.
- tensorflow::gtl::FlatSet<int> existing_copies;
- for (HloComputation* computation : module->computations()) {
- for (HloInstruction* instruction : computation->instructions()) {
- if (instruction->opcode() == HloOpcode::kCopy) {
- existing_copies.insert(instruction->unique_id());
+ int64 num_existing_copies = 0;
+ if (VLOG_IS_ON(1)) {
+ for (HloComputation* computation : module->computations()) {
+ for (HloInstruction* instruction : computation->instructions()) {
+ if (instruction->opcode() == HloOpcode::kCopy) {
+ ++num_existing_copies;
+ }
}
}
}
@@ -1181,8 +1176,7 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
TF_DCHECK_OK(VerifyNoLiveRangeInterference(module));
DependencyHloOrdering ordering(module);
- TF_RETURN_IF_ERROR(
- RemoveUnnecessaryCopies(ordering, existing_copies, module));
+ TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module));
TF_RETURN_IF_ERROR(AddSpecialCaseCopies(*call_graph, module));
@@ -1203,7 +1197,7 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
}
}
}
- VLOG(1) << "Num copies before copy-insertion: " << existing_copies.size();
+ VLOG(1) << "Num copies before copy-insertion: " << num_existing_copies;
VLOG(1) << "Num copies after copy-insertion: " << num_total_copies;
}
diff --git a/tensorflow/compiler/xla/service/copy_insertion.h b/tensorflow/compiler/xla/service/copy_insertion.h
index 6d25706089..e1973db928 100644
--- a/tensorflow/compiler/xla/service/copy_insertion.h
+++ b/tensorflow/compiler/xla/service/copy_insertion.h
@@ -21,7 +21,6 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
-#include "tensorflow/core/lib/gtl/flatmap.h"
namespace xla {
@@ -79,11 +78,10 @@ class CopyInsertion : public HloPassInterface {
};
// Try to remove as many copies from the module as possible without introducing
-// live range interference. Copy instructions (identified by their unique id) in
-// the set copies_to_exclude are not considered for removal.
+// live range interference. Only copy instructions that are eligible for
+// copy elision are considered for removal.
Status RemoveUnnecessaryCopies(
- const HloOrdering& ordering,
- const tensorflow::gtl::FlatSet<int>& copies_to_exclude, HloModule* module,
+ const HloOrdering& ordering, HloModule* module,
const HloDataflowAnalysis::FusionCanShareBufferFunction&
fusion_can_share_buffer = nullptr);
diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc
index e7539759ce..7ae8799b61 100644
--- a/tensorflow/compiler/xla/service/copy_insertion_test.cc
+++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc
@@ -125,21 +125,27 @@ TEST_F(CopyInsertionTest, SingleConstant) {
}
TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) {
- // Verify that an kCopy instructions which exist in the pass before
+ // Verify that kCopy instructions which change layout and exist before
// copy-insertion remain in the graph after copy-insertion.
auto module = CreateNewModule();
auto builder = HloComputation::Builder(TestName());
- HloInstruction* constant = builder.AddInstruction(
- HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
- HloInstruction* copy_1 = builder.AddInstruction(HloInstruction::CreateUnary(
- constant->shape(), HloOpcode::kCopy, constant));
- HloInstruction* copy_2 = builder.AddInstruction(HloInstruction::CreateUnary(
- constant->shape(), HloOpcode::kCopy, constant));
+ HloInstruction* constant =
+ builder.AddInstruction(HloInstruction::CreateConstant(
+ Literal::CreateR2<float>({{0.f, 2.f}, {2.f, 4.f}})));
+ auto minor_to_major = LayoutUtil::MinorToMajor(constant->shape());
+ Layout reversed_layout =
+ LayoutUtil::MakeLayoutFromMajorToMinor(minor_to_major);
+ Shape copy_shape = constant->shape();
+ *copy_shape.mutable_layout() = reversed_layout;
+ HloInstruction* copy_1 = builder.AddInstruction(
+ HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant));
+ HloInstruction* copy_2 = builder.AddInstruction(
+ HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant));
HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary(
constant->shape(), HloOpcode::kAdd, copy_1, copy_2));
- HloInstruction* add_copy = builder.AddInstruction(
- HloInstruction::CreateUnary(constant->shape(), HloOpcode::kCopy, add));
+ builder.AddInstruction(
+ HloInstruction::CreateUnary(add->shape(), HloOpcode::kCopy, add));
module->AddEntryComputation(builder.Build());
@@ -147,12 +153,11 @@ TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) {
InsertCopies(module.get());
- EXPECT_EQ(CountCopies(*module), 3);
+ EXPECT_EQ(CountCopies(*module), 2);
- EXPECT_EQ(module->entry_computation()->root_instruction(), add_copy);
- EXPECT_THAT(
- module->entry_computation()->root_instruction(),
- op::Copy(op::Add(op::Copy(op::Constant()), op::Copy(op::Constant()))));
+ EXPECT_EQ(module->entry_computation()->root_instruction(), add);
+ EXPECT_THAT(module->entry_computation()->root_instruction(),
+ op::Add(op::Copy(op::Constant()), op::Copy(op::Constant())));
}
TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) {
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 62c07d7fac..59a8800a7d 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -1244,7 +1244,7 @@ StatusOr<bool> HloRematerialization::Run(
// TODO(b/80249101): Instead of a separate copy elision pass, use the
// ordering from the HLO schedule directly for copy insertion.
SequentialHloOrdering ordering(module, *sequence);
- TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, {}, module));
+ TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module));
}
// Compute peak memory usage of all computations in the module called in a