Reduce XLA compile time by ~7% for a convolutional image model:

* Added CompactPointerSet<T>, which is optimized for set size <= 1. * Changed expensive CHECKs to DCHECKS in buffer_assignment.cc * Reserve space in DFS state array before starting DFS. * Use unsigned arithmetic in DFS state maintenance. * HloInstruction: - Moved frequently used fields to start for better cache locality. - Use InlinedVector instead of vector for operand array. - Use InlinedVector instead of vector for DFS stack. * Pre-compute "is array" and "is tuple" for LogicalBuffer. * PointsToSet: - Combine two ShapeTrees into one. - Use CompactPointerSet instead of std::set to hold sources. - Use CompactPointerSet instead of std::set to hold flattened buffers. * ShapeTree: use unique_ptr instead of optional for shape storage (reduces size and destruction overhead). * Add proper const qualifiers to some FlatSet iterator methods. Co-author=jeff PiperOrigin-RevId: 165759117
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-18 15:11:24 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-18 15:15:38 -0700
commit: 5ead76420dee762a5f710fda6893075f1292d5d3 (patch)
tree: 3be8d931076c5f1386afa6014973cbca28c429eb /tensorflow/compiler/xla/service/buffer_liveness_test.cc
parent: a0544b0b8e5bc730b63b9f00fde587a3bedc80b5 (diff)
1 files changed, 11 insertions, 10 deletions
diff --git a/tensorflow/compiler/xla/service/buffer_liveness_test.cc b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
index a5f7cc0aeb..7a102d65ce 100644
--- a/tensorflow/compiler/xla/service/buffer_liveness_test.cc
+++ b/tensorflow/compiler/xla/service/buffer_liveness_test.cc
@@ -37,10 +37,9 @@ class BufferLivenessTest : public HloTestBase {
   const LogicalBuffer& GetBuffer(const BufferLiveness& liveness,
                                  const HloInstruction* instruction,
                                  const ShapeIndex& index) {
-    const std::vector<const LogicalBuffer*>& pointed_to =
-        liveness.points_to_analysis()
-            .GetPointsToSet(instruction)
-            .element(index);
+    const auto& pointed_to = liveness.points_to_analysis()
+                                 .GetPointsToSet(instruction)
+                                 .element(index);
     CHECK_EQ(1, pointed_to.size());
     CHECK_EQ(instruction, pointed_to[0]->instruction());
     CHECK(index == pointed_to[0]->index());
@@ -72,9 +71,9 @@ class BufferLivenessTest : public HloTestBase {
                               ShapeUtil::GetSubshape(b->shape(), index)));
     // Lookup PointsTo set for instructions 'a' and 'b'.
     auto& points_to_analysis = liveness.points_to_analysis();
-    const std::vector<const LogicalBuffer*>& points_to_a =
+    const auto& points_to_a =
         points_to_analysis.GetPointsToSet(a).element(index);
-    const std::vector<const LogicalBuffer*>& points_to_b =
+    const auto& points_to_b =
         points_to_analysis.GetPointsToSet(b).element(index);
     // Make sure PointsTo sets for 'a' and 'b' are unambiguous.
     EXPECT_EQ(1, points_to_a.size());
@@ -435,8 +434,9 @@ TEST_F(BufferLivenessTest, IndependentTupleElements) {
   auto builder = HloComputation::Builder(TestName());
   // Create param0 Tuple.
   auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter(
-      0, ShapeUtil::MakeTupleShape(
-             {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(S32, {4})}),
+      0,
+      ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(S32, {4})}),
       "param0"));
   // Create independent computations for each tuple elememt.
 
@@ -498,8 +498,9 @@ TEST_F(BufferLivenessTest, DependentTupleElements) {
   auto builder = HloComputation::Builder(TestName());
   // Create param0 Tuple.
   auto tuple_param0 = builder.AddInstruction(HloInstruction::CreateParameter(
-      0, ShapeUtil::MakeTupleShape(
-             {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(F32, {8})}),
+      0,
+      ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShape(F32, {8}), ShapeUtil::MakeShape(F32, {8})}),
       "param0"));
   // Create dependent computations for each tuple elememt.
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-08-18 15:11:24 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-08-18 15:15:38 -0700
commit	5ead76420dee762a5f710fda6893075f1292d5d3 (patch)
tree	3be8d931076c5f1386afa6014973cbca28c429eb /tensorflow/compiler/xla/service/buffer_liveness_test.cc
parent	a0544b0b8e5bc730b63b9f00fde587a3bedc80b5 (diff)