aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/layout_assignment.cc
diff options
context:
space:
mode:
authorGravatar Sanjoy Das <sanjoy@google.com>2017-09-14 16:06:55 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-09-14 16:10:49 -0700
commitc82a933f449e637ee83244d2c40162e24cdde0e1 (patch)
tree74e0cb414c7c6bbdd67c548c35efbcab20c3e11a /tensorflow/compiler/xla/service/layout_assignment.cc
parentdd22dbc7b9be5a47db91fa28557b911dabf720ec (diff)
Lower vector-matrix dot to LLVM IR if the RHS of the dot can be made
column major. The naive dot lowering to LLVM IR (already present in XLA today) is cache efficient if the dot has LHS of shape [1,K]{1,0} and RHS of shape [K x N]{0,1}. This change teaches the layout assignment pass to exploit this property by converting a constant RHS matrix to a column major layout when possible. Couple of related things I had to touch in this change: - In LayoutAssignmentTest.TupleLayout we used to generate a kCopy to satisfy the conflicting constraints between the result and the constant shapes, but with this change we change the layout of the constants themselves. So the EXPECT_FALSE is now an EXPECT_TRUE. - The extra instruction layout constraints added at the end of CpuLayoutAssignment::AddBackendConstraints seemed redundant. The layout assignment pass already tries to make all unconstrained buffers have the default row-major layout. Moreover, they were blocking this optimization in some cases by introducing conflicting constraints. - The changes to literal_util.h have to be made to deal with the Literal::Relayout calls we now get on literals of various types. PiperOrigin-RevId: 168761204
Diffstat (limited to 'tensorflow/compiler/xla/service/layout_assignment.cc')
-rw-r--r--tensorflow/compiler/xla/service/layout_assignment.cc46
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 47ca070eda..6991ad6a27 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -378,10 +378,7 @@ Status LayoutAssignment::AddMandatoryConstraints(
// layouts.
for (auto& instruction : computation->instructions()) {
Shape const* shape_with_layout = nullptr;
- if (instruction->opcode() == HloOpcode::kConstant) {
- // Constant layouts must match the layout of their literal.
- shape_with_layout = &instruction->literal().shape();
- } else if (instruction->opcode() == HloOpcode::kInfeed) {
+ if (instruction->opcode() == HloOpcode::kInfeed) {
// Infeed layouts must match the layout of the original inserted
// instruction.
// TODO(b/31425034): Change infeeds to be more like parameters, with
@@ -1073,7 +1070,8 @@ StatusOr<Layout> InferArrayLayout(
// The instruction should not define the buffer at this index.
TF_RET_CHECK(
- !points_to_analysis.InstructionDefinesBufferAtIndex(instruction, index));
+ !points_to_analysis.InstructionDefinesBufferAtIndex(instruction, index))
+ << instruction->ToString();
const auto& source_buffers =
points_to_analysis.GetPointsToSet(instruction).element(index);
@@ -1237,11 +1235,10 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
}
}
- // Set the layouts of the array shapes this instruction defines as
- // indicated by the respective BufferLayoutConstraints. Any array shapes
- // in the output of the instruction which are not defined by the instruction
- // (eg, array elements in a Tuple instruction) will be assigned below via
- // inference.
+ // Set the layouts of the array shapes this instruction defines as indicated
+ // by the respective BufferLayoutConstraints. Any array shapes in the output
+ // of the instruction which are not defined by the instruction (eg, array
+ // elements in a Tuple instruction) will be assigned below via inference.
for (const LogicalBuffer* buffer :
constraints.points_to_analysis().GetBuffersDefinedByInstruction(
instruction)) {
@@ -1250,11 +1247,18 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints,
}
TF_RET_CHECK(buffer->instruction() == instruction);
- Shape* buffer_subshape = ShapeUtil::GetMutableSubshape(
- instruction->mutable_shape(), buffer->index());
const Layout* buffer_layout = constraints.BufferLayout(*buffer);
TF_RET_CHECK(buffer_layout != nullptr);
- *buffer_subshape->mutable_layout() = *buffer_layout;
+
+ if (instruction->opcode() == HloOpcode::kConstant) {
+ // For constants, we also need to change the layout of the internal
+ // literal.
+ instruction->RelayoutConstant(*buffer_layout, buffer->index());
+ } else {
+ Shape* buffer_subshape = ShapeUtil::GetMutableSubshape(
+ instruction->mutable_shape(), buffer->index());
+ *buffer_subshape->mutable_layout() = *buffer_layout;
+ }
}
// Any remaining layouts in the output of the instruction must be
@@ -1331,13 +1335,19 @@ Status LayoutAssignment::RunOnComputation(
int unconstrained_count = constraints.unconstrained_buffer_ids().size();
// Arbitrarily pick the first unconstrained buffer and give it the default
- // layout. By construction unconstrained_buffers() has a stable sort based
- // on LogicalBuffer::Id.
+ // layout (or the literal layout, in case of constants). By construction
+ // unconstrained_buffers() has a stable sort based on LogicalBuffer::Id.
const LogicalBuffer& buffer = points_to_analysis.GetBuffer(
*constraints.unconstrained_buffer_ids().begin());
- TF_RETURN_IF_ERROR(constraints.SetBufferLayout(
- LayoutUtil::GetDefaultLayoutForShape(buffer.shape()), buffer,
- /*mandatory=*/false));
+ const HloInstruction* instruction = buffer.instruction();
+ Layout new_layout =
+ instruction->opcode() == HloOpcode::kConstant
+ ? ShapeUtil::GetSubshape(instruction->literal().shape(),
+ buffer.index())
+ .layout()
+ : LayoutUtil::GetDefaultLayoutForShape(buffer.shape());
+ TF_RETURN_IF_ERROR(constraints.SetBufferLayout(new_layout, buffer,
+ /*mandatory=*/false));
TF_RETURN_IF_ERROR(PropagateConstraints(&constraints));