author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-01-08 17:36:56 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-01-08 17:40:55 -0800
commit 6fda97ab7540fb003d46f7c9810d6aab6dbc6c19 (patch)
tree   e892e09a5717aeddcebfb617c4e45459054f2cba /tensorflow
parent 91332633c2703727f3e776efbb4eba567cef6de1 (diff)
[XLA] Initial sparse layout support
Adds SparseIndexArray and support methods to Literal. SparseIndexArray manages the array of sparse indices and is exposed by sparse Literals.

Also adds HloSupportChecker classes for CPU and GPU. These will be run as the first HloPass during compilation, and verify that the graph is supported by the backend. Currently they only verify shapes, and that the layout is not sparse, since no backend supports sparse layouts yet.

PiperOrigin-RevId: 181244401
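For context, a minimal usage sketch of the sparse-literal API introduced by this change (not part of the commit). It follows the CreateSparse documentation in literal_util.h and the CreateSparse test in literal_util_test.cc below; the helper name MakeExampleSparseLiteral is a placeholder.

// Usage sketch (assumes a build against this revision of XLA). Creates a
// literal of shape F64[12,12,12]sparse{10} holding four non-zero values.
#include <memory>
#include <vector>

#include "tensorflow/compiler/xla/array2d.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/sparse_index_array.h"

std::unique_ptr<xla::Literal> MakeExampleSparseLiteral() {
  std::vector<xla::int64> dimensions = {12, 12, 12};
  // One row per non-zero element; the minor dimension (3) is the rank.
  xla::Array2D<xla::int64> indices = {
      {0, 1, 2},
      {3, 4, 5},
      {6, 7, 8},
      {9, 10, 11},
  };
  std::vector<double> values = {1.0, 2.0, 3.0, 4.0};
  // The index array is sized for up to 10 entries; CreateSparse sorts the
  // indices and values by default (sort == true) before populating the literal.
  return xla::Literal::CreateSparse<double>(
      dimensions, xla::SparseIndexArray(/*max_indices=*/10, indices), values);
}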
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/compiler/xla/BUILD                                        |  23
-rw-r--r--  tensorflow/compiler/xla/index_util.cc                                |  29
-rw-r--r--  tensorflow/compiler/xla/index_util.h                                 |  12
-rw-r--r--  tensorflow/compiler/xla/layout_util.cc                               |  34
-rw-r--r--  tensorflow/compiler/xla/layout_util.h                                |  17
-rw-r--r--  tensorflow/compiler/xla/layout_util_test.cc                          |  71
-rw-r--r--  tensorflow/compiler/xla/literal_util.cc                              |  36
-rw-r--r--  tensorflow/compiler/xla/literal_util.h                               | 119
-rw-r--r--  tensorflow/compiler/xla/literal_util_test.cc                         |  28
-rw-r--r--  tensorflow/compiler/xla/service/cpu/BUILD                            |  27
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_compiler.cc                  |   2
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc       |  48
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h        |  42
-rw-r--r--  tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc  |  72
-rw-r--r--  tensorflow/compiler/xla/service/gpu/BUILD                            |  27
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_compiler.cc                  |   2
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc       |  48
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h        |  42
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc  |  72
-rw-r--r--  tensorflow/compiler/xla/shape_util.cc                                |  67
-rw-r--r--  tensorflow/compiler/xla/shape_util.h                                 |  30
-rw-r--r--  tensorflow/compiler/xla/sparse_index_array.cc                        | 110
-rw-r--r--  tensorflow/compiler/xla/sparse_index_array.h                         | 176
-rw-r--r--  tensorflow/compiler/xla/sparse_index_array_test.cc                   |  43
-rw-r--r--  tensorflow/compiler/xla/xla_data.proto                               |  11
25 files changed, 1160 insertions(+), 28 deletions(-)
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 88de17a5ff..dcbe1fe9e5 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -302,6 +302,7 @@ cc_library(
":array4d",
":shape_tree",
":shape_util",
+ ":sparse_index_array",
":status_macros",
":types",
":util",
@@ -628,6 +629,28 @@ tf_cc_test(
],
)
+cc_library(
+ name = "sparse_index_array",
+ srcs = ["sparse_index_array.cc"],
+ hdrs = ["sparse_index_array.h"],
+ deps = [
+ ":array2d",
+ ":shape_util",
+ ":xla_data_proto",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "sparse_index_array_test",
+ srcs = ["sparse_index_array_test.cc"],
+ deps = [
+ ":sparse_index_array",
+ ":test",
+ "//tensorflow/core:test_main",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc
index 2ee23927d8..ffd1fb79e9 100644
--- a/tensorflow/compiler/xla/index_util.cc
+++ b/tensorflow/compiler/xla/index_util.cc
@@ -149,4 +149,33 @@ namespace xla {
return stride;
}
+/* static */ bool IndexUtil::IndexInBounds(
+ const Shape& shape, tensorflow::gtl::ArraySlice<int64> index) {
+ int64 rank = ShapeUtil::Rank(shape);
+ if (rank != index.size()) {
+ return false;
+ }
+ for (int64 d = 0; d < rank; ++d) {
+ if (index[d] >= shape.dimensions(d)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* static */ int IndexUtil::CompareIndices(
+ tensorflow::gtl::ArraySlice<int64> lhs,
+ tensorflow::gtl::ArraySlice<int64> rhs) {
+ int64 rank = lhs.size();
+ CHECK_EQ(rhs.size(), rank);
+ for (int64 dim = 0; dim < rank; ++dim) {
+ if (lhs[dim] < rhs[dim]) {
+ return -1;
+ } else if (lhs[dim] > rhs[dim]) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
} // namespace xla
diff --git a/tensorflow/compiler/xla/index_util.h b/tensorflow/compiler/xla/index_util.h
index c9838966a5..0b9188e852 100644
--- a/tensorflow/compiler/xla/index_util.h
+++ b/tensorflow/compiler/xla/index_util.h
@@ -69,6 +69,18 @@ class IndexUtil {
// sizeof(dimension(3)) * sizeof(dimension(2)) == 4 * 10
static int64 GetDimensionStride(const Shape& shape, int64 dimension);
+ // Returns true iff the given multi-index is contained in the bounds for the
+ // shape.
+ static bool IndexInBounds(const Shape& shape,
+ tensorflow::gtl::ArraySlice<int64> index);
+
+ // Compares the given indices in lexicographic order. lhs[0] and rhs[0] are
+ // compared first, and lhs[rank-1] and rhs[rank-1] last. If lhs is smaller,
+ // then -1 is returned. If lhs is larger, then 1 is returned. Otherwise, 0 is
+ // returned.
+ static int CompareIndices(tensorflow::gtl::ArraySlice<int64> lhs,
+ tensorflow::gtl::ArraySlice<int64> rhs);
+
private:
TF_DISALLOW_COPY_AND_ASSIGN(IndexUtil);
};
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 6435226fbe..ddf091e19f 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -64,6 +64,13 @@ void SetDefaultLayoutToContainer(
return layout;
}
+/* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) {
+ Layout layout;
+ layout.set_format(SPARSE);
+ layout.set_max_sparse_elements(max_sparse_elements);
+ return layout;
+}
+
namespace {
// Internal helper that creates a default layout for an array of the given rank.
@@ -234,7 +241,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
LayoutUtil::ClearLayout(program_shape->mutable_result());
}
-/* static */ bool LayoutUtil::IsDense(const Shape& shape) {
+/* static */ bool LayoutUtil::IsDenseArray(const Shape& shape) {
return ShapeUtil::IsArray(shape) && shape.has_layout() &&
IsDense(shape.layout());
}
@@ -260,7 +267,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
shape.layout().padded_dimensions_size() == 0) {
return false;
}
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
CHECK_EQ(shape.dimensions_size(), shape.layout().padded_dimensions_size());
for (int64 i = 0; i < shape.dimensions_size(); ++i) {
if (shape.layout().padded_dimensions(i) > shape.dimensions(i)) {
@@ -272,21 +279,35 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::PaddedDimensions(
const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return AsInt64Slice(shape.layout().padded_dimensions());
}
/* static */ int64 LayoutUtil::PaddedDimension(const Shape& shape,
int64 index) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return shape.layout().padded_dimensions(index);
}
/* static */ PaddingValue LayoutUtil::GetPaddingValue(const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return shape.layout().padding_value();
}
+/* static */ bool LayoutUtil::IsSparseArray(const Shape& shape) {
+ return ShapeUtil::IsArray(shape) && shape.has_layout() &&
+ IsSparse(shape.layout());
+}
+
+/* static */ bool LayoutUtil::IsSparse(const Layout& layout) {
+ return layout.format() == SPARSE;
+}
+
+/* static */ int64 LayoutUtil::MaxSparseElements(const Layout& layout) {
+ CHECK(IsSparse(layout));
+ return layout.max_sparse_elements();
+}
+
/* static */ bool LayoutUtil::HasLayout(const Shape& shape) {
if (ShapeUtil::IsTuple(shape)) {
// Tuple shape: all subshapes must have a layout.
@@ -313,7 +334,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return AsInt64Slice(shape.layout().minor_to_major());
}
@@ -419,6 +440,7 @@ tensorflow::Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src,
/* static */ bool LayoutUtil::AreDimensionsConsecutive(
const Layout& layout, tensorflow::gtl::ArraySlice<int64> dims) {
+ CHECK(IsDense(layout));
std::vector<int64> positions_in_layout;
for (int64 dim : dims) {
positions_in_layout.push_back(
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index f73cc95764..7c1ba4b022 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -36,6 +36,10 @@ class LayoutUtil {
// convenience function for protobuf construction.)
static Layout MakeLayout(tensorflow::gtl::ArraySlice<int64> minor_to_major);
+ // Creates a sparse layout with the given maximum number of elements. (This is
+ // a convenience function for protobuf construction.)
+ static Layout MakeSparseLayout(int64 max_sparse_elements);
+
// Returns default layout for the given shape.
static Layout GetDefaultLayoutForShape(const Shape& shape);
@@ -72,7 +76,7 @@ class LayoutUtil {
static void ClearLayout(ProgramShape* program_shape);
// Returns whether the given Shape is an array and has a dense format layout.
- static bool IsDense(const Shape& shape);
+ static bool IsDenseArray(const Shape& shape);
// Returns whether the given Layout has a dense format.
static bool IsDense(const Layout& layout);
@@ -107,6 +111,17 @@ class LayoutUtil {
// an array and has a dense layout.
static PaddingValue GetPaddingValue(const Shape& shape);
+ // Returns whether the given Shape is an array (i.e. not a tuple) and has a
+ // sparse format layout.
+ static bool IsSparseArray(const Shape& shape);
+
+ // Returns whether the given Layout has a sparse format.
+ static bool IsSparse(const Layout& layout);
+
+ // Returns the maximum number of elements that can be stored in a sparse
+ // layout.
+ static int64 MaxSparseElements(const Layout& layout);
+
// Returns whether the given shape has a layout. For tuple shapes, true is
// returned only if all elements have layouts.
static bool HasLayout(const Shape& shape);
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index 331bb9afa9..daf4dc10ac 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -30,6 +30,14 @@ class LayoutUtilTest : public ::testing::Test {
*shape.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major);
return shape;
}
+
+ Shape MakeShapeWithSparseLayout(PrimitiveType element_type,
+ tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements) {
+ Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+ *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+ return shape;
+ }
};
TEST_F(LayoutUtilTest, TupleLayoutComparison) {
@@ -81,6 +89,29 @@ TEST_F(LayoutUtilTest, CopyLayoutArray) {
EXPECT_FALSE(dst.has_layout());
}
+TEST_F(LayoutUtilTest, CopyLayoutSparse) {
+ Shape src = MakeShapeWithSparseLayout(F32, {2, 3}, 2);
+ Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
+
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+ // Should work if destination has no layout.
+ dst.clear_layout();
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+ // If source is cleared, then destination should be cleared.
+ src.clear_layout();
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_TRUE(dst.has_layout());
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_FALSE(dst.has_layout());
+}
+
TEST_F(LayoutUtilTest, CopyLayoutTuple) {
Shape src = ShapeUtil::MakeTupleShape(
{MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -100,6 +131,25 @@ TEST_F(LayoutUtilTest, CopyLayoutTuple) {
EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
}
+TEST_F(LayoutUtilTest, CopyLayoutTupleSparse) {
+ Shape src = ShapeUtil::MakeTupleShape(
+ {MakeShapeWithSparseLayout(F32, {2, 3}, 4),
+ MakeShapeWithSparseLayout(F32, {42, 123}, 4),
+ ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {}, {}),
+ MakeShapeWithSparseLayout(F32, {1, 2, 3}, 6)})});
+ Shape dst = ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {2, 3}, {1, 0}),
+ MakeShapeWithLayout(F32, {42, 123}, {1, 0}),
+ ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {}, {}),
+ MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})});
+
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
@@ -107,6 +157,13 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
}
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleSameRank) {
+ Shape src = MakeShapeWithSparseLayout(F32, {123, 42, 7}, 6);
+ Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
+ ASSERT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
@@ -116,6 +173,15 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
::testing::ContainsRegex("cannot copy layout from shape"));
}
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleDifferentRank) {
+ Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
+ Shape dst = MakeShapeWithSparseLayout(F32, {2, 3}, 4);
+ auto status = LayoutUtil::CopyLayoutBetweenShapes(src, &dst);
+ EXPECT_FALSE(status.ok());
+ EXPECT_THAT(status.error_message(),
+ ::testing::ContainsRegex("cannot copy layout from shape"));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleTuple) {
Shape src =
ShapeUtil::MakeTupleShape({MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -221,5 +287,10 @@ TEST_F(LayoutUtilTest, DefaultLayoutGettersMajorToMinor) {
ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25}))));
}
+TEST_F(LayoutUtilTest, SparseLayoutMaxElements) {
+ EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)),
+ 101);
+}
+
} // namespace
} // namespace xla
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index cc1735e6f2..dff5c1381a 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -94,9 +94,15 @@ Literal::Literal(const Shape& shape, bool allocate_arrays)
Piece& piece = pair.second;
piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
- if (ShapeUtil::IsArray(piece.subshape())) {
+ const Shape& subshape = piece.subshape();
+ if (ShapeUtil::IsArray(subshape)) {
if (allocate_arrays) {
piece.set_buffer(new char[piece.size_bytes()]);
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ piece.set_sparse_indices(new SparseIndexArray(
+ LayoutUtil::MaxSparseElements(subshape.layout()),
+ ShapeUtil::Rank(subshape)));
+ }
} else {
piece.set_buffer(nullptr);
}
@@ -112,6 +118,7 @@ void Literal::DeallocateBuffers() {
Piece& piece = pair.second;
if (piece.buffer() != nullptr) {
delete[] piece.buffer();
+ delete piece.sparse_indices();
}
}
}
@@ -164,6 +171,15 @@ std::unique_ptr<Literal> Literal::CreateFromShape(const Shape& shape) {
return literal;
}
+const SparseIndexArray* Literal::sparse_indices(
+ const ShapeIndex& shape_index) const {
+ return piece(shape_index).sparse_indices();
+}
+
+SparseIndexArray* Literal::sparse_indices(const ShapeIndex& shape_index) {
+ return piece(shape_index).sparse_indices();
+}
+
/* static */ std::unique_ptr<Literal> Literal::CreateFromDimensions(
PrimitiveType primitive_type,
tensorflow::gtl::ArraySlice<int64> dimensions) {
@@ -247,9 +263,12 @@ std::vector<Literal> Literal::DecomposeTuple() {
}
Piece& src_piece = piece(src_index);
- // Move the respective buffer over to the element Literal.
+ // Move the respective buffer and sparse indices over to the element
+ // Literal.
dest_piece.set_buffer(src_piece.buffer());
src_piece.set_buffer(nullptr);
+ dest_piece.set_sparse_indices(src_piece.sparse_indices());
+ src_piece.set_sparse_indices(nullptr);
}
}
// Set this literal to be nil-shaped.
@@ -406,6 +425,8 @@ Status Literal::MoveFrom(Literal&& src_literal,
Piece& dest_piece = piece(dest_index);
delete[] dest_piece.buffer();
dest_piece.set_buffer(src_piece.buffer());
+ delete dest_piece.sparse_indices();
+ dest_piece.set_sparse_indices(src_piece.sparse_indices());
}
src_literal.shape_ = ShapeUtil::MakeNil();
@@ -764,7 +785,7 @@ std::unique_ptr<Literal> Literal::Transpose(
// dimension has within the transposed array, a layout is affine if
// MinMaj(Di) == TMinMaj(T(Di)), with TMinMaj() being the minor to major
// vector of the affine layout.
- CHECK(LayoutUtil::IsDense(permuted_shape));
+ CHECK(LayoutUtil::IsDenseArray(permuted_shape));
Layout* layout = permuted_shape.mutable_layout();
layout->clear_minor_to_major();
for (auto index : LayoutUtil::MinorToMajor(shape())) {
@@ -1573,6 +1594,12 @@ LiteralProto Literal::ToProto() const {
}
piece.WriteToProto(proto_piece);
}
+
+ if (LayoutUtil::IsSparseArray(shape())) {
+ CopyToRepeatedField(proto.mutable_sparse_indices(),
+ sparse_indices()->data());
+ }
+
return proto;
}
@@ -1653,6 +1680,7 @@ LiteralView::LiteralView(const Literal& literal, const ShapeIndex& view_root) {
}
const Piece& src_piece = literal.piece(src_index);
piece.set_buffer(src_piece.buffer());
+ piece.set_sparse_indices(src_piece.sparse_indices());
piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
}
}
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index dc29c6359c..50e25bbdd0 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -36,6 +36,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/ptr_util.h"
#include "tensorflow/compiler/xla/shape_tree.h"
#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/sparse_index_array.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
@@ -103,6 +104,12 @@ class Literal {
tensorflow::gtl::MutableArraySlice<NativeT> data(
const ShapeIndex& shape_index = {});
+ // Returns a pointer to the sparse index array. Returns nullptr if the literal
+ // is not a sparse array.
+ const SparseIndexArray* sparse_indices(
+ const ShapeIndex& shape_index = {}) const;
+ SparseIndexArray* sparse_indices(const ShapeIndex& shape_index = {});
+
// Returns a pointer to (or size of) the underlying buffer holding the array
// at the given shape index. CHECKs if the subshape of the literal at the
// given ShapeIndex is not array.
@@ -160,6 +167,56 @@ class Literal {
// array.
string GetR1U8AsString() const;
+ // Creates a literal with a sparse layout and the given indices and values.
+ // The shape is initialized from the given dimensions. The minor dimension of
+ // the indices array must equal the rank of the shape (i.e. size of the
+ // dimensions array). The major dimension of the indices array must equal the
+ // number of elements in the values array. The maximum number of elements in
+ // the array is taken from the max_indices() value of the index array.
+ //
+ // XLA assumes that sparse literals are in sorted order for all operations. If
+ // the `sort` argument is true, then the indices and values will be sorted
+ // while copying them into the literal. If you have ensured that the indices
+ // and values are already sorted, then you may set the `sort` argument to
+ // false to skip the sorting step.
+ //
+ // For example:
+ //
+ // CreateSparse(
+ // {12, 12, 12},
+ // SparseIndexArray(10, 3,
+ // Array2D{
+ // {0, 1, 2},
+ // {3, 4, 5},
+ // {6, 7, 8},
+ // {9, 10, 11},
+ // }),
+ // {1.0, 2.0, 3.0, 4.0})
+ //
+ // This creates an array with shape F64[12,12,12]sparse{10}, that has the
+ // following non-zero values:
+ //
+ // [0, 1, 2]: 1.0
+ // [3, 4, 5]: 2.0
+ // [6, 7, 8]: 3.0
+ // [9, 10, 11]: 4.0
+ //
+ template <typename NativeT>
+ static std::unique_ptr<Literal> CreateSparse(
+ tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values, bool sort = true);
+
+ // Populates a literal with a sparse layout with the given indices and values.
+ // Each index in the indices array is CHECKed against the dimensions in the
+ // literal's shape. If sort is true, then the indices and values will be
+ // sorted. If sort is false, then the indices and values are assumed to
+ // already be in sorted order. See CreateSparse for an example of how data
+ // are populated.
+ template <typename NativeT>
+ void PopulateSparse(SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values,
+ bool sort = true);
+
// Creates a new Literal object with the shape specified as parameter.
// The content of the literal values is the default value of the primitive
// type of literal itself (0 for numeric types, and false for predicates).
@@ -358,7 +415,7 @@ class Literal {
const ShapeIndex& shape_index, NativeT value);
// Overloads of Get and Set for array literals. CHECKs if the literal is not
- // array-shaped.
+ // array-shaped and dense.
template <typename NativeT>
NativeT Get(tensorflow::gtl::ArraySlice<int64> multi_index) const;
template <typename NativeT>
@@ -408,6 +465,8 @@ class Literal {
// This function is useful if you want a polymorphic representation
// of the tensor's elements (turning it to a string for something
// like representation in a protobuf).
+ //
+ // This literal must have a dense layout.
void EachCellAsString(
const std::function<void(tensorflow::gtl::ArraySlice<int64> indices,
const string& value)>& per_cell) const;
@@ -447,6 +506,8 @@ class Literal {
//
// generator must be a callable of the type
// NativeT(tensorflow::gtl::ArraySlice<int64> indexes) or compatible.
+ //
+ // This literal must have a dense layout.
template <typename NativeT, typename FnType>
Status Populate(const FnType& generator);
@@ -485,10 +546,12 @@ class Literal {
// admonishments about floating-point equality checks apply. We expect you to
// use this to check for complex values that can be expressed precisely as
// float pairs e.g. (-0.5, 1.0).
+ //
+ // This literal must have a dense layout.
bool IsAllComplex(complex64 value) const;
// Returns whether this literal is zero at the specified index. This literal
- // must be an array.
+ // must be an array with a dense layout.
bool IsZero(tensorflow::gtl::ArraySlice<int64> indices) const;
// Return the count of the elements in the array at the given shape index in
@@ -563,6 +626,14 @@ class Literal {
char* buffer() const { return buffer_; }
void set_buffer(char* buffer) { buffer_ = buffer; }
+ // The array of multi-indices that provide the locations of non-zero
+ // elements in a sparse array. Only used if
+ // LayoutUtil::IsSparseArray(shape()) is true.
+ SparseIndexArray* sparse_indices() const { return sparse_indices_; }
+ void set_sparse_indices(SparseIndexArray* sparse_indices) {
+ sparse_indices_ = sparse_indices;
+ }
+
// Gets or sets the subshape of this piece. This reference points to a
// subshape within the shape in the containing Literal (Literal::shape_).
const Shape& subshape() const { return *subshape_; }
@@ -598,6 +669,9 @@ class Literal {
// For array-shaped pieces, this is the buffer holding the literal data.
char* buffer_ = nullptr;
+ // For sparse arrays, this is the array of indices.
+ SparseIndexArray* sparse_indices_ = nullptr;
+
// The shape of piece. This points into the shape of the containing Literal
// (Literal::shape_).
const Shape* subshape_ = nullptr;
@@ -837,6 +911,21 @@ template <typename NativeT>
}
template <typename NativeT>
+/* static */ std::unique_ptr<Literal> Literal::CreateSparse(
+ tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values, bool sort) {
+ int64 num_elements = values.size();
+ int64 rank = dimensions.size();
+ CHECK_EQ(num_elements, indices.index_count());
+ CHECK_EQ(rank, indices.rank());
+ auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithSparseLayout(
+ primitive_util::NativeToPrimitiveType<NativeT>(), dimensions,
+ indices.max_indices()));
+ literal->PopulateSparse(indices, values, sort);
+ return literal;
+}
+
+template <typename NativeT>
/* static */ std::unique_ptr<Literal> Literal::CreateR4(
std::initializer_list<std::initializer_list<
std::initializer_list<std::initializer_list<NativeT>>>>
@@ -1044,11 +1133,35 @@ void Literal::PopulateR4FromArray4D(const Array4D<NativeT>& values) {
PopulateFromArray(values);
}
+template <typename NativeT>
+void Literal::PopulateSparse(SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values,
+ bool sort) {
+ CHECK(LayoutUtil::IsSparseArray(shape()));
+ int rank = ShapeUtil::Rank(shape());
+ CHECK_EQ(indices.rank(), rank);
+ int64 max_elements = LayoutUtil::MaxSparseElements(shape().layout());
+ CHECK_LE(indices.max_indices(), max_elements);
+ int64 num_elements = values.size();
+ CHECK_LE(num_elements, max_elements);
+ CHECK_EQ(num_elements, indices.index_count());
+ auto root_data = root_piece().data<NativeT>();
+ root_data.remove_suffix(max_elements - values.size());
+ std::copy(values.begin(), values.end(), root_data.begin());
+ *this->root_piece().sparse_indices() = std::move(indices);
+ if (sort) {
+ auto root_data = this->root_piece().data<NativeT>();
+ root_data.remove_suffix(root_data.size() - num_elements);
+ this->root_piece().sparse_indices()->SortWithValues(root_data);
+ }
+ DCHECK(this->root_piece().sparse_indices()->Validate(shape()));
+}
+
template <typename NativeT, typename FnType>
Status Literal::Populate(const FnType& generator) {
const Shape& this_shape = shape();
const int64 rank = ShapeUtil::Rank(this_shape);
- TF_RET_CHECK(ShapeUtil::IsArray(this_shape));
+ TF_RET_CHECK(LayoutUtil::IsDenseArray(this_shape));
TF_RET_CHECK(this_shape.element_type() ==
primitive_util::NativeToPrimitiveType<NativeT>());
tensorflow::gtl::MutableArraySlice<NativeT> literal_data = data<NativeT>();
diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc
index 4974ead048..29efb4312f 100644
--- a/tensorflow/compiler/xla/literal_util_test.cc
+++ b/tensorflow/compiler/xla/literal_util_test.cc
@@ -193,6 +193,34 @@ TEST_F(LiteralUtilTest, CreateR3FromArray3d) {
ASSERT_EQ(expected, result);
}
+TEST_F(LiteralUtilTest, CreateSparse) {
+ std::vector<int64> dimensions = {8, 8, 8};
+ Array2D<int64> indices = {
+ {3, 4, 5},
+ {1, 2, 3},
+ {2, 3, 4},
+ {3, 5, 6},
+ };
+ std::vector<int64> values = {7, 8, 9, 10};
+ auto literal = Literal::CreateSparse<int64>(
+ dimensions, SparseIndexArray(indices.n1() + 3, indices), values);
+
+ Array2D<int64> expected_indices = {
+ {1, 2, 3},
+ {2, 3, 4},
+ {3, 4, 5},
+ {3, 5, 6},
+ };
+ std::vector<int64> expected_values = {8, 9, 7, 10};
+
+ EXPECT_EQ(literal->sparse_indices()->data(),
+ tensorflow::gtl::ArraySlice<int64>(
+ expected_indices.data(), expected_indices.num_elements()));
+ EXPECT_EQ(tensorflow::gtl::ArraySlice<int64>(literal->data<int64>().data(),
+ expected_values.size()),
+ tensorflow::gtl::ArraySlice<int64>(expected_values));
+}
+
TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) {
// clang-format off
auto literal = Literal::CreateR4Projected<float>({
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 14c86e2e72..a6b3c0c7c4 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -81,6 +81,7 @@ cc_library(
":conv_canonicalization",
":cpu_copy_insertion",
":cpu_executable",
+ ":cpu_hlo_support_checker",
":cpu_instruction_fusion",
":cpu_layout_assignment",
":cpu_options",
@@ -873,6 +874,32 @@ tf_cc_test(
],
)
+cc_library(
+ name = "cpu_hlo_support_checker",
+ srcs = ["cpu_hlo_support_checker.cc"],
+ hdrs = ["cpu_hlo_support_checker.h"],
+ deps = [
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo_pass",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "cpu_hlo_support_checker_test",
+ srcs = ["cpu_hlo_support_checker_test.cc"],
+ deps = [
+ ":cpu_hlo_support_checker",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:test",
+ "//tensorflow/compiler/xla/tests:hlo_test_base",
+ "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 8e6562c237..42fd4f100b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -50,6 +50,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_executable.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
@@ -258,6 +259,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
// Optimization pipeline.
HloPassPipeline pipeline("CPU");
pipeline.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction());
+ pipeline.AddPass<CpuHloSupportChecker>();
ReducePrecisionInsertion::AddPasses(
&pipeline, module->config().debug_options(),
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
new file mode 100644
index 0000000000..7bd4741a04
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> CpuHloSupportChecker::Run(HloModule* module) {
+ for (auto* computation : module->computations()) {
+ for (const auto& instruction : computation->instructions()) {
+ TF_RETURN_IF_ERROR(
+ ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+ TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+ instruction->shape(),
+ [&instruction](const Shape& subshape, const ShapeIndex&) {
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ return xla::Unimplemented(
+ "CPU backend does not support HLO instruction %s with shape "
+ "containing a sparse layout: %s",
+ instruction->ToString().c_str(),
+ ShapeUtil::HumanStringWithLayout(instruction->shape())
+ .c_str());
+ }
+ return Status::OK();
+ }));
+ }
+ }
+ return false;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
new file mode 100644
index 0000000000..2271af7b24
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the CPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class CpuHloSupportChecker : public HloPassInterface {
+ public:
+ CpuHloSupportChecker() = default;
+ ~CpuHloSupportChecker() override = default;
+
+ tensorflow::StringPiece name() const override {
+ return "cpu_hlo_support_checker";
+ }
+
+ // Note: always returns false (no instructions are ever modified by this
+ // pass).
+ StatusOr<bool> Run(HloModule* module) override;
+};
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000..0f463e6de6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class CpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+ CpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+ CpuHloSupportChecker checker_;
+};
+
+TEST_F(CpuHloSupportCheckerTest, Add) {
+ HloComputation::Builder builder(TestName());
+ const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ scalar_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) {
+ HloComputation::Builder builder(TestName());
+ const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ sparse_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ Status status = checker().Run(module.get()).status();
+ ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+ EXPECT_THAT(status.error_message(),
+ HasSubstr("CPU backend does not support"));
+ EXPECT_THAT(status.error_message(),
+ HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index a86d3583a6..69ccc7179f 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -437,6 +437,7 @@ cc_library(
":fusion_merger",
":gpu_copy_insertion",
":gpu_executable",
+ ":gpu_hlo_support_checker",
":gpu_layout_assignment",
":hlo_schedule",
":instruction_fusion",
@@ -610,6 +611,32 @@ tf_cc_test(
],
)
+cc_library(
+ name = "gpu_hlo_support_checker",
+ srcs = ["gpu_hlo_support_checker.cc"],
+ hdrs = ["gpu_hlo_support_checker.h"],
+ deps = [
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo_pass",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "gpu_hlo_support_checker_test",
+ srcs = ["gpu_hlo_support_checker_test.cc"],
+ deps = [
+ ":gpu_hlo_support_checker",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:test",
+ "//tensorflow/compiler/xla/tests:hlo_test_base",
+ "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 9f34866ff5..93db9ebbee 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -39,6 +39,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -137,6 +138,7 @@ tensorflow::Status OptimizeHloModule(
{
HloPassPipeline pipeline("optimization");
pipeline.AddInvariantChecker<HloVerifier>(shape_size_function);
+ pipeline.AddPass<GpuHloSupportChecker>();
ReducePrecisionInsertion::AddPasses(
&pipeline, hlo_module->config().debug_options(),
ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
new file mode 100644
index 0000000000..4944c41f7d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> GpuHloSupportChecker::Run(HloModule* module) {
+ for (auto* computation : module->computations()) {
+ for (const auto& instruction : computation->instructions()) {
+ TF_RETURN_IF_ERROR(
+ ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+ TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+ instruction->shape(),
+ [&instruction](const Shape& subshape, const ShapeIndex&) {
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ return xla::Unimplemented(
+ "GPU backend does not support HLO instruction %s with shape "
+ "containing a sparse layout: %s",
+ instruction->ToString().c_str(),
+ ShapeUtil::HumanStringWithLayout(instruction->shape())
+ .c_str());
+ }
+ return Status::OK();
+ }));
+ }
+ }
+ return false;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
new file mode 100644
index 0000000000..d9550f81b5
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the GPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class GpuHloSupportChecker : public HloPassInterface {
+ public:
+ GpuHloSupportChecker() = default;
+ ~GpuHloSupportChecker() override = default;
+
+ tensorflow::StringPiece name() const override {
+ return "gpu_hlo_support_checker";
+ }
+
+ // Note: always returns false (no instructions are ever modified by this
+ // pass).
+ StatusOr<bool> Run(HloModule* module) override;
+};
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000..0a4089df4c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class GpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+ GpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+ GpuHloSupportChecker checker_;
+};
+
+TEST_F(GpuHloSupportCheckerTest, Add) {
+ HloComputation::Builder builder(TestName());
+ const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ scalar_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) {
+ HloComputation::Builder builder(TestName());
+ const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ sparse_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ Status status = checker().Run(module.get()).status();
+ ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+ EXPECT_THAT(status.error_message(),
+ HasSubstr("GPU backend does not support"));
+ EXPECT_THAT(status.error_message(),
+ HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 3d4080e353..290ea9b496 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -84,7 +84,7 @@ bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) {
if (lhs.layout().format() != rhs.layout().format()) {
return false;
}
- if (LayoutUtil::IsDense(lhs)) {
+ if (LayoutUtil::IsDenseArray(lhs)) {
if (!ContainersEqual(LayoutUtil::MinorToMajor(lhs),
LayoutUtil::MinorToMajor(rhs))) {
VLOG(3) << "CompareShapes: lhs layout != rhs layout";
@@ -202,6 +202,17 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
return MakeShapeWithLayout(element_type, dimensions, layout);
}
+/* static */ Shape ShapeUtil::MakeShapeWithSparseLayout(
+ PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements) {
+ DCHECK_NE(TUPLE, element_type);
+ DCHECK_NE(OPAQUE, element_type);
+ Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+ *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+ TF_DCHECK_OK(ShapeUtil::ValidateShape(shape));
+ return shape;
+}
+
/* static */ Shape
ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
const Shape& shape) {
@@ -249,7 +260,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
}
/* static */ void ShapeUtil::AppendMajorDimension(int bound, Shape* shape) {
- CHECK(LayoutUtil::IsDense(*shape));
+ CHECK(LayoutUtil::IsDenseArray(*shape));
shape->mutable_layout()->add_minor_to_major(Rank(*shape));
shape->add_dimensions(bound);
TF_DCHECK_OK(ValidateShape(*shape));
@@ -658,23 +669,55 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
TF_DCHECK_OK(ValidateShape(shape));
DCHECK_NE(OPAQUE, shape.element_type());
if (shape.element_type() == TUPLE) {
- CHECK_GT(pointer_size, 0);
- return pointer_size * shape.tuple_shapes_size();
+ return ByteSizeOfTupleIndexTable(shape, pointer_size);
+ }
+ int64 byte_size = ByteSizeOfElements(shape);
+ if (LayoutUtil::IsSparseArray(shape)) {
+ byte_size += ByteSizeOfSparseIndices(shape);
}
+ return byte_size;
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfTupleIndexTable(const Shape& shape,
+ int64 pointer_size) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK_EQ(TUPLE, shape.element_type());
+ CHECK_GT(pointer_size, 0);
+ return pointer_size * shape.tuple_shapes_size();
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfElements(const Shape& shape) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK(ShapeUtil::IsArray(shape));
int64 allocated_element_count;
- if (shape.layout().padded_dimensions_size() > 0) {
- CHECK_EQ(Rank(shape), shape.layout().padded_dimensions_size());
- allocated_element_count = 1;
- for (int64 dimension_size : shape.layout().padded_dimensions()) {
- allocated_element_count *= dimension_size;
- }
+
+ if (LayoutUtil::IsSparseArray(shape)) {
+ allocated_element_count = LayoutUtil::MaxSparseElements(shape.layout());
} else {
- allocated_element_count = ElementsIn(shape);
+ CHECK(LayoutUtil::IsDenseArray(shape));
+ tensorflow::gtl::ArraySlice<int64> padded_dimensions =
+ LayoutUtil::PaddedDimensions(shape);
+ if (!padded_dimensions.empty()) {
+ CHECK_EQ(Rank(shape), padded_dimensions.size());
+ allocated_element_count = 1;
+ for (int64 dimension_size : padded_dimensions) {
+ allocated_element_count *= dimension_size;
+ }
+ } else {
+ allocated_element_count = ElementsIn(shape);
+ }
}
return allocated_element_count *
ByteSizeOfPrimitiveType(shape.element_type());
}
+/* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK(LayoutUtil::IsSparseArray(shape));
+ return LayoutUtil::MaxSparseElements(shape.layout()) *
+ ShapeUtil::Rank(shape) * sizeof(int64);
+}
+
/* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
const Shape& shape) {
if (shape.element_type() == TUPLE) {
@@ -900,7 +943,7 @@ Status ForEachMutableSubshapeHelper(
new_shape.add_dimensions(dim);
}
if (shape.has_layout()) {
- CHECK(LayoutUtil::IsDense(shape));
+ CHECK(LayoutUtil::IsDenseArray(shape));
Layout* new_layout = new_shape.mutable_layout();
new_layout->set_format(DENSE);
new_layout->clear_minor_to_major();
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 59bdffee5a..453d4ec047 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -143,7 +143,10 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index);
class ShapeUtil {
public:
// Returns the number of elements are contained within the provided shape;
- // e.g. for rank 0 (scalars) the result is always 1.
+ // e.g. for rank 0 (scalars) the result is always 1. Note that sparse shapes
+ // may not actually be able to store this number of elements. See
+ // LayoutUtil::MaxSparseElements(shape) to obtain the maximum number of
+ // elements that can be stored in a sparse shape.
// Precondition: !IsTuple(shape)
static int64 ElementsIn(const Shape& shape);
@@ -164,6 +167,27 @@ class ShapeUtil {
// Precondition: !ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)
static int64 ByteSizeOfPrimitiveType(PrimitiveType primitive_type);
+ // Returns the number of bytes required to store the tuple member pointers for
+ // an allocation of `shape`. The `shape` must be a TUPLE shape, and
+ // `pointer_size` must be larger than zero.
+ static int64 ByteSizeOfTupleIndexTable(const Shape& shape,
+ int64 pointer_size);
+
+ // Returns the number of bytes required for the elements in an allocation of
+ // `shape`, which must be an array shape. The return value does not include
+ // the bytes needed to store sparse indices. Dense shapes use a separate
+ // memory location for each element, and so for these shapes,
+ // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. For dense shapes, this
+ // size also includes padding if present in the layout. For sparse shapes,
+ // `ByteSizeOf(shape) == ByteSizeOfElements(shape) +
+ // ByteSizeOfSparseIndices(shape)`.
+ static int64 ByteSizeOfElements(const Shape& shape);
+
+ // Returns the number of bytes required for the sparse indices in an
+ // allocation of shape. The shape must be an array shape. The return value
+ // does not include the bytes needed to store the array elements.
+ static int64 ByteSizeOfSparseIndices(const Shape& shape);
+
// Returns a human-readable string that represents the given shape, with or
// without layout. e.g. "f32[42x12] {0, 1}" or "f32[64]".
static string HumanString(const Shape& shape);
@@ -269,6 +293,10 @@ class ShapeUtil {
PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
tensorflow::gtl::ArraySlice<int64> minor_to_major);
+ static Shape MakeShapeWithSparseLayout(
+ PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements);
+
// Constructs a new shape with major-first layout (i.e. {n, n-1, ..., 0}).
static Shape MakeShapeWithDescendingLayout(
PrimitiveType element_type,
diff --git a/tensorflow/compiler/xla/sparse_index_array.cc b/tensorflow/compiler/xla/sparse_index_array.cc
new file mode 100644
index 0000000000..e7738e6790
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.cc
@@ -0,0 +1,110 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+SparseIndexArray::SparseIndexArray() : rank_(0), max_indices_(0) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+ std::vector<int64> indices)
+ : indices_(std::move(indices)), rank_(rank), max_indices_(max_indices) {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(indices_.size() % rank_, 0)
+ << "indices_.size(): " << indices_.size() << ", rank_: " << rank_;
+ CHECK_LE(index_count(), max_indices_);
+}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+ tensorflow::gtl::ArraySlice<int64> indices)
+ : SparseIndexArray(max_indices, rank,
+ std::vector<int64>(indices.begin(), indices.end())) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices,
+ const Array2D<int64>& indices)
+ : SparseIndexArray(max_indices, indices.n2(),
+ std::vector<int64>(indices.begin(), indices.end())) {}
+
+int64 SparseIndexArray::index_count() const {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(indices_.size() % rank_, 0);
+ return indices_.size() / rank_;
+}
+
+tensorflow::gtl::ArraySlice<int64> SparseIndexArray::At(
+ int64 sparse_index_number) const {
+ CHECK_GT(rank_, 0);
+ CHECK_GE(sparse_index_number, 0);
+ CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size());
+ return tensorflow::gtl::ArraySlice<int64>(
+ indices_.data() + rank_ * sparse_index_number, rank_);
+}
+
+tensorflow::gtl::MutableArraySlice<int64> SparseIndexArray::At(
+ int64 sparse_index_number) {
+ CHECK_GT(rank_, 0);
+ CHECK_GE(sparse_index_number, 0);
+ CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size());
+ return tensorflow::gtl::MutableArraySlice<int64>(
+ indices_.data() + rank_ * sparse_index_number, rank_);
+}
+
+void SparseIndexArray::Append(tensorflow::gtl::ArraySlice<int64> index) {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(index.size(), rank_);
+ indices_.insert(indices_.end(), index.begin(), index.end());
+}
+
+void SparseIndexArray::Clear() { indices_.clear(); }
+
+void SparseIndexArray::Resize(int64 num_indices) {
+ CHECK_GT(rank_, 0);
+ indices_.resize(rank_ * num_indices);
+}
+
+bool SparseIndexArray::Validate(const Shape& shape) const {
+ if (rank_ == 0 || rank_ != ShapeUtil::Rank(shape)) {
+ return false;
+ }
+ int64 num_indices = index_count();
+ if (num_indices > LayoutUtil::MaxSparseElements(shape.layout())) {
+ return false;
+ }
+ if (num_indices == 0) {
+ return true;
+ }
+ tensorflow::gtl::ArraySlice<int64> last = At(0);
+ if (!IndexUtil::IndexInBounds(shape, last)) {
+ return false;
+ }
+ for (int64 n = 1; n < num_indices; ++n) {
+ tensorflow::gtl::ArraySlice<int64> next = At(n);
+ if (!IndexUtil::IndexInBounds(shape, next)) {
+ return false;
+ }
+ if (IndexUtil::CompareIndices(last, next) >= 0) {
+ return false;
+ }
+ last = next;
+ }
+ return true;
+}
+
+} // namespace xla
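Validate() above enforces four conditions: the rank matches the shape, the index count fits within the layout's max_sparse_elements, every index is in bounds for the shape, and consecutive indices increase strictly in lexicographic order (which also makes them unique and sorted). A small sketch of what that accepts and rejects, assuming MakeShapeWithSparseLayout and LayoutUtil::MaxSparseElements, added elsewhere in this change, behave as their names suggest.

// Sketch, not part of the patch: valid vs. invalid index sets for Validate().
#include <vector>

#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/sparse_index_array.h"
#include "tensorflow/core/platform/logging.h"

namespace xla {

void ValidateSketch() {
  Shape shape = ShapeUtil::MakeShapeWithSparseLayout(
      F32, {10, 10}, /*max_sparse_elements=*/4);

  // Indices {0,1}, {2,3}, {5,9}: in bounds and strictly increasing -> valid.
  SparseIndexArray ok(/*max_indices=*/4, /*rank=*/2,
                      std::vector<int64>{0, 1, 2, 3, 5, 9});
  CHECK(ok.Validate(shape));

  // {5,9} appears twice: not strictly increasing -> rejected.
  SparseIndexArray dup(/*max_indices=*/4, /*rank=*/2,
                       std::vector<int64>{0, 1, 5, 9, 5, 9});
  CHECK(!dup.Validate(shape));
}

}  // namespace xla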
diff --git a/tensorflow/compiler/xla/sparse_index_array.h b/tensorflow/compiler/xla/sparse_index_array.h
new file mode 100644
index 0000000000..f67f34760e
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.h
@@ -0,0 +1,176 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Utility class for managing sparse array indices.
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+
+namespace xla {
+
+// Encapsulates the array of indices for a sparse array. A SparseIndexArray
+// contains indices for up to `max_indices` elements of a sparse array. Each
+// sparse index is an array of `rank` int64 values that gives the location of a
+// value within a sparse array. Note that the dimensions of the array are not
+// checked (except for the rank). To avoid confusion, we refer to the position
+// of an index within a SparseIndexArray as a sparse index number.
+class SparseIndexArray {
+ public:
+ SparseIndexArray();
+ SparseIndexArray(const SparseIndexArray&) = default;
+ SparseIndexArray(SparseIndexArray&&) = default;
+ SparseIndexArray& operator=(const SparseIndexArray&) = default;
+ SparseIndexArray& operator=(SparseIndexArray&&) = default;
+
+ // Constructs a SparseIndexArray that can hold up to `max_indices` sparse
+ // indices, with initial contents obtained from the given array. The rank
+ // is taken from the minor dimension of the array. The major dimension of the
+ // array must not exceed `max_indices`.
+ SparseIndexArray(int64 max_indices, const Array2D<int64>& indices);
+
+ // Like above, but the array is flattened. For example, the following are
+ // equivalent:
+ //
+ // SparseIndexArray(10, Array2D<int64>{
+ //                        {0, 1, 2},
+ //                        {3, 4, 5},
+ //                        {6, 7, 8},
+ //                        {9, 10, 11},
+ //                      })
+ //
+ // SparseIndexArray(10, 3,
+ // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+ //
+ SparseIndexArray(int64 max_indices, int64 rank,
+ std::vector<int64> indices = {});
+ SparseIndexArray(int64 max_indices, int64 rank,
+ tensorflow::gtl::ArraySlice<int64> indices);
+
+ // Returns the number of elements represented by the indices stored in the
+ // array.
+ int64 index_count() const;
+
+ // Returns a slice that refers to the given sparse index number. The argument
+ // must be in the range [0, index_count()).
+ tensorflow::gtl::ArraySlice<int64> At(int64 sparse_index_number) const;
+ tensorflow::gtl::MutableArraySlice<int64> At(int64 sparse_index_number);
+
+ // Adds the given index at the end of the array. The new size of the
+ // SparseIndexArray must not exceed `max_indices`.
+ void Append(tensorflow::gtl::ArraySlice<int64> index);
+
+ // Removes all indices from the array.
+ void Clear();
+
+ // Resizes the array to contain the given number of sparse indices. The new
+ // size must not exceed `max_indices`. If the new size is larger than
+ // the old size, the value of the new indices is not specified.
+ void Resize(int64 num_indices);
+
+ // Returns true iff all indices are unique and occur in sorted order, and are
+ // valid for the given shape.
+ bool Validate(const Shape& shape) const;
+
+ int64 rank() const { return rank_; }
+ int64 max_indices() const { return max_indices_; }
+
+ // Returns a pointer to the int64 array that holds the sparse indices.
+ tensorflow::gtl::MutableArraySlice<int64> mutable_data() { return &indices_; }
+ tensorflow::gtl::ArraySlice<int64> data() const { return indices_; }
+
+ // Sorts this sparse index array along with the set of corresponding values.
+ // The indices and values are sorted in the lexicographic order of the
+ // indices, from smallest to largest.
+ //
+ // For example:
+ //
+ // std::vector<float> v{10.0, 11.0, 12.0};
+ // SparseIndexArray a(10, 3,
+ //                    {3, 4, 5, 1, 2, 3, 2, 3, 4});
+ // a.SortWithValues<float>(&v);
+ // // Prints "11, 12, 10":
+ // std::cout << v[0] << ", " << v[1] << ", " << v[2] << std::endl;
+ //
+ template <typename NativeT>
+ void SortWithValues(tensorflow::gtl::MutableArraySlice<NativeT> values);
+
+ private:
+ std::vector<int64> indices_;
+ int64 rank_;
+ int64 max_indices_;
+};
+
+template <typename NativeT>
+void SparseIndexArray::SortWithValues(
+ tensorflow::gtl::MutableArraySlice<NativeT> values) {
+ int64 num_elements = index_count();
+ CHECK_EQ(values.size(), num_elements);
+ std::vector<int64> sort_order;
+ sort_order.reserve(num_elements);
+ for (int64 i = 0; i < num_elements; ++i) {
+ sort_order.push_back(i);
+ }
+ auto sort_order_less = [this](int64 lhs, int64 rhs) {
+ return IndexUtil::CompareIndices(At(lhs), At(rhs)) < 0;
+ };
+ std::sort(sort_order.begin(), sort_order.end(), sort_order_less);
+
+ // Reorder the array elements according to sort_order. Work through the array
+ // and follow cycles so we can do the reorder in-place.
+ tensorflow::gtl::InlinedVector<int64, 8> saved_index(rank());
+ for (int64 i = 0; i < num_elements; ++i) {
+ // sort_order[i] == -1 indicates the element has already been copied.
+ if (sort_order[i] < 0) {
+ continue;
+ } else if (i == sort_order[i]) {
+ // The element is already in sorted order.
+ sort_order[i] = -1;
+ continue;
+ }
+
+ std::copy_n(At(i).begin(), rank(), saved_index.begin());
+ NativeT saved_value = values[i];
+ int64 j = i;
+ for (;;) {
+ if (sort_order[j] == i) {
+ std::copy_n(saved_index.begin(), rank(), At(j).begin());
+ values[j] = saved_value;
+ sort_order[j] = -1;
+ break;
+ }
+
+ std::copy_n(At(sort_order[j]).begin(), rank(), At(j).begin());
+ values[j] = values[sort_order[j]];
+
+ int64 k = sort_order[j];
+ sort_order[j] = -1;
+ j = k;
+ }
+ }
+}
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
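SortWithValues above first builds sort_order, the permutation that sorts the indices lexicographically, and then applies it in place by following permutation cycles, so only one index/value needs to be saved per cycle instead of copying both arrays. A standalone sketch of the same cycle-following step on a plain vector; the helper name ApplyOrderInPlace is made up for illustration.

// Sketch, not part of the patch: apply a sort order in place by following
// cycles. Afterwards v[i] holds the element that was originally at v[order[i]].
#include <cstdint>
#include <iostream>
#include <vector>

template <typename T>
void ApplyOrderInPlace(std::vector<int64_t> order, std::vector<T>* v) {
  for (int64_t i = 0; i < static_cast<int64_t>(v->size()); ++i) {
    if (order[i] < 0 || order[i] == i) {
      order[i] = -1;  // Nothing to move for this position.
      continue;
    }
    T saved = (*v)[i];  // Free up position i; its old value is needed later.
    int64_t j = i;
    for (;;) {
      if (order[j] == i) {  // The cycle closes; the saved value lands here.
        (*v)[j] = saved;
        order[j] = -1;
        break;
      }
      (*v)[j] = (*v)[order[j]];  // Pull the element that belongs at j.
      int64_t next = order[j];
      order[j] = -1;  // Mark position j as done.
      j = next;
    }
  }
}

int main() {
  std::vector<double> values = {10.0, 11.0, 12.0};
  // Sorted position 0 takes original element 1, position 1 takes element 2,
  // position 2 takes element 0 (what sorting sort_order would produce).
  ApplyOrderInPlace({1, 2, 0}, &values);
  std::cout << values[0] << " " << values[1] << " " << values[2] << "\n";
  // Prints: 11 12 10
  return 0;
}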
diff --git a/tensorflow/compiler/xla/sparse_index_array_test.cc b/tensorflow/compiler/xla/sparse_index_array_test.cc
new file mode 100644
index 0000000000..7377f88958
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array_test.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/test.h"
+
+namespace xla {
+namespace {
+
+TEST(SparseIndexArrayTest, Sort) {
+ SparseIndexArray a(10, 3);
+ a.Append({2, 3, 4});
+ a.Append({3, 4, 5});
+ a.Append({1, 2, 3});
+ a.Append({5, 6, 7});
+ a.Append({4, 5, 6});
+ a.Append({6, 7, 8});
+ std::vector<double> values = {
+ 12.0, 13.0, 11.0, 15.0, 14.0, 16.0,
+ };
+ a.SortWithValues<double>(&values);
+ ASSERT_EQ(a.data(), std::vector<int64>({1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5,
+ 6, 7, 6, 7, 8}));
+ ASSERT_EQ(values, std::vector<double>({11.0, 12.0, 13.0, 14.0, 15.0, 16.0}));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index e34f138b6e..3aea021753 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -120,6 +120,9 @@ enum Format {
// The default layout, with exactly one storage location per element (ignoring
// padding).
DENSE = 1;
+ // A sparsely encoded layout, providing only the index/value pairs of non-zero
+ // elements.
+ SPARSE = 2;
}
// A layout describes how the array is placed in (1D) memory space. This
@@ -151,6 +154,11 @@ message Layout {
// field must be unset unless the format is DENSE.
PaddingValue padding_value = 3;
+ // The maximum number of elements that can be stored for SPARSE formats. This
+ // can be used to determine the maximum size in bytes of arrays stored in
+ // memory. This field must be unset unless the format is SPARSE.
+ int64 max_sparse_elements = 5;
+
// Important: if any field is added, be sure to modify ShapeUtil::Equal()
// appropriately to account for the new field.
}
@@ -333,7 +341,8 @@ message LiteralProto {
// The F16s and BF16s are encoded in little endian byte order
bytes f16s = 11;
bytes bf16s = 13;
- // Next = 14
+ repeated int64 sparse_indices = 14;
+ // Next = 15
}
message WindowDimension {
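The new repeated sparse_indices field presumably carries the flattened index list of a sparse literal in the same layout SparseIndexArray uses: index_count groups of rank int64s, with the i-th group addressing the value at position i of the corresponding typed value field. That pairing is an assumption here, since the Literal-side plumbing lives elsewhere in this change. A small sketch of decoding such a flattened buffer:

// Sketch, not part of the patch: pairing a flattened sparse index buffer with
// a value array. The grouping into `rank`-sized chunks mirrors
// SparseIndexArray; the pairing with LiteralProto's value fields is assumed.
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int64_t rank = 2;
  // Two explicit elements of a hypothetical f32[10,10] sparse literal.
  std::vector<int64_t> sparse_indices = {0, 1, 5, 9};  // {0,1} and {5,9}
  std::vector<float> f32s = {2.5f, -1.0f};

  const int64_t index_count = sparse_indices.size() / rank;
  for (int64_t i = 0; i < index_count; ++i) {
    std::cout << "(";
    for (int64_t d = 0; d < rank; ++d) {
      std::cout << sparse_indices[i * rank + d] << (d + 1 < rank ? ", " : "");
    }
    std::cout << ") -> " << f32s[i] << "\n";
  }
  // Prints:
  //   (0, 1) -> 2.5
  //   (5, 9) -> -1
  return 0;
}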