 tensorflow/compiler/xla/BUILD                                        |  23
 tensorflow/compiler/xla/index_util.cc                                |  29
 tensorflow/compiler/xla/index_util.h                                 |  12
 tensorflow/compiler/xla/layout_util.cc                               |  34
 tensorflow/compiler/xla/layout_util.h                                |  17
 tensorflow/compiler/xla/layout_util_test.cc                          |  71
 tensorflow/compiler/xla/literal_util.cc                              |  36
 tensorflow/compiler/xla/literal_util.h                               | 119
 tensorflow/compiler/xla/literal_util_test.cc                         |  28
 tensorflow/compiler/xla/service/cpu/BUILD                            |  27
 tensorflow/compiler/xla/service/cpu/cpu_compiler.cc                  |   2
 tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc       |  48
 tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h        |  42
 tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc  |  72
 tensorflow/compiler/xla/service/gpu/BUILD                            |  27
 tensorflow/compiler/xla/service/gpu/gpu_compiler.cc                  |   2
 tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc       |  48
 tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h        |  42
 tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc  |  72
 tensorflow/compiler/xla/shape_util.cc                                |  67
 tensorflow/compiler/xla/shape_util.h                                 |  30
 tensorflow/compiler/xla/sparse_index_array.cc                        | 110
 tensorflow/compiler/xla/sparse_index_array.h                         | 176
 tensorflow/compiler/xla/sparse_index_array_test.cc                   |  43
 tensorflow/compiler/xla/xla_data.proto                               |  11
 25 files changed, 1160 insertions(+), 28 deletions(-)
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 88de17a5ff..dcbe1fe9e5 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -302,6 +302,7 @@ cc_library(
":array4d",
":shape_tree",
":shape_util",
+ ":sparse_index_array",
":status_macros",
":types",
":util",
@@ -628,6 +629,28 @@ tf_cc_test(
],
)
+cc_library(
+ name = "sparse_index_array",
+ srcs = ["sparse_index_array.cc"],
+ hdrs = ["sparse_index_array.h"],
+ deps = [
+ ":array2d",
+ ":shape_util",
+ ":xla_data_proto",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "sparse_index_array_test",
+ srcs = ["sparse_index_array_test.cc"],
+ deps = [
+ ":sparse_index_array",
+ ":test",
+ "//tensorflow/core:test_main",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc
index 2ee23927d8..ffd1fb79e9 100644
--- a/tensorflow/compiler/xla/index_util.cc
+++ b/tensorflow/compiler/xla/index_util.cc
@@ -149,4 +149,33 @@ namespace xla {
return stride;
}
+/* static */ bool IndexUtil::IndexInBounds(
+ const Shape& shape, tensorflow::gtl::ArraySlice<int64> index) {
+ int64 rank = ShapeUtil::Rank(shape);
+ if (rank != index.size()) {
+ return false;
+ }
+ for (int64 d = 0; d < rank; ++d) {
+ if (index[d] >= shape.dimensions(d)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/* static */ int IndexUtil::CompareIndices(
+ tensorflow::gtl::ArraySlice<int64> lhs,
+ tensorflow::gtl::ArraySlice<int64> rhs) {
+ int64 rank = lhs.size();
+ CHECK_EQ(rhs.size(), rank);
+ for (int64 dim = 0; dim < rank; ++dim) {
+ if (lhs[dim] < rhs[dim]) {
+ return -1;
+ } else if (lhs[dim] > rhs[dim]) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
} // namespace xla
diff --git a/tensorflow/compiler/xla/index_util.h b/tensorflow/compiler/xla/index_util.h
index c9838966a5..0b9188e852 100644
--- a/tensorflow/compiler/xla/index_util.h
+++ b/tensorflow/compiler/xla/index_util.h
@@ -69,6 +69,18 @@ class IndexUtil {
// sizeof(dimension(3)) * sizeof(dimension(2)) == 4 * 10
static int64 GetDimensionStride(const Shape& shape, int64 dimension);
+ // Returns true iff the given multi-index is contained in the bounds for the
+ // shape.
+ static bool IndexInBounds(const Shape& shape,
+ tensorflow::gtl::ArraySlice<int64> index);
+
+ // Compares the given indices in lexicographic order. lhs[0] and rhs[0] are
+ // compared first, and lhs[rank-1] and rhs[rank-1] last. If lhs is smaller,
+ // then -1 is returned. If lhs is larger, then 1 is returned. Otherwise, 0 is
+ // returned.
+ static int CompareIndices(tensorflow::gtl::ArraySlice<int64> lhs,
+ tensorflow::gtl::ArraySlice<int64> rhs);
+
private:
TF_DISALLOW_COPY_AND_ASSIGN(IndexUtil);
};
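
A minimal caller sketch for the new IndexUtil helpers (hypothetical code; the shape and the indices are arbitrary, chosen only to illustrate the semantics documented above):

  #include "tensorflow/compiler/xla/index_util.h"
  #include "tensorflow/compiler/xla/shape_util.h"

  void IndexUtilSketch() {
    // A dense 4 x 10 array shape.
    xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {4, 10});

    // {3, 9} lies inside the bounds; {4, 0} does not (dimension 0 has size 4).
    bool in = xla::IndexUtil::IndexInBounds(shape, {3, 9});    // true
    bool out = xla::IndexUtil::IndexInBounds(shape, {4, 0});   // false

    // Lexicographic comparison: {1, 2} sorts before {1, 3}, so -1 is returned.
    int cmp = xla::IndexUtil::CompareIndices({1, 2}, {1, 3});  // -1
    (void)in; (void)out; (void)cmp;
  }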
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index 6435226fbe..ddf091e19f 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -64,6 +64,13 @@ void SetDefaultLayoutToContainer(
return layout;
}
+/* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) {
+ Layout layout;
+ layout.set_format(SPARSE);
+ layout.set_max_sparse_elements(max_sparse_elements);
+ return layout;
+}
+
namespace {
// Internal helper that creates a default layout for an array of the given rank.
@@ -234,7 +241,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
LayoutUtil::ClearLayout(program_shape->mutable_result());
}
-/* static */ bool LayoutUtil::IsDense(const Shape& shape) {
+/* static */ bool LayoutUtil::IsDenseArray(const Shape& shape) {
return ShapeUtil::IsArray(shape) && shape.has_layout() &&
IsDense(shape.layout());
}
@@ -260,7 +267,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
shape.layout().padded_dimensions_size() == 0) {
return false;
}
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
CHECK_EQ(shape.dimensions_size(), shape.layout().padded_dimensions_size());
for (int64 i = 0; i < shape.dimensions_size(); ++i) {
if (shape.layout().padded_dimensions(i) > shape.dimensions(i)) {
@@ -272,21 +279,35 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::PaddedDimensions(
const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return AsInt64Slice(shape.layout().padded_dimensions());
}
/* static */ int64 LayoutUtil::PaddedDimension(const Shape& shape,
int64 index) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return shape.layout().padded_dimensions(index);
}
/* static */ PaddingValue LayoutUtil::GetPaddingValue(const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return shape.layout().padding_value();
}
+/* static */ bool LayoutUtil::IsSparseArray(const Shape& shape) {
+ return ShapeUtil::IsArray(shape) && shape.has_layout() &&
+ IsSparse(shape.layout());
+}
+
+/* static */ bool LayoutUtil::IsSparse(const Layout& layout) {
+ return layout.format() == SPARSE;
+}
+
+/* static */ int64 LayoutUtil::MaxSparseElements(const Layout& layout) {
+ CHECK(IsSparse(layout));
+ return layout.max_sparse_elements();
+}
+
/* static */ bool LayoutUtil::HasLayout(const Shape& shape) {
if (ShapeUtil::IsTuple(shape)) {
// Tuple shape: all subshapes must have a layout.
@@ -313,7 +334,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
/* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor(
const Shape& shape) {
- CHECK(IsDense(shape));
+ CHECK(IsDenseArray(shape));
return AsInt64Slice(shape.layout().minor_to_major());
}
@@ -419,6 +440,7 @@ tensorflow::Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src,
/* static */ bool LayoutUtil::AreDimensionsConsecutive(
const Layout& layout, tensorflow::gtl::ArraySlice<int64> dims) {
+ CHECK(IsDense(layout));
std::vector<int64> positions_in_layout;
for (int64 dim : dims) {
positions_in_layout.push_back(
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index f73cc95764..7c1ba4b022 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -36,6 +36,10 @@ class LayoutUtil {
// convenience function for protobuf construction.)
static Layout MakeLayout(tensorflow::gtl::ArraySlice<int64> minor_to_major);
+ // Creates a sparse layout with the given maximum number of elements. (This is
+ // a convenience function for protobuf construction.)
+ static Layout MakeSparseLayout(int64 max_sparse_elements);
+
// Returns default layout for the given shape.
static Layout GetDefaultLayoutForShape(const Shape& shape);
@@ -72,7 +76,7 @@ class LayoutUtil {
static void ClearLayout(ProgramShape* program_shape);
// Returns whether the given Shape is an array and has a dense format layout.
- static bool IsDense(const Shape& shape);
+ static bool IsDenseArray(const Shape& shape);
// Returns whether the given Layout has a dense format.
static bool IsDense(const Layout& layout);
@@ -107,6 +111,17 @@ class LayoutUtil {
// an array and has a dense layout.
static PaddingValue GetPaddingValue(const Shape& shape);
+ // Returns whether the given Shape is an array (i.e. not a tuple) and has a
+ // sparse format layout.
+ static bool IsSparseArray(const Shape& shape);
+
+ // Returns whether the given Layout has a sparse format.
+ static bool IsSparse(const Layout& layout);
+
+ // Returns the maximum number of elements that can be stored in a sparse
+ // layout.
+ static int64 MaxSparseElements(const Layout& layout);
+
// Returns whether the given shape has a layout. For tuple shapes, true is
// returned only if all elements have layouts.
static bool HasLayout(const Shape& shape);
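
A short sketch of the sparse-layout helpers declared above (hypothetical caller code; the element counts and dimensions are arbitrary):

  #include "tensorflow/compiler/xla/layout_util.h"
  #include "tensorflow/compiler/xla/shape_util.h"
  #include "tensorflow/core/platform/logging.h"

  void SparseLayoutSketch() {
    // A sparse layout that can hold at most 16 explicitly stored elements.
    xla::Layout layout = xla::LayoutUtil::MakeSparseLayout(16);
    CHECK(xla::LayoutUtil::IsSparse(layout));
    CHECK_EQ(xla::LayoutUtil::MaxSparseElements(layout), 16);

    // Attached to an array shape, the layout marks the whole array as sparse.
    xla::Shape shape =
        xla::ShapeUtil::MakeShapeWithSparseLayout(xla::F32, {100, 100}, 16);
    CHECK(xla::LayoutUtil::IsSparseArray(shape));
    CHECK(!xla::LayoutUtil::IsDenseArray(shape));
  }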
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index 331bb9afa9..daf4dc10ac 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -30,6 +30,14 @@ class LayoutUtilTest : public ::testing::Test {
*shape.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major);
return shape;
}
+
+ Shape MakeShapeWithSparseLayout(PrimitiveType element_type,
+ tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements) {
+ Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+ *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+ return shape;
+ }
};
TEST_F(LayoutUtilTest, TupleLayoutComparison) {
@@ -81,6 +89,29 @@ TEST_F(LayoutUtilTest, CopyLayoutArray) {
EXPECT_FALSE(dst.has_layout());
}
+TEST_F(LayoutUtilTest, CopyLayoutSparse) {
+ Shape src = MakeShapeWithSparseLayout(F32, {2, 3}, 2);
+ Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
+
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+ // Should work if destination has no layout.
+ dst.clear_layout();
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+
+ // If source is cleared, then destination should be cleared.
+ src.clear_layout();
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_TRUE(dst.has_layout());
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_FALSE(dst.has_layout());
+}
+
TEST_F(LayoutUtilTest, CopyLayoutTuple) {
Shape src = ShapeUtil::MakeTupleShape(
{MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -100,6 +131,25 @@ TEST_F(LayoutUtilTest, CopyLayoutTuple) {
EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
}
+TEST_F(LayoutUtilTest, CopyLayoutTupleSparse) {
+ Shape src = ShapeUtil::MakeTupleShape(
+ {MakeShapeWithSparseLayout(F32, {2, 3}, 4),
+ MakeShapeWithSparseLayout(F32, {42, 123}, 4),
+ ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {}, {}),
+ MakeShapeWithSparseLayout(F32, {1, 2, 3}, 6)})});
+ Shape dst = ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {2, 3}, {1, 0}),
+ MakeShapeWithLayout(F32, {42, 123}, {1, 0}),
+ ShapeUtil::MakeTupleShape(
+ {MakeShapeWithLayout(F32, {}, {}),
+ MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})});
+
+ EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+ EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
@@ -107,6 +157,13 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) {
EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
}
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleSameRank) {
+ Shape src = MakeShapeWithSparseLayout(F32, {123, 42, 7}, 6);
+ Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0});
+ ASSERT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst));
+ EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0});
@@ -116,6 +173,15 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) {
::testing::ContainsRegex("cannot copy layout from shape"));
}
+TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleDifferentRank) {
+ Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1});
+ Shape dst = MakeShapeWithSparseLayout(F32, {2, 3}, 4);
+ auto status = LayoutUtil::CopyLayoutBetweenShapes(src, &dst);
+ EXPECT_FALSE(status.ok());
+ EXPECT_THAT(status.error_message(),
+ ::testing::ContainsRegex("cannot copy layout from shape"));
+}
+
TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleTuple) {
Shape src =
ShapeUtil::MakeTupleShape({MakeShapeWithLayout(F32, {2, 3}, {0, 1}),
@@ -221,5 +287,10 @@ TEST_F(LayoutUtilTest, DefaultLayoutGettersMajorToMinor) {
ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25}))));
}
+TEST_F(LayoutUtilTest, SparseLayoutMaxElements) {
+ EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)),
+ 101);
+}
+
} // namespace
} // namespace xla
diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc
index cc1735e6f2..dff5c1381a 100644
--- a/tensorflow/compiler/xla/literal_util.cc
+++ b/tensorflow/compiler/xla/literal_util.cc
@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -94,9 +94,15 @@ Literal::Literal(const Shape& shape, bool allocate_arrays)
Piece& piece = pair.second;
piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
- if (ShapeUtil::IsArray(piece.subshape())) {
+ const Shape& subshape = piece.subshape();
+ if (ShapeUtil::IsArray(subshape)) {
if (allocate_arrays) {
piece.set_buffer(new char[piece.size_bytes()]);
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ piece.set_sparse_indices(new SparseIndexArray(
+ LayoutUtil::MaxSparseElements(subshape.layout()),
+ ShapeUtil::Rank(subshape)));
+ }
} else {
piece.set_buffer(nullptr);
}
@@ -112,6 +118,7 @@ void Literal::DeallocateBuffers() {
Piece& piece = pair.second;
if (piece.buffer() != nullptr) {
delete[] piece.buffer();
+ delete piece.sparse_indices();
}
}
}
@@ -164,6 +171,15 @@ std::unique_ptr<Literal> Literal::CreateFromShape(const Shape& shape) {
return literal;
}
+const SparseIndexArray* Literal::sparse_indices(
+ const ShapeIndex& shape_index) const {
+ return piece(shape_index).sparse_indices();
+}
+
+SparseIndexArray* Literal::sparse_indices(const ShapeIndex& shape_index) {
+ return piece(shape_index).sparse_indices();
+}
+
/* static */ std::unique_ptr<Literal> Literal::CreateFromDimensions(
PrimitiveType primitive_type,
tensorflow::gtl::ArraySlice<int64> dimensions) {
@@ -247,9 +263,12 @@ std::vector<Literal> Literal::DecomposeTuple() {
}
Piece& src_piece = piece(src_index);
- // Move the respective buffer over to the element Literal.
+ // Move the respective buffer and sparse indices over to the element
+ // Literal.
dest_piece.set_buffer(src_piece.buffer());
src_piece.set_buffer(nullptr);
+ dest_piece.set_sparse_indices(src_piece.sparse_indices());
+ src_piece.set_sparse_indices(nullptr);
}
}
// Set this literal to be nil-shaped.
@@ -406,6 +425,8 @@ Status Literal::MoveFrom(Literal&& src_literal,
Piece& dest_piece = piece(dest_index);
delete[] dest_piece.buffer();
dest_piece.set_buffer(src_piece.buffer());
+ delete dest_piece.sparse_indices();
+ dest_piece.set_sparse_indices(src_piece.sparse_indices());
}
src_literal.shape_ = ShapeUtil::MakeNil();
@@ -764,7 +785,7 @@ std::unique_ptr<Literal> Literal::Transpose(
// dimension has within the transposed array, a layout is affine if
// MinMaj(Di) == TMinMaj(T(Di)), with TMinMaj() being the minor to major
// vector of the affine layout.
- CHECK(LayoutUtil::IsDense(permuted_shape));
+ CHECK(LayoutUtil::IsDenseArray(permuted_shape));
Layout* layout = permuted_shape.mutable_layout();
layout->clear_minor_to_major();
for (auto index : LayoutUtil::MinorToMajor(shape())) {
@@ -1573,6 +1594,12 @@ LiteralProto Literal::ToProto() const {
}
piece.WriteToProto(proto_piece);
}
+
+ if (LayoutUtil::IsSparseArray(shape())) {
+ CopyToRepeatedField(proto.mutable_sparse_indices(),
+ sparse_indices()->data());
+ }
+
return proto;
}
@@ -1653,6 +1680,7 @@ LiteralView::LiteralView(const Literal& literal, const ShapeIndex& view_root) {
}
const Piece& src_piece = literal.piece(src_index);
piece.set_buffer(src_piece.buffer());
+ piece.set_sparse_indices(src_piece.sparse_indices());
piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index));
}
}
diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h
index dc29c6359c..50e25bbdd0 100644
--- a/tensorflow/compiler/xla/literal_util.h
+++ b/tensorflow/compiler/xla/literal_util.h
@@ -36,6 +36,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/ptr_util.h"
#include "tensorflow/compiler/xla/shape_tree.h"
#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/sparse_index_array.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
@@ -103,6 +104,12 @@ class Literal {
tensorflow::gtl::MutableArraySlice<NativeT> data(
const ShapeIndex& shape_index = {});
+ // Returns a pointer to the sparse index array. Returns nullptr if the literal
+ // is not a sparse array.
+ const SparseIndexArray* sparse_indices(
+ const ShapeIndex& shape_index = {}) const;
+ SparseIndexArray* sparse_indices(const ShapeIndex& shape_index = {});
+
// Returns a pointer to (or size of) the underlying buffer holding the array
// at the given shape index. CHECKs if the subshape of the literal at the
// given ShapeIndex is not array.
@@ -160,6 +167,56 @@ class Literal {
// array.
string GetR1U8AsString() const;
+ // Creates a literal with a sparse layout and the given indices and values.
+ // The shape is initialized from the given dimensions. The minor dimension of
+ // the indices array must equal the rank of the shape (i.e. size of the
+ // dimensions array). The major dimension of the indices array must equal the
+ // number of elements in the values array. The maximum number of elements in
+ // the array is taken from the max_indices() value of the index array.
+ //
+ // XLA assumes that sparse literals are in sorted order for all operations. If
+ // the `sort` argument is true, then the indices and values will be sorted
+ // while copying them into the literal. If you have ensured that the indices
+ // and values are already sorted, then you may set the `sort` argument to
+ // false to skip the sorting step.
+ //
+ // For example:
+ //
+ // CreateSparse(
+ // {12, 12, 12},
+ // SparseIndexArray(10,
+ // Array2D{
+ // {0, 1, 2},
+ // {3, 4, 5},
+ // {6, 7, 8},
+ // {9, 10, 11},
+ // }),
+ // {1.0, 2.0, 3.0, 4.0})
+ //
+ // This creates an array with shape F64[12,12,12]sparse{10}, that has the
+ // following non-zero values:
+ //
+ // [0, 1, 2]: 1.0
+ // [3, 4, 5]: 2.0
+ // [6, 7, 8]: 3.0
+ // [9, 10, 11]: 4.0
+ //
+ template <typename NativeT>
+ static std::unique_ptr<Literal> CreateSparse(
+ tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values, bool sort = true);
+
+ // Populates a literal with a sparse layout with the given indices and values.
+ // Each index in the indices array is CHECKed against the dimensions in the
+ // literal's shape. If sort is true, then the indices and values will be
+ // sorted. If sort is false, then the indices and values are assumed to
+ // already be in sorted order. See CreateSparse for an example of how data
+ // are populated.
+ template <typename NativeT>
+ void PopulateSparse(SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values,
+ bool sort = true);
+
// Creates a new Literal object with the shape specified as parameter.
// The content of the literal values is the default value of the primitive
// type of literal itself (0 for numeric types, and false for predicates).
@@ -358,7 +415,7 @@ class Literal {
const ShapeIndex& shape_index, NativeT value);
// Overloads of Get and Set for array literals. CHECKs if the literal is not
- // array-shaped.
+ // a dense array.
template <typename NativeT>
NativeT Get(tensorflow::gtl::ArraySlice<int64> multi_index) const;
template <typename NativeT>
@@ -408,6 +465,8 @@ class Literal {
// This function is useful if you want a polymorphic representation
// of the tensor's elements (turning it to a string for something
// like representation in a protobuf).
+ //
+ // This literal must have a dense layout.
void EachCellAsString(
const std::function<void(tensorflow::gtl::ArraySlice<int64> indices,
const string& value)>& per_cell) const;
@@ -447,6 +506,8 @@ class Literal {
//
// generator must be a callable of the type
// NativeT(tensorflow::gtl::ArraySlice<int64> indexes) or compatible.
+ //
+ // This literal must have a dense layout.
template <typename NativeT, typename FnType>
Status Populate(const FnType& generator);
@@ -485,10 +546,12 @@ class Literal {
// admonishments about floating-point equality checks apply. We expect you to
// use this to check for complex values that can be expressed precisely as
// float pairs e.g. (-0.5, 1.0).
+ //
+ // This literal must have a dense layout.
bool IsAllComplex(complex64 value) const;
// Returns whether this literal is zero at the specified index. This literal
- // must be an array.
+ // must be an array with a dense layout.
bool IsZero(tensorflow::gtl::ArraySlice<int64> indices) const;
// Return the count of the elements in the array at the given shape index in
@@ -563,6 +626,14 @@ class Literal {
char* buffer() const { return buffer_; }
void set_buffer(char* buffer) { buffer_ = buffer; }
+ // The array of multi-indices that provide the locations of non-zero
+ // elements in a sparse array. Only used if
+ // LayoutUtil::IsSparseArray(shape()) is true.
+ SparseIndexArray* sparse_indices() const { return sparse_indices_; }
+ void set_sparse_indices(SparseIndexArray* sparse_indices) {
+ sparse_indices_ = sparse_indices;
+ }
+
// Gets or sets the subshape of this piece. This reference points to a
// subshape within the shape in the containing Literal (Literal::shape_).
const Shape& subshape() const { return *subshape_; }
@@ -598,6 +669,9 @@ class Literal {
// For array-shaped pieces, this is the buffer holding the literal data.
char* buffer_ = nullptr;
+ // For sparse arrays, this is the array of indices.
+ SparseIndexArray* sparse_indices_ = nullptr;
+
// The shape of piece. This points into the shape of the containing Literal
// (Literal::shape_).
const Shape* subshape_ = nullptr;
@@ -837,6 +911,21 @@ template <typename NativeT>
}
template <typename NativeT>
+/* static */ std::unique_ptr<Literal> Literal::CreateSparse(
+ tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values, bool sort) {
+ int64 num_elements = values.size();
+ int64 rank = dimensions.size();
+ CHECK_EQ(num_elements, indices.index_count());
+ CHECK_EQ(rank, indices.rank());
+ auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithSparseLayout(
+ primitive_util::NativeToPrimitiveType<NativeT>(), dimensions,
+ indices.max_indices()));
+ literal->PopulateSparse(indices, values, sort);
+ return literal;
+}
+
+template <typename NativeT>
/* static */ std::unique_ptr<Literal> Literal::CreateR4(
std::initializer_list<std::initializer_list<
std::initializer_list<std::initializer_list<NativeT>>>>
@@ -1044,11 +1133,35 @@ void Literal::PopulateR4FromArray4D(const Array4D<NativeT>& values) {
PopulateFromArray(values);
}
+template <typename NativeT>
+void Literal::PopulateSparse(SparseIndexArray indices,
+ tensorflow::gtl::ArraySlice<NativeT> values,
+ bool sort) {
+ CHECK(LayoutUtil::IsSparseArray(shape()));
+ int rank = ShapeUtil::Rank(shape());
+ CHECK_EQ(indices.rank(), rank);
+ int64 max_elements = LayoutUtil::MaxSparseElements(shape().layout());
+ CHECK_LE(indices.max_indices(), max_elements);
+ int64 num_elements = values.size();
+ CHECK_LE(num_elements, max_elements);
+ CHECK_EQ(num_elements, indices.index_count());
+ auto root_data = root_piece().data<NativeT>();
+ root_data.remove_suffix(max_elements - values.size());
+ std::copy(values.begin(), values.end(), root_data.begin());
+ *this->root_piece().sparse_indices() = std::move(indices);
+ if (sort) {
+ auto root_data = this->root_piece().data<NativeT>();
+ root_data.remove_suffix(root_data.size() - num_elements);
+ this->root_piece().sparse_indices()->SortWithValues(root_data);
+ }
+ DCHECK(this->root_piece().sparse_indices()->Validate(shape()));
+}
+
template <typename NativeT, typename FnType>
Status Literal::Populate(const FnType& generator) {
const Shape& this_shape = shape();
const int64 rank = ShapeUtil::Rank(this_shape);
- TF_RET_CHECK(ShapeUtil::IsArray(this_shape));
+ TF_RET_CHECK(LayoutUtil::IsDenseArray(this_shape));
TF_RET_CHECK(this_shape.element_type() ==
primitive_util::NativeToPrimitiveType<NativeT>());
tensorflow::gtl::MutableArraySlice<NativeT> literal_data = data<NativeT>();
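
A hypothetical caller of the new CreateSparse API, mirroring the example given in the comment above (illustrative values only):

  #include <vector>

  #include "tensorflow/compiler/xla/literal_util.h"

  std::unique_ptr<xla::Literal> CreateSparseSketch() {
    // Four non-zero values in an F64[12,12,12] array, with room for up to 10
    // stored elements; sparse_indices()->index_count() on the result is 4.
    std::vector<xla::int64> index_data = {0, 1, 2,  3, 4, 5,  6, 7, 8,  9, 10, 11};
    return xla::Literal::CreateSparse<double>(
        /*dimensions=*/{12, 12, 12},
        xla::SparseIndexArray(/*max_indices=*/10, /*rank=*/3, index_data),
        /*values=*/{1.0, 2.0, 3.0, 4.0});
  }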
diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc
index 4974ead048..29efb4312f 100644
--- a/tensorflow/compiler/xla/literal_util_test.cc
+++ b/tensorflow/compiler/xla/literal_util_test.cc
@@ -193,6 +193,34 @@ TEST_F(LiteralUtilTest, CreateR3FromArray3d) {
ASSERT_EQ(expected, result);
}
+TEST_F(LiteralUtilTest, CreateSparse) {
+ std::vector<int64> dimensions = {8, 8, 8};
+ Array2D<int64> indices = {
+ {3, 4, 5},
+ {1, 2, 3},
+ {2, 3, 4},
+ {3, 5, 6},
+ };
+ std::vector<int64> values = {7, 8, 9, 10};
+ auto literal = Literal::CreateSparse<int64>(
+ dimensions, SparseIndexArray(indices.n1() + 3, indices), values);
+
+ Array2D<int64> expected_indices = {
+ {1, 2, 3},
+ {2, 3, 4},
+ {3, 4, 5},
+ {3, 5, 6},
+ };
+ std::vector<int64> expected_values = {8, 9, 7, 10};
+
+ EXPECT_EQ(literal->sparse_indices()->data(),
+ tensorflow::gtl::ArraySlice<int64>(
+ expected_indices.data(), expected_indices.num_elements()));
+ EXPECT_EQ(tensorflow::gtl::ArraySlice<int64>(literal->data<int64>().data(),
+ expected_values.size()),
+ tensorflow::gtl::ArraySlice<int64>(expected_values));
+}
+
TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) {
// clang-format off
auto literal = Literal::CreateR4Projected<float>({
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 14c86e2e72..a6b3c0c7c4 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -81,6 +81,7 @@ cc_library(
":conv_canonicalization",
":cpu_copy_insertion",
":cpu_executable",
+ ":cpu_hlo_support_checker",
":cpu_instruction_fusion",
":cpu_layout_assignment",
":cpu_options",
@@ -873,6 +874,32 @@ tf_cc_test(
],
)
+cc_library(
+ name = "cpu_hlo_support_checker",
+ srcs = ["cpu_hlo_support_checker.cc"],
+ hdrs = ["cpu_hlo_support_checker.h"],
+ deps = [
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo_pass",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "cpu_hlo_support_checker_test",
+ srcs = ["cpu_hlo_support_checker_test.cc"],
+ deps = [
+ ":cpu_hlo_support_checker",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:test",
+ "//tensorflow/compiler/xla/tests:hlo_test_base",
+ "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 8e6562c237..42fd4f100b 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -50,6 +50,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_executable.h"
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h"
#include "tensorflow/compiler/xla/service/cpu/cpu_options.h"
@@ -258,6 +259,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) {
// Optimization pipeline.
HloPassPipeline pipeline("CPU");
pipeline.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction());
+ pipeline.AddPass<CpuHloSupportChecker>();
ReducePrecisionInsertion::AddPasses(
&pipeline, module->config().debug_options(),
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
new file mode 100644
index 0000000000..7bd4741a04
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> CpuHloSupportChecker::Run(HloModule* module) {
+ for (auto* computation : module->computations()) {
+ for (const auto& instruction : computation->instructions()) {
+ TF_RETURN_IF_ERROR(
+ ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+ TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+ instruction->shape(),
+ [&instruction](const Shape& subshape, const ShapeIndex&) {
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ return xla::Unimplemented(
+ "CPU backend does not support HLO instruction %s with shape "
+ "containing a sparse layout: %s",
+ instruction->ToString().c_str(),
+ ShapeUtil::HumanStringWithLayout(instruction->shape())
+ .c_str());
+ }
+ return Status::OK();
+ }));
+ }
+ }
+ return false;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
new file mode 100644
index 0000000000..2271af7b24
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the CPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class CpuHloSupportChecker : public HloPassInterface {
+ public:
+ CpuHloSupportChecker() = default;
+ ~CpuHloSupportChecker() override = default;
+
+ tensorflow::StringPiece name() const override {
+ return "cpu_hlo_support_checker";
+ }
+
+ // Note: always returns false (no instructions are ever modified by this
+ // pass).
+ StatusOr<bool> Run(HloModule* module) override;
+};
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000..0f463e6de6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class CpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+ CpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+ CpuHloSupportChecker checker_;
+};
+
+TEST_F(CpuHloSupportCheckerTest, Add) {
+ HloComputation::Builder builder(TestName());
+ const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ scalar_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) {
+ HloComputation::Builder builder(TestName());
+ const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ sparse_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ Status status = checker().Run(module.get()).status();
+ ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+ EXPECT_THAT(status.error_message(),
+ HasSubstr("CPU backend does not support"));
+ EXPECT_THAT(status.error_message(),
+ HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index a86d3583a6..69ccc7179f 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -437,6 +437,7 @@ cc_library(
":fusion_merger",
":gpu_copy_insertion",
":gpu_executable",
+ ":gpu_hlo_support_checker",
":gpu_layout_assignment",
":hlo_schedule",
":instruction_fusion",
@@ -610,6 +611,32 @@ tf_cc_test(
],
)
+cc_library(
+ name = "gpu_hlo_support_checker",
+ srcs = ["gpu_hlo_support_checker.cc"],
+ hdrs = ["gpu_hlo_support_checker.h"],
+ deps = [
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:xla_data_proto",
+ "//tensorflow/compiler/xla/service:hlo_pass",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "gpu_hlo_support_checker_test",
+ srcs = ["gpu_hlo_support_checker_test.cc"],
+ deps = [
+ ":gpu_hlo_support_checker",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/compiler/xla:test",
+ "//tensorflow/compiler/xla/tests:hlo_test_base",
+ "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ ],
+)
+
# -----------------------------------------------------------------------------
filegroup(
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 9f34866ff5..93db9ebbee 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -39,6 +39,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h"
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h"
#include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h"
#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h"
@@ -137,6 +138,7 @@ tensorflow::Status OptimizeHloModule(
{
HloPassPipeline pipeline("optimization");
pipeline.AddInvariantChecker<HloVerifier>(shape_size_function);
+ pipeline.AddPass<GpuHloSupportChecker>();
ReducePrecisionInsertion::AddPasses(
&pipeline, hlo_module->config().debug_options(),
ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
new file mode 100644
index 0000000000..4944c41f7d
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc
@@ -0,0 +1,48 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace xla {
+
+StatusOr<bool> GpuHloSupportChecker::Run(HloModule* module) {
+ for (auto* computation : module->computations()) {
+ for (const auto& instruction : computation->instructions()) {
+ TF_RETURN_IF_ERROR(
+ ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape()));
+ TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
+ instruction->shape(),
+ [&instruction](const Shape& subshape, const ShapeIndex&) {
+ if (LayoutUtil::IsSparseArray(subshape)) {
+ return xla::Unimplemented(
+ "GPU backend does not support HLO instruction %s with shape "
+ "containing a sparse layout: %s",
+ instruction->ToString().c_str(),
+ ShapeUtil::HumanStringWithLayout(instruction->shape())
+ .c_str());
+ }
+ return Status::OK();
+ }));
+ }
+ }
+ return false;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
new file mode 100644
index 0000000000..d9550f81b5
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
+
+#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
+
+namespace xla {
+
+// This pass should run early in the HLO pipeline and checks for HLO constructs
+// which are not supported by the GPU backend and cannot be removed via HLO
+// transformations (eg, sparse layouts).
+class GpuHloSupportChecker : public HloPassInterface {
+ public:
+ GpuHloSupportChecker() = default;
+ ~GpuHloSupportChecker() override = default;
+
+ tensorflow::StringPiece name() const override {
+ return "gpu_hlo_support_checker";
+ }
+
+ // Note: always returns false (no instructions are ever modified by this
+ // pass).
+ StatusOr<bool> Run(HloModule* module) override;
+};
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
new file mode 100644
index 0000000000..0a4089df4c
--- /dev/null
+++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc
@@ -0,0 +1,72 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace xla {
+namespace {
+
+using ::testing::HasSubstr;
+
+class GpuHloSupportCheckerTest : public HloTestBase {
+ protected:
+ GpuHloSupportChecker& checker() { return checker_; }
+
+ private:
+ GpuHloSupportChecker checker_;
+};
+
+TEST_F(GpuHloSupportCheckerTest, Add) {
+ HloComputation::Builder builder(TestName());
+ const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, scalar_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, scalar_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ scalar_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ TF_ASSERT_OK(checker().Run(module.get()).status());
+}
+
+TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) {
+ HloComputation::Builder builder(TestName());
+ const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2);
+ HloInstruction* param0 = builder.AddInstruction(
+ HloInstruction::CreateParameter(0, sparse_shape, "param0"));
+ HloInstruction* param1 = builder.AddInstruction(
+ HloInstruction::CreateParameter(1, sparse_shape, "param1"));
+ builder.AddInstruction(HloInstruction::CreateBinary(
+ sparse_shape, HloOpcode::kAdd, param0, param1));
+ auto module = CreateNewModule();
+ module->AddEntryComputation(builder.Build());
+
+ Status status = checker().Run(module.get()).status();
+ ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED);
+ EXPECT_THAT(status.error_message(),
+ HasSubstr("GPU backend does not support"));
+ EXPECT_THAT(status.error_message(),
+ HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape)));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 3d4080e353..290ea9b496 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -84,7 +84,7 @@ bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) {
if (lhs.layout().format() != rhs.layout().format()) {
return false;
}
- if (LayoutUtil::IsDense(lhs)) {
+ if (LayoutUtil::IsDenseArray(lhs)) {
if (!ContainersEqual(LayoutUtil::MinorToMajor(lhs),
LayoutUtil::MinorToMajor(rhs))) {
VLOG(3) << "CompareShapes: lhs layout != rhs layout";
@@ -202,6 +202,17 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
return MakeShapeWithLayout(element_type, dimensions, layout);
}
+/* static */ Shape ShapeUtil::MakeShapeWithSparseLayout(
+ PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements) {
+ DCHECK_NE(TUPLE, element_type);
+ DCHECK_NE(OPAQUE, element_type);
+ Shape shape = ShapeUtil::MakeShape(element_type, dimensions);
+ *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements);
+ TF_DCHECK_OK(ShapeUtil::ValidateShape(shape));
+ return shape;
+}
+
/* static */ Shape
ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
const Shape& shape) {
@@ -249,7 +260,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
}
/* static */ void ShapeUtil::AppendMajorDimension(int bound, Shape* shape) {
- CHECK(LayoutUtil::IsDense(*shape));
+ CHECK(LayoutUtil::IsDenseArray(*shape));
shape->mutable_layout()->add_minor_to_major(Rank(*shape));
shape->add_dimensions(bound);
TF_DCHECK_OK(ValidateShape(*shape));
@@ -658,23 +669,55 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
TF_DCHECK_OK(ValidateShape(shape));
DCHECK_NE(OPAQUE, shape.element_type());
if (shape.element_type() == TUPLE) {
- CHECK_GT(pointer_size, 0);
- return pointer_size * shape.tuple_shapes_size();
+ return ByteSizeOfTupleIndexTable(shape, pointer_size);
+ }
+ int64 byte_size = ByteSizeOfElements(shape);
+ if (LayoutUtil::IsSparseArray(shape)) {
+ byte_size += ByteSizeOfSparseIndices(shape);
}
+ return byte_size;
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfTupleIndexTable(const Shape& shape,
+ int64 pointer_size) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK_EQ(TUPLE, shape.element_type());
+ CHECK_GT(pointer_size, 0);
+ return pointer_size * shape.tuple_shapes_size();
+}
+
+/* static */ int64 ShapeUtil::ByteSizeOfElements(const Shape& shape) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK(ShapeUtil::IsArray(shape));
int64 allocated_element_count;
- if (shape.layout().padded_dimensions_size() > 0) {
- CHECK_EQ(Rank(shape), shape.layout().padded_dimensions_size());
- allocated_element_count = 1;
- for (int64 dimension_size : shape.layout().padded_dimensions()) {
- allocated_element_count *= dimension_size;
- }
+
+ if (LayoutUtil::IsSparseArray(shape)) {
+ allocated_element_count = LayoutUtil::MaxSparseElements(shape.layout());
} else {
- allocated_element_count = ElementsIn(shape);
+ CHECK(LayoutUtil::IsDenseArray(shape));
+ tensorflow::gtl::ArraySlice<int64> padded_dimensions =
+ LayoutUtil::PaddedDimensions(shape);
+ if (!padded_dimensions.empty()) {
+ CHECK_EQ(Rank(shape), padded_dimensions.size());
+ allocated_element_count = 1;
+ for (int64 dimension_size : padded_dimensions) {
+ allocated_element_count *= dimension_size;
+ }
+ } else {
+ allocated_element_count = ElementsIn(shape);
+ }
}
return allocated_element_count *
ByteSizeOfPrimitiveType(shape.element_type());
}
+/* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) {
+ TF_DCHECK_OK(ValidateShape(shape));
+ DCHECK(LayoutUtil::IsSparseArray(shape));
+ return LayoutUtil::MaxSparseElements(shape.layout()) *
+ ShapeUtil::Rank(shape) * sizeof(int64);
+}
+
/* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal(
const Shape& shape) {
if (shape.element_type() == TUPLE) {
@@ -900,7 +943,7 @@ Status ForEachMutableSubshapeHelper(
new_shape.add_dimensions(dim);
}
if (shape.has_layout()) {
- CHECK(LayoutUtil::IsDense(shape));
+ CHECK(LayoutUtil::IsDenseArray(shape));
Layout* new_layout = new_shape.mutable_layout();
new_layout->set_format(DENSE);
new_layout->clear_minor_to_major();
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 59bdffee5a..453d4ec047 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -143,7 +143,10 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index);
class ShapeUtil {
public:
// Returns the number of elements are contained within the provided shape;
- // e.g. for rank 0 (scalars) the result is always 1.
+ // e.g. for rank 0 (scalars) the result is always 1. Note that sparse shapes
+ // may not actually be able to store this number of elements. See
+ // LayoutUtil::MaxSparseElements(shape.layout()) to obtain the maximum number of
+ // elements that can be stored in a sparse shape.
// Precondition: !IsTuple(shape)
static int64 ElementsIn(const Shape& shape);
@@ -164,6 +167,27 @@ class ShapeUtil {
// Precondition: !ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)
static int64 ByteSizeOfPrimitiveType(PrimitiveType primitive_type);
+ // Returns the number of bytes required to store the tuple member pointers for
+ // an allocation of `shape`. The `shape` must be a TUPLE shape, and
+ // `pointer_size` must be larger than zero.
+ static int64 ByteSizeOfTupleIndexTable(const Shape& shape,
+ int64 pointer_size);
+
+ // Returns the number of bytes required for the elements in an allocation of
+ // `shape`, which must be an array shape. The return value does not include
+ // the bytes needed to store sparse indices. Dense shapes use a separate
+ // memory location for each element, and so for these shapes,
+ // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. For dense shapes, this
+ // size also includes padding if present in the layout. For sparse shapes,
+ // `ByteSizeOf(shape) == ByteSizeOfElements(shape) +
+ // ByteSizeOfSparseIndices(shape)`.
+ static int64 ByteSizeOfElements(const Shape& shape);
+
+ // Returns the number of bytes required for the sparse indices in an
+ // allocation of `shape`, which must be an array shape. The return value
+ // does not include the bytes needed to store the array elements.
+ static int64 ByteSizeOfSparseIndices(const Shape& shape);
+
// Returns a human-readable string that represents the given shape, with or
// without layout. e.g. "f32[42x12] {0, 1}" or "f32[64]".
static string HumanString(const Shape& shape);
@@ -269,6 +293,10 @@ class ShapeUtil {
PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
tensorflow::gtl::ArraySlice<int64> minor_to_major);
+ static Shape MakeShapeWithSparseLayout(
+ PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions,
+ int64 max_sparse_elements);
+
// Constructs a new shape with major-first layout (i.e. {n, n-1, ..., 0}).
static Shape MakeShapeWithDescendingLayout(
PrimitiveType element_type,
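
A worked sketch of the byte-size decomposition described above, assuming 4-byte F32 elements and 8-byte int64 indices (the shape itself is arbitrary):

  #include "tensorflow/compiler/xla/shape_util.h"
  #include "tensorflow/core/platform/logging.h"

  void ByteSizeSketch() {
    // F32[10] with a sparse layout storing at most 2 explicit elements.
    xla::Shape s = xla::ShapeUtil::MakeShapeWithSparseLayout(xla::F32, {10}, 2);

    xla::int64 elements = xla::ShapeUtil::ByteSizeOfElements(s);      // 2 * 4 == 8
    xla::int64 indices = xla::ShapeUtil::ByteSizeOfSparseIndices(s);  // 2 * 1 * 8 == 16
    xla::int64 total = xla::ShapeUtil::ByteSizeOf(s);                 // 8 + 16 == 24
    CHECK_EQ(total, elements + indices);
  }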
diff --git a/tensorflow/compiler/xla/sparse_index_array.cc b/tensorflow/compiler/xla/sparse_index_array.cc
new file mode 100644
index 0000000000..e7738e6790
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.cc
@@ -0,0 +1,110 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+
+namespace xla {
+
+SparseIndexArray::SparseIndexArray() : rank_(0), max_indices_(0) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+ std::vector<int64> indices)
+ : indices_(std::move(indices)), rank_(rank), max_indices_(max_indices) {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(indices_.size() % rank_, 0)
+ << "indices_.size(): " << indices_.size() << ", rank_: " << rank_;
+ CHECK_LE(index_count(), max_indices_);
+}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank,
+ tensorflow::gtl::ArraySlice<int64> indices)
+ : SparseIndexArray(max_indices, rank,
+ std::vector<int64>(indices.begin(), indices.end())) {}
+
+SparseIndexArray::SparseIndexArray(int64 max_indices,
+ const Array2D<int64>& indices)
+ : SparseIndexArray(max_indices, indices.n2(),
+ std::vector<int64>(indices.begin(), indices.end())) {}
+
+int64 SparseIndexArray::index_count() const {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(indices_.size() % rank_, 0);
+ return indices_.size() / rank_;
+}
+
+tensorflow::gtl::ArraySlice<int64> SparseIndexArray::At(
+ int64 sparse_index_number) const {
+ CHECK_GT(rank_, 0);
+ CHECK_GE(sparse_index_number, 0);
+ CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size());
+ return tensorflow::gtl::ArraySlice<int64>(
+ indices_.data() + rank_ * sparse_index_number, rank_);
+}
+
+tensorflow::gtl::MutableArraySlice<int64> SparseIndexArray::At(
+ int64 sparse_index_number) {
+ CHECK_GT(rank_, 0);
+ CHECK_GE(sparse_index_number, 0);
+ CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size());
+ return tensorflow::gtl::MutableArraySlice<int64>(
+ indices_.data() + rank_ * sparse_index_number, rank_);
+}
+
+void SparseIndexArray::Append(tensorflow::gtl::ArraySlice<int64> index) {
+ CHECK_GT(rank_, 0);
+ CHECK_EQ(index.size(), rank_);
+ indices_.insert(indices_.end(), index.begin(), index.end());
+}
+
+void SparseIndexArray::Clear() { indices_.clear(); }
+
+void SparseIndexArray::Resize(int64 num_indices) {
+ CHECK_GT(rank_, 0);
+ indices_.resize(rank_ * num_indices);
+}
+
+bool SparseIndexArray::Validate(const Shape& shape) const {
+ if (rank_ == 0 || rank_ != ShapeUtil::Rank(shape)) {
+ return false;
+ }
+ int64 num_indices = index_count();
+ if (num_indices > LayoutUtil::MaxSparseElements(shape.layout())) {
+ return false;
+ }
+ if (num_indices == 0) {
+ return true;
+ }
+ tensorflow::gtl::ArraySlice<int64> last = At(0);
+ if (!IndexUtil::IndexInBounds(shape, last)) {
+ return false;
+ }
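+ // The remaining indices must also be in bounds and strictly increasing in
+ // lexicographic order, which guarantees they are unique.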
+ for (int64 n = 1; n < num_indices; ++n) {
+ tensorflow::gtl::ArraySlice<int64> next = At(n);
+ if (!IndexUtil::IndexInBounds(shape, next)) {
+ return false;
+ }
+ if (IndexUtil::CompareIndices(last, next) >= 0) {
+ return false;
+ }
+ last = next;
+ }
+ return true;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/sparse_index_array.h b/tensorflow/compiler/xla/sparse_index_array.h
new file mode 100644
index 0000000000..f67f34760e
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array.h
@@ -0,0 +1,176 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Utility class for managing sparse array indices.
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/index_util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+
+namespace xla {
+
+// Encapsulates the array of indices for a sparse array. A SparseIndexArray
+// contains indices for up to `max_indices` elements of a sparse array. Each
+// sparse index is an array of `rank` int64 values that gives the location of
+// a value within a sparse array. Note that the dimensions of the array are
+// not checked (except for the rank). To avoid confusion, we refer to the
+// position of an index within a SparseIndexArray as a sparse index number.
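+//
+// For example, a SparseIndexArray of rank 3 holding the two indices {1, 2, 3}
+// and {4, 5, 6} stores them as the flat int64 sequence {1, 2, 3, 4, 5, 6};
+// At(0) refers to {1, 2, 3} and At(1) to {4, 5, 6}.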
+class SparseIndexArray {
+ public:
+ SparseIndexArray();
+ SparseIndexArray(const SparseIndexArray&) = default;
+ SparseIndexArray(SparseIndexArray&&) = default;
+ SparseIndexArray& operator=(const SparseIndexArray&) = default;
+ SparseIndexArray& operator=(SparseIndexArray&&) = default;
+
+ // Constructs a SparseIndexArray that can hold up to `max_indices` sparse
+ // indices, with initial contents obtained from the given array. The rank
+ // is taken from the minor dimension of the array. The major dimension of the
+ // array must not exceed `max_indices`.
+ SparseIndexArray(int64 max_indices, const Array2D<int64>& indices);
+
+ // Like above, but the array is flattened. For example, the following are
+ // equivalent:
+ //
+ //   SparseIndexArray(10,
+ //                    Array2D<int64>{
+ //                      {0, 1, 2},
+ //                      {3, 4, 5},
+ //                      {6, 7, 8},
+ //                      {9, 10, 11},
+ //                    })
+ //
+ //   SparseIndexArray(10, 3,
+ //                    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11})
+ //
+ SparseIndexArray(int64 max_indices, int64 rank,
+ std::vector<int64> indices = {});
+ SparseIndexArray(int64 max_indices, int64 rank,
+ tensorflow::gtl::ArraySlice<int64> indices);
+
+ // Returns the number of elements represented by the indices stored in the
+ // array.
+ int64 index_count() const;
+
+ // Returns a slice that refers to the given sparse index number. The argument
+ // must be in the range [0, index_count()).
+ tensorflow::gtl::ArraySlice<int64> At(int64 sparse_index_number) const;
+ tensorflow::gtl::MutableArraySlice<int64> At(int64 sparse_index_number);
+
+ // Adds the given index at the end of the array. The new size of the
+ // SparseIndexArray must not exceed `max_indices`.
+ void Append(tensorflow::gtl::ArraySlice<int64> index);
+
+ // Removes all indices from the array.
+ void Clear();
+
+ // Resizes the array to contain the given number of sparse indices. The new
+ // size must be smaller than `max_indices`. If the new size is larger than
+ // the old size, the values of the new indices are unspecified.
+ void Resize(int64 num_indices);
+
+ // Returns true iff all indices are unique and occur in sorted order, and are
+ // valid for the given shape.
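+ // For example, given a shape with dimensions {5, 5}, the index sequence
+ // {0, 1}, {1, 0} is valid, while {0, 1}, {0, 1} (duplicate) and
+ // {0, 1}, {4, 9} (out of bounds) are not.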
+ bool Validate(const Shape& shape) const;
+
+ int64 rank() const { return rank_; }
+ int64 max_indices() const { return max_indices_; }
+
+ // Returns a slice over the int64 array that holds the sparse indices.
+ tensorflow::gtl::MutableArraySlice<int64> mutable_data() { return &indices_; }
+ tensorflow::gtl::ArraySlice<int64> data() const { return indices_; }
+
+ // Sorts this sparse index array along with the set of corresponding values.
+ // The indices and values are sorted in the lexicographic order of the
+ // indices, from smallest to largest.
+ //
+ // For example:
+ //
+ //   std::vector<float> v{10.0, 11.0, 12.0};
+ //   SparseIndexArray a(10, 3,
+ //                      {3, 4, 5,
+ //                       1, 2, 3,
+ //                       2, 3, 4});
+ //   a.SortWithValues<float>(&v);
+ //   // Prints "11.0, 12.0, 10.0":
+ //   std::cout << v[0] << ", " << v[1] << ", " << v[2] << std::endl;
+ //
+ template <typename NativeT>
+ void SortWithValues(tensorflow::gtl::MutableArraySlice<NativeT> values);
+
+ private:
+ std::vector<int64> indices_;
+ int64 rank_;
+ int64 max_indices_;
+};
+
+template <typename NativeT>
+void SparseIndexArray::SortWithValues(
+ tensorflow::gtl::MutableArraySlice<NativeT> values) {
+ int64 num_elements = index_count();
+ CHECK_EQ(values.size(), num_elements);
+ std::vector<int64> sort_order;
+ sort_order.reserve(num_elements);
+ for (int64 i = 0; i < num_elements; ++i) {
+ sort_order.push_back(i);
+ }
+ auto sort_order_less = [this](int64 lhs, int64 rhs) {
+ return IndexUtil::CompareIndices(At(lhs), At(rhs)) < 0;
+ };
+ std::sort(sort_order.begin(), sort_order.end(), sort_order_less);
+
+ // Reorder the array elements according to sort_order. Work through the array
+ // and follow cycles so we can do the reorder in-place.
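+ // For example, if sort_order is {1, 2, 0}: element 0 is saved, element 1 is
+ // copied into slot 0, element 2 into slot 1, and the saved element is
+ // finally written to slot 2, closing the cycle.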
+ tensorflow::gtl::InlinedVector<int64, 8> saved_index(rank());
+ for (int64 i = 0; i < num_elements; ++i) {
+ // sort_order[i] == -1 indicates the element has already been copied.
+ if (sort_order[i] < 0) {
+ continue;
+ } else if (i == sort_order[i]) {
+ // The element is already in sorted order.
+ sort_order[i] = -1;
+ continue;
+ }
+
+ std::copy_n(At(i).begin(), rank(), saved_index.begin());
+ NativeT saved_value = values[i];
+ int64 j = i;
+ for (;;) {
+ if (sort_order[j] == i) {
+ std::copy_n(saved_index.begin(), rank(), At(j).begin());
+ values[j] = saved_value;
+ sort_order[j] = -1;
+ break;
+ }
+
+ std::copy_n(At(sort_order[j]).begin(), rank(), At(j).begin());
+ values[j] = values[sort_order[j]];
+
+ int64 k = sort_order[j];
+ sort_order[j] = -1;
+ j = k;
+ }
+ }
+}
+
+} // namespace xla
+
+#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_
diff --git a/tensorflow/compiler/xla/sparse_index_array_test.cc b/tensorflow/compiler/xla/sparse_index_array_test.cc
new file mode 100644
index 0000000000..7377f88958
--- /dev/null
+++ b/tensorflow/compiler/xla/sparse_index_array_test.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/sparse_index_array.h"
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/test.h"
+
+namespace xla {
+namespace {
+
+TEST(SparseIndexArrayTest, Sort) {
+ SparseIndexArray a(10, 3);
+ a.Append({2, 3, 4});
+ a.Append({3, 4, 5});
+ a.Append({1, 2, 3});
+ a.Append({5, 6, 7});
+ a.Append({4, 5, 6});
+ a.Append({6, 7, 8});
+ std::vector<double> values = {
+ 12.0, 13.0, 11.0, 15.0, 14.0, 16.0,
+ };
+ a.SortWithValues<double>(&values);
+ ASSERT_EQ(a.data(), std::vector<int64>({1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5,
+ 6, 7, 6, 7, 8}));
+ ASSERT_EQ(values, std::vector<double>({11.0, 12.0, 13.0, 14.0, 15.0, 16.0}));
+}
+
+} // namespace
+} // namespace xla
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index e34f138b6e..3aea021753 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -120,6 +120,9 @@ enum Format {
// The default layout, with exactly one storage location per element (ignoring
// padding).
DENSE = 1;
+ // A sparsely encoded layout, providing only the index/value pairs of non-zero
+ // elements.
+ SPARSE = 2;
}
// A layout describes how the array is placed in (1D) memory space. This
@@ -151,6 +154,11 @@ message Layout {
// field must be unset unless the format is DENSE.
PaddingValue padding_value = 3;
+ // The maximum number of elements that can be stored for SPARSE formats. This
+ // can be used to determine the maximum size in bytes of arrays stored in
+ // memory. This field must be unset unless the format is SPARSE.
+ int64 max_sparse_elements = 5;
+
// Important: if any field is added, be sure to modify ShapeUtil::Equal()
// appropriately to account for the new field.
}
@@ -333,7 +341,8 @@ message LiteralProto {
// The F16s and BF16s are encoded in little endian byte order
bytes f16s = 11;
bytes bf16s = 13;
- // Next = 14
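+ // Sparse indices for a literal with a sparse layout, flattened in the same
+ // way as SparseIndexArray (one group of rank int64 values per stored
+ // element).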
+ repeated int64 sparse_indices = 14;
+ // Next = 15
}
message WindowDimension {