diff options
25 files changed, 1160 insertions, 28 deletions
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index 88de17a5ff..dcbe1fe9e5 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -302,6 +302,7 @@ cc_library( ":array4d", ":shape_tree", ":shape_util", + ":sparse_index_array", ":status_macros", ":types", ":util", @@ -628,6 +629,28 @@ tf_cc_test( ], ) +cc_library( + name = "sparse_index_array", + srcs = ["sparse_index_array.cc"], + hdrs = ["sparse_index_array.h"], + deps = [ + ":array2d", + ":shape_util", + ":xla_data_proto", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "sparse_index_array_test", + srcs = ["sparse_index_array_test.cc"], + deps = [ + ":sparse_index_array", + ":test", + "//tensorflow/core:test_main", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/index_util.cc b/tensorflow/compiler/xla/index_util.cc index 2ee23927d8..ffd1fb79e9 100644 --- a/tensorflow/compiler/xla/index_util.cc +++ b/tensorflow/compiler/xla/index_util.cc @@ -149,4 +149,33 @@ namespace xla { return stride; } +/* static */ bool IndexUtil::IndexInBounds( + const Shape& shape, tensorflow::gtl::ArraySlice<int64> index) { + int64 rank = ShapeUtil::Rank(shape); + if (rank != index.size()) { + return false; + } + for (int64 d = 0; d < rank; ++d) { + if (index[d] >= shape.dimensions(d)) { + return false; + } + } + return true; +} + +/* static */ int IndexUtil::CompareIndices( + tensorflow::gtl::ArraySlice<int64> lhs, + tensorflow::gtl::ArraySlice<int64> rhs) { + int64 rank = lhs.size(); + CHECK_EQ(rhs.size(), rank); + for (int64 dim = 0; dim < rank; ++dim) { + if (lhs[dim] < rhs[dim]) { + return -1; + } else if (lhs[dim] > rhs[dim]) { + return 1; + } + } + return 0; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/index_util.h b/tensorflow/compiler/xla/index_util.h index c9838966a5..0b9188e852 100644 --- a/tensorflow/compiler/xla/index_util.h +++ 
b/tensorflow/compiler/xla/index_util.h @@ -69,6 +69,18 @@ class IndexUtil { // sizeof(dimension(3)) * sizeof(dimension(2)) == 4 * 10 static int64 GetDimensionStride(const Shape& shape, int64 dimension); + // Returns true iff the given multi-index is contained in the bounds for the + // shape. + static bool IndexInBounds(const Shape& shape, + tensorflow::gtl::ArraySlice<int64> index); + + // Compares the given indices in lexicographic order. lhs[0] and rhs[0] are + // compared first, and lhs[rank-1] and rhs[rank-1] last. If lhs is smaller, + // then -1 is returned. If lhs is larger, then 1 is returned. Otherwise, 0 is + // returned. + static int CompareIndices(tensorflow::gtl::ArraySlice<int64> lhs, + tensorflow::gtl::ArraySlice<int64> rhs); + private: TF_DISALLOW_COPY_AND_ASSIGN(IndexUtil); }; diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index 6435226fbe..ddf091e19f 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -64,6 +64,13 @@ void SetDefaultLayoutToContainer( return layout; } +/* static */ Layout LayoutUtil::MakeSparseLayout(int64 max_sparse_elements) { + Layout layout; + layout.set_format(SPARSE); + layout.set_max_sparse_elements(max_sparse_elements); + return layout; +} + namespace { // Internal helper that creates a default layout for an array of the given rank. 
@@ -234,7 +241,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { LayoutUtil::ClearLayout(program_shape->mutable_result()); } -/* static */ bool LayoutUtil::IsDense(const Shape& shape) { +/* static */ bool LayoutUtil::IsDenseArray(const Shape& shape) { return ShapeUtil::IsArray(shape) && shape.has_layout() && IsDense(shape.layout()); } @@ -260,7 +267,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { shape.layout().padded_dimensions_size() == 0) { return false; } - CHECK(IsDense(shape)); + CHECK(IsDenseArray(shape)); CHECK_EQ(shape.dimensions_size(), shape.layout().padded_dimensions_size()); for (int64 i = 0; i < shape.dimensions_size(); ++i) { if (shape.layout().padded_dimensions(i) > shape.dimensions(i)) { @@ -272,21 +279,35 @@ Layout CreateDefaultLayoutForRank(int64 rank) { /* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::PaddedDimensions( const Shape& shape) { - CHECK(IsDense(shape)); + CHECK(IsDenseArray(shape)); return AsInt64Slice(shape.layout().padded_dimensions()); } /* static */ int64 LayoutUtil::PaddedDimension(const Shape& shape, int64 index) { - CHECK(IsDense(shape)); + CHECK(IsDenseArray(shape)); return shape.layout().padded_dimensions(index); } /* static */ PaddingValue LayoutUtil::GetPaddingValue(const Shape& shape) { - CHECK(IsDense(shape)); + CHECK(IsDenseArray(shape)); return shape.layout().padding_value(); } +/* static */ bool LayoutUtil::IsSparseArray(const Shape& shape) { + return ShapeUtil::IsArray(shape) && shape.has_layout() && + IsSparse(shape.layout()); +} + +/* static */ bool LayoutUtil::IsSparse(const Layout& layout) { + return layout.format() == SPARSE; +} + +/* static */ int64 LayoutUtil::MaxSparseElements(const Layout& layout) { + CHECK(IsSparse(layout)); + return layout.max_sparse_elements(); +} + /* static */ bool LayoutUtil::HasLayout(const Shape& shape) { if (ShapeUtil::IsTuple(shape)) { // Tuple shape: all subshapes must have a layout. 
@@ -313,7 +334,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { /* static */ tensorflow::gtl::ArraySlice<int64> LayoutUtil::MinorToMajor( const Shape& shape) { - CHECK(IsDense(shape)); + CHECK(IsDenseArray(shape)); return AsInt64Slice(shape.layout().minor_to_major()); } @@ -419,6 +440,7 @@ tensorflow::Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, /* static */ bool LayoutUtil::AreDimensionsConsecutive( const Layout& layout, tensorflow::gtl::ArraySlice<int64> dims) { + CHECK(IsDense(layout)); std::vector<int64> positions_in_layout; for (int64 dim : dims) { positions_in_layout.push_back( diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index f73cc95764..7c1ba4b022 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -36,6 +36,10 @@ class LayoutUtil { // convenience function for protobuf construction.) static Layout MakeLayout(tensorflow::gtl::ArraySlice<int64> minor_to_major); + // Creates a sparse layout with the given maximum number of elements. (This is + // a convenience function for protobuf construction.) + static Layout MakeSparseLayout(int64 max_sparse_elements); + // Returns default layout for the given shape. static Layout GetDefaultLayoutForShape(const Shape& shape); @@ -72,7 +76,7 @@ class LayoutUtil { static void ClearLayout(ProgramShape* program_shape); // Returns whether the given Shape is an array and has a dense format layout. - static bool IsDense(const Shape& shape); + static bool IsDenseArray(const Shape& shape); // Returns whether the given Layout has a dense format. static bool IsDense(const Layout& layout); @@ -107,6 +111,17 @@ class LayoutUtil { // an array and has a dense layout. static PaddingValue GetPaddingValue(const Shape& shape); + // Returns whether the given Shape is an array (i.e. not a tuple) and has a + // sparse format layout. 
+ static bool IsSparseArray(const Shape& shape); + + // Returns whether the given Layout has a sparse format. + static bool IsSparse(const Layout& layout); + + // Returns the maximum number of elements that can be stored in a sparse + // layout. + static int64 MaxSparseElements(const Layout& layout); + // Returns whether the given shape has a layout. For tuple shapes, true is // returned only if all elements have layouts. static bool HasLayout(const Shape& shape); diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc index 331bb9afa9..daf4dc10ac 100644 --- a/tensorflow/compiler/xla/layout_util_test.cc +++ b/tensorflow/compiler/xla/layout_util_test.cc @@ -30,6 +30,14 @@ class LayoutUtilTest : public ::testing::Test { *shape.mutable_layout() = LayoutUtil::MakeLayout(minor_to_major); return shape; } + + Shape MakeShapeWithSparseLayout(PrimitiveType element_type, + tensorflow::gtl::ArraySlice<int64> dimensions, + int64 max_sparse_elements) { + Shape shape = ShapeUtil::MakeShape(element_type, dimensions); + *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements); + return shape; + } }; TEST_F(LayoutUtilTest, TupleLayoutComparison) { @@ -81,6 +89,29 @@ TEST_F(LayoutUtilTest, CopyLayoutArray) { EXPECT_FALSE(dst.has_layout()); } +TEST_F(LayoutUtilTest, CopyLayoutSparse) { + Shape src = MakeShapeWithSparseLayout(F32, {2, 3}, 2); + Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0}); + + EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + + // Should work if destination has no layout. + dst.clear_layout(); + EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + + // If source is cleared, then destination should be cleared. 
+ src.clear_layout(); + EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_TRUE(dst.has_layout()); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_FALSE(dst.has_layout()); +} + TEST_F(LayoutUtilTest, CopyLayoutTuple) { Shape src = ShapeUtil::MakeTupleShape( {MakeShapeWithLayout(F32, {2, 3}, {0, 1}), @@ -100,6 +131,25 @@ TEST_F(LayoutUtilTest, CopyLayoutTuple) { EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); } +TEST_F(LayoutUtilTest, CopyLayoutTupleSparse) { + Shape src = ShapeUtil::MakeTupleShape( + {MakeShapeWithSparseLayout(F32, {2, 3}, 4), + MakeShapeWithSparseLayout(F32, {42, 123}, 4), + ShapeUtil::MakeTupleShape( + {MakeShapeWithLayout(F32, {}, {}), + MakeShapeWithSparseLayout(F32, {1, 2, 3}, 6)})}); + Shape dst = ShapeUtil::MakeTupleShape( + {MakeShapeWithLayout(F32, {2, 3}, {1, 0}), + MakeShapeWithLayout(F32, {42, 123}, {1, 0}), + ShapeUtil::MakeTupleShape( + {MakeShapeWithLayout(F32, {}, {}), + MakeShapeWithLayout(F32, {1, 2, 3}, {1, 2, 0})})}); + + EXPECT_FALSE(LayoutUtil::LayoutsInShapesEqual(src, dst)); + EXPECT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); +} + TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) { Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1}); Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0}); @@ -107,6 +157,13 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleSameRank) { EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); } +TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleSameRank) { + Shape src = MakeShapeWithSparseLayout(F32, {123, 42, 7}, 6); + Shape dst = MakeShapeWithLayout(F32, {2, 3, 5}, {1, 0}); + ASSERT_IS_OK(LayoutUtil::CopyLayoutBetweenShapes(src, &dst)); + EXPECT_TRUE(LayoutUtil::LayoutsInShapesEqual(src, dst)); +} + TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) { Shape src = MakeShapeWithLayout(F32, 
{123, 42, 7}, {2, 0, 1}); Shape dst = MakeShapeWithLayout(F32, {2, 3}, {1, 0}); @@ -116,6 +173,15 @@ TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleDifferentRank) { ::testing::ContainsRegex("cannot copy layout from shape")); } +TEST_F(LayoutUtilTest, CopyLayoutSparseNotCompatibleDifferentRank) { + Shape src = MakeShapeWithLayout(F32, {123, 42, 7}, {2, 0, 1}); + Shape dst = MakeShapeWithSparseLayout(F32, {2, 3}, 4); + auto status = LayoutUtil::CopyLayoutBetweenShapes(src, &dst); + EXPECT_FALSE(status.ok()); + EXPECT_THAT(status.error_message(), + ::testing::ContainsRegex("cannot copy layout from shape")); +} + TEST_F(LayoutUtilTest, CopyLayoutNotCompatibleTuple) { Shape src = ShapeUtil::MakeTupleShape({MakeShapeWithLayout(F32, {2, 3}, {0, 1}), @@ -221,5 +287,10 @@ TEST_F(LayoutUtilTest, DefaultLayoutGettersMajorToMinor) { ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25})))); } +TEST_F(LayoutUtilTest, SparseLayoutMaxElements) { + EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)), + 101); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index cc1735e6f2..dff5c1381a 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -94,9 +94,15 @@ Literal::Literal(const Shape& shape, bool allocate_arrays) Piece& piece = pair.second; piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index)); - if (ShapeUtil::IsArray(piece.subshape())) { + const Shape& subshape = piece.subshape(); + if (ShapeUtil::IsArray(subshape)) { if (allocate_arrays) { piece.set_buffer(new char[piece.size_bytes()]); + if (LayoutUtil::IsSparseArray(subshape)) { + piece.set_sparse_indices(new SparseIndexArray( + LayoutUtil::MaxSparseElements(subshape.layout()), + ShapeUtil::Rank(subshape))); + } } else { piece.set_buffer(nullptr); } @@ -112,6 +118,7 @@ void Literal::DeallocateBuffers() { Piece& piece = pair.second; if (piece.buffer() != nullptr) { delete[] piece.buffer(); + delete piece.sparse_indices(); } } } @@ -164,6 +171,15 @@ std::unique_ptr<Literal> Literal::CreateFromShape(const Shape& shape) { return literal; } +const SparseIndexArray* Literal::sparse_indices( + const ShapeIndex& shape_index) const { + return piece(shape_index).sparse_indices(); +} + +SparseIndexArray* Literal::sparse_indices(const ShapeIndex& shape_index) { + return piece(shape_index).sparse_indices(); +} + /* static */ std::unique_ptr<Literal> Literal::CreateFromDimensions( PrimitiveType primitive_type, tensorflow::gtl::ArraySlice<int64> dimensions) { @@ -247,9 +263,12 @@ std::vector<Literal> Literal::DecomposeTuple() { } Piece& src_piece = piece(src_index); - // Move the respective buffer over to the element Literal. + // Move the respective buffer and sparse indices over to the element + // Literal. dest_piece.set_buffer(src_piece.buffer()); src_piece.set_buffer(nullptr); + dest_piece.set_sparse_indices(src_piece.sparse_indices()); + src_piece.set_sparse_indices(nullptr); } } // Set this literal to be nil-shaped. 
@@ -406,6 +425,8 @@ Status Literal::MoveFrom(Literal&& src_literal, Piece& dest_piece = piece(dest_index); delete[] dest_piece.buffer(); dest_piece.set_buffer(src_piece.buffer()); + delete dest_piece.sparse_indices(); + dest_piece.set_sparse_indices(src_piece.sparse_indices()); } src_literal.shape_ = ShapeUtil::MakeNil(); @@ -764,7 +785,7 @@ std::unique_ptr<Literal> Literal::Transpose( // dimension has within the transposed array, a layout is affine if // MinMaj(Di) == TMinMaj(T(Di)), with TMinMaj() being the minor to major // vector of the affine layout. - CHECK(LayoutUtil::IsDense(permuted_shape)); + CHECK(LayoutUtil::IsDenseArray(permuted_shape)); Layout* layout = permuted_shape.mutable_layout(); layout->clear_minor_to_major(); for (auto index : LayoutUtil::MinorToMajor(shape())) { @@ -1573,6 +1594,12 @@ LiteralProto Literal::ToProto() const { } piece.WriteToProto(proto_piece); } + + if (LayoutUtil::IsSparseArray(shape())) { + CopyToRepeatedField(proto.mutable_sparse_indices(), + sparse_indices()->data()); + } + return proto; } @@ -1653,6 +1680,7 @@ LiteralView::LiteralView(const Literal& literal, const ShapeIndex& view_root) { } const Piece& src_piece = literal.piece(src_index); piece.set_buffer(src_piece.buffer()); + piece.set_sparse_indices(src_piece.sparse_indices()); piece.set_subshape(&ShapeUtil::GetSubshape(shape_, index)); } } diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index dc29c6359c..50e25bbdd0 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -36,6 +36,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/sparse_index_array.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" @@ -103,6 +104,12 @@ class Literal { tensorflow::gtl::MutableArraySlice<NativeT> data( const ShapeIndex& shape_index = {}); + // Returns a pointer to the sparse index array. Returns nullptr if the literal + // is not a sparse array. + const SparseIndexArray* sparse_indices( + const ShapeIndex& shape_index = {}) const; + SparseIndexArray* sparse_indices(const ShapeIndex& shape_index = {}); + // Returns a pointer to (or size of) the underlying buffer holding the array // at the given shape index. CHECKs if the subshape of the literal at the // given ShapeIndex is not array. @@ -160,6 +167,56 @@ class Literal { // array. string GetR1U8AsString() const; + // Creates a literal with a sparse layout and the given indices and values. + // The shape is initialized from the given dimensions. The minor dimension of + // the indices array must equal the rank of the shape (i.e. size of the + // dimensions array). The major dimension of the indices array must equal the + // number of elements in the values array. The maximum number of elements in + // the array is taken from the max_indices() value of the index array. + // + // XLA assumes that sparse literals are in sorted order for all operations. If + // the `sort` argument is true, then the indices and values will be sorted + // while copying them into the literal. If you have ensured that the indices + // and values are already sorted, then you may set the `sort` argument to + // false to skip the sorting step. 
+ // + // For example: + // + // CreateSparse( + // {12, 12, 12}, + // SparseIndexArray(10, 3, + // Array2D{ + // {0, 1, 2}, + // {3, 4, 5}, + // {6, 7, 8}, + // {9, 10, 11}, + // }), + // {1.0, 2.0, 3.0, 4.0}) + // + // This creates an array with shape F64[12,12,12]sparse{10}, that has the + // following non-zero values: + // + // [0, 1, 2]: 1.0 + // [3, 4, 5]: 2.0 + // [6, 7, 8]: 3.0 + // [9, 10, 11]: 4.0 + // + template <typename NativeT> + static std::unique_ptr<Literal> CreateSparse( + tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices, + tensorflow::gtl::ArraySlice<NativeT> values, bool sort = true); + + // Populates a literal with a sparse layout with the given indices and values. + // Each index in the indices array is CHECKed against the dimensions in the + // literal's shape. If sort is true, then the indices and values will be + // sorted. If sort is false, then the indices and values are assumed to + // already be in sorted order. See CreateSparse for an example of how data + // are populated. + template <typename NativeT> + void PopulateSparse(SparseIndexArray indices, + tensorflow::gtl::ArraySlice<NativeT> values, + bool sort = true); + // Creates a new Literal object with the shape specified as parameter. // The content of the literal values is the default value of the primitive // type of literal itself (0 for numeric types, and false for predicates). @@ -358,7 +415,7 @@ class Literal { const ShapeIndex& shape_index, NativeT value); // Overloads of Get and Set for array literals. CHECKs if the literal is not - // array-shaped. + // array-shaped and dense. template <typename NativeT> NativeT Get(tensorflow::gtl::ArraySlice<int64> multi_index) const; template <typename NativeT> @@ -408,6 +465,8 @@ // This function is useful if you want a polymorphic representation // of the tensor's elements (turning it to a string for something // like representation in a protobuf). + // + // This literal must have a dense layout. 
void EachCellAsString( const std::function<void(tensorflow::gtl::ArraySlice<int64> indices, const string& value)>& per_cell) const; @@ -447,6 +506,8 @@ class Literal { // // generator must be a callable of the type // NativeT(tensorflow::gtl::ArraySlice<int64> indexes) or compatible. + // + // This literal must have a dense layout. template <typename NativeT, typename FnType> Status Populate(const FnType& generator); @@ -485,10 +546,12 @@ class Literal { // admonishments about floating-point equality checks apply. We expect you to // use this to check for complex values that can be expressed precisely as // float pairs e.g. (-0.5, 1.0). + // + // This literal must have a dense layout. bool IsAllComplex(complex64 value) const; // Returns whether this literal is zero at the specified index. This literal - // must be an array. + // must be an array with a dense layout. bool IsZero(tensorflow::gtl::ArraySlice<int64> indices) const; // Return the count of the elements in the array at the given shape index in @@ -563,6 +626,14 @@ class Literal { char* buffer() const { return buffer_; } void set_buffer(char* buffer) { buffer_ = buffer; } + // The array of multi-indices that provide the locations of non-zero + // elements in a sparse array. Only used if + // LayoutUtil::IsSparseArray(shape()) is true. + SparseIndexArray* sparse_indices() const { return sparse_indices_; } + void set_sparse_indices(SparseIndexArray* sparse_indices) { + sparse_indices_ = sparse_indices; + } + // Gets or sets the subshape of this piece. This reference points to a // subshape within the shape in the containing Literal (Literal::shape_). const Shape& subshape() const { return *subshape_; } @@ -598,6 +669,9 @@ class Literal { // For array-shaped pieces, this is the buffer holding the literal data. char* buffer_ = nullptr; + // For sparse arrays, this is the array of indices. + SparseIndexArray* sparse_indices_ = nullptr; + // The shape of piece. 
This points into the shape of the containing Literal // (Literal::shape_). const Shape* subshape_ = nullptr; @@ -837,6 +911,21 @@ template <typename NativeT> } template <typename NativeT> +/* static */ std::unique_ptr<Literal> Literal::CreateSparse( + tensorflow::gtl::ArraySlice<int64> dimensions, SparseIndexArray indices, + tensorflow::gtl::ArraySlice<NativeT> values, bool sort) { + int64 num_elements = values.size(); + int64 rank = dimensions.size(); + CHECK_EQ(num_elements, indices.index_count()); + CHECK_EQ(rank, indices.rank()); + auto literal = MakeUnique<Literal>(ShapeUtil::MakeShapeWithSparseLayout( + primitive_util::NativeToPrimitiveType<NativeT>(), dimensions, + indices.max_indices())); + literal->PopulateSparse(indices, values, sort); + return literal; +} + +template <typename NativeT> /* static */ std::unique_ptr<Literal> Literal::CreateR4( std::initializer_list<std::initializer_list< std::initializer_list<std::initializer_list<NativeT>>>> @@ -1044,11 +1133,35 @@ void Literal::PopulateR4FromArray4D(const Array4D<NativeT>& values) { PopulateFromArray(values); } +template <typename NativeT> +void Literal::PopulateSparse(SparseIndexArray indices, + tensorflow::gtl::ArraySlice<NativeT> values, + bool sort) { + CHECK(LayoutUtil::IsSparseArray(shape())); + int rank = ShapeUtil::Rank(shape()); + CHECK_EQ(indices.rank(), rank); + int64 max_elements = LayoutUtil::MaxSparseElements(shape().layout()); + CHECK_LE(indices.max_indices(), max_elements); + int64 num_elements = values.size(); + CHECK_LE(num_elements, max_elements); + CHECK_EQ(num_elements, indices.index_count()); + auto root_data = root_piece().data<NativeT>(); + root_data.remove_suffix(max_elements - values.size()); + std::copy(values.begin(), values.end(), root_data.begin()); + *this->root_piece().sparse_indices() = std::move(indices); + if (sort) { + auto root_data = this->root_piece().data<NativeT>(); + root_data.remove_suffix(root_data.size() - num_elements); + 
this->root_piece().sparse_indices()->SortWithValues(root_data); + } + DCHECK(this->root_piece().sparse_indices()->Validate(shape())); +} + template <typename NativeT, typename FnType> Status Literal::Populate(const FnType& generator) { const Shape& this_shape = shape(); const int64 rank = ShapeUtil::Rank(this_shape); - TF_RET_CHECK(ShapeUtil::IsArray(this_shape)); + TF_RET_CHECK(LayoutUtil::IsDenseArray(this_shape)); TF_RET_CHECK(this_shape.element_type() == primitive_util::NativeToPrimitiveType<NativeT>()); tensorflow::gtl::MutableArraySlice<NativeT> literal_data = data<NativeT>(); diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 4974ead048..29efb4312f 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -193,6 +193,34 @@ TEST_F(LiteralUtilTest, CreateR3FromArray3d) { ASSERT_EQ(expected, result); } +TEST_F(LiteralUtilTest, CreateSparse) { + std::vector<int64> dimensions = {8, 8, 8}; + Array2D<int64> indices = { + {3, 4, 5}, + {1, 2, 3}, + {2, 3, 4}, + {3, 5, 6}, + }; + std::vector<int64> values = {7, 8, 9, 10}; + auto literal = Literal::CreateSparse<int64>( + dimensions, SparseIndexArray(indices.n1() + 3, indices), values); + + Array2D<int64> expected_indices = { + {1, 2, 3}, + {2, 3, 4}, + {3, 4, 5}, + {3, 5, 6}, + }; + std::vector<int64> expected_values = {8, 9, 7, 10}; + + EXPECT_EQ(literal->sparse_indices()->data(), + tensorflow::gtl::ArraySlice<int64>( + expected_indices.data(), expected_indices.num_elements())); + EXPECT_EQ(tensorflow::gtl::ArraySlice<int64>(literal->data<int64>().data(), + expected_values.size()), + tensorflow::gtl::ArraySlice<int64>(expected_values)); +} + TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { // clang-format off auto literal = Literal::CreateR4Projected<float>({ diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 14c86e2e72..a6b3c0c7c4 100644 --- 
a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -81,6 +81,7 @@ cc_library( ":conv_canonicalization", ":cpu_copy_insertion", ":cpu_executable", + ":cpu_hlo_support_checker", ":cpu_instruction_fusion", ":cpu_layout_assignment", ":cpu_options", @@ -873,6 +874,32 @@ tf_cc_test( ], ) +cc_library( + name = "cpu_hlo_support_checker", + srcs = ["cpu_hlo_support_checker.cc"], + hdrs = ["cpu_hlo_support_checker.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "cpu_hlo_support_checker_test", + srcs = ["cpu_hlo_support_checker_test.cc"], + deps = [ + ":cpu_hlo_support_checker", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 8e6562c237..42fd4f100b 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -50,6 +50,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h" #include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" #include "tensorflow/compiler/xla/service/cpu/cpu_executable.h" +#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" #include "tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.h" #include "tensorflow/compiler/xla/service/cpu/cpu_layout_assignment.h" #include "tensorflow/compiler/xla/service/cpu/cpu_options.h" @@ -258,6 +259,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { // Optimization pipeline. HloPassPipeline pipeline("CPU"); pipeline.AddInvariantChecker<HloVerifier>(ShapeSizeBytesFunction()); + pipeline.AddPass<CpuHloSupportChecker>(); ReducePrecisionInsertion::AddPasses( &pipeline, module->config().debug_options(), diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc new file mode 100644 index 0000000000..7bd4741a04 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.cc @@ -0,0 +1,48 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" + +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace xla { + +StatusOr<bool> CpuHloSupportChecker::Run(HloModule* module) { + for (auto* computation : module->computations()) { + for (const auto& instruction : computation->instructions()) { + TF_RETURN_IF_ERROR( + ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape())); + TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( + instruction->shape(), + [&instruction](const Shape& subshape, const ShapeIndex&) { + if (LayoutUtil::IsSparseArray(subshape)) { + return xla::Unimplemented( + "CPU backend does not support HLO instruction %s with shape " + "containing a sparse layout: %s", + instruction->ToString().c_str(), + ShapeUtil::HumanStringWithLayout(instruction->shape()) + .c_str()); + } + return Status::OK(); + })); + } + } + return false; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h new file mode 100644 index 0000000000..2271af7b24 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h @@ -0,0 +1,42 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ + +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// This pass should run early in the HLO pipeline and checks for HLO constructs +// which are not supported by the CPU backend and cannot be removed via HLO +// transformations (eg, sparse layouts). +class CpuHloSupportChecker : public HloPassInterface { + public: + CpuHloSupportChecker() = default; + ~CpuHloSupportChecker() override = default; + + tensorflow::StringPiece name() const override { + return "cpu_hlo_support_checker"; + } + + // Note: always returns false (no instructions are ever modified by this + // pass). + StatusOr<bool> Run(HloModule* module) override; +}; + +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_HLO_SUPPORT_CHECKER_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc new file mode 100644 index 0000000000..0f463e6de6 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/error_codes.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +using ::testing::HasSubstr; + +class CpuHloSupportCheckerTest : public HloTestBase { + protected: + CpuHloSupportChecker& checker() { return checker_; } + + private: + CpuHloSupportChecker checker_; +}; + +TEST_F(CpuHloSupportCheckerTest, Add) { + HloComputation::Builder builder(TestName()); + const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "param1")); + builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, param0, param1)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + TF_ASSERT_OK(checker().Run(module.get()).status()); +} + +TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) { + HloComputation::Builder builder(TestName()); + const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, sparse_shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, sparse_shape, "param1")); + builder.AddInstruction(HloInstruction::CreateBinary( + sparse_shape, HloOpcode::kAdd, param0, param1)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + Status 
status = checker().Run(module.get()).status(); + ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); + EXPECT_THAT(status.error_message(), + HasSubstr("CPU backend does not support")); + EXPECT_THAT(status.error_message(), + HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape))); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index a86d3583a6..69ccc7179f 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -437,6 +437,7 @@ cc_library( ":fusion_merger", ":gpu_copy_insertion", ":gpu_executable", + ":gpu_hlo_support_checker", ":gpu_layout_assignment", ":hlo_schedule", ":instruction_fusion", @@ -610,6 +611,32 @@ tf_cc_test( ], ) +cc_library( + name = "gpu_hlo_support_checker", + srcs = ["gpu_hlo_support_checker.cc"], + hdrs = ["gpu_hlo_support_checker.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "gpu_hlo_support_checker_test", + srcs = ["gpu_hlo_support_checker_test.cc"], + deps = [ + ":gpu_hlo_support_checker", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 9f34866ff5..93db9ebbee 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -39,6 +39,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" #include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h" #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" @@ -137,6 +138,7 @@ tensorflow::Status OptimizeHloModule( { HloPassPipeline pipeline("optimization"); pipeline.AddInvariantChecker<HloVerifier>(shape_size_function); + pipeline.AddPass<GpuHloSupportChecker>(); ReducePrecisionInsertion::AddPasses( &pipeline, hlo_module->config().debug_options(), ReducePrecisionInsertion::PassTiming::BEFORE_OPTIMIZATION); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc new file mode 100644 index 0000000000..4944c41f7d --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.cc @@ -0,0 +1,48 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" + +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace xla { + +StatusOr<bool> GpuHloSupportChecker::Run(HloModule* module) { + for (auto* computation : module->computations()) { + for (const auto& instruction : computation->instructions()) { + TF_RETURN_IF_ERROR( + ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape())); + TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( + instruction->shape(), + [&instruction](const Shape& subshape, const ShapeIndex&) { + if (LayoutUtil::IsSparseArray(subshape)) { + return xla::Unimplemented( + "GPU backend does not support HLO instruction %s with shape " + "containing a sparse layout: %s", + instruction->ToString().c_str(), + ShapeUtil::HumanStringWithLayout(instruction->shape()) + .c_str()); + } + return Status::OK(); + })); + } + } + return false; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h new file mode 100644 index 0000000000..d9550f81b5 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h @@ -0,0 +1,42 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ + +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// This pass should run early in the HLO pipeline and checks for HLO constructs +// which are not supported by the GPU backend and cannot be removed via HLO +// transformations (eg, sparse layouts). +class GpuHloSupportChecker : public HloPassInterface { + public: + GpuHloSupportChecker() = default; + ~GpuHloSupportChecker() override = default; + + tensorflow::StringPiece name() const override { + return "gpu_hlo_support_checker"; + } + + // Note: always returns false (no instructions are ever modified by this + // pass). + StatusOr<bool> Run(HloModule* module) override; +}; + +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_HLO_SUPPORT_CHECKER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc new file mode 100644 index 0000000000..0a4089df4c --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/error_codes.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +using ::testing::HasSubstr; + +class GpuHloSupportCheckerTest : public HloTestBase { + protected: + GpuHloSupportChecker& checker() { return checker_; } + + private: + GpuHloSupportChecker checker_; +}; + +TEST_F(GpuHloSupportCheckerTest, Add) { + HloComputation::Builder builder(TestName()); + const Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "param1")); + builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, param0, param1)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + TF_ASSERT_OK(checker().Run(module.get()).status()); +} + +TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) { + HloComputation::Builder builder(TestName()); + const Shape sparse_shape = ShapeUtil::MakeShapeWithSparseLayout(F32, {10}, 2); + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, sparse_shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, sparse_shape, "param1")); + builder.AddInstruction(HloInstruction::CreateBinary( + sparse_shape, HloOpcode::kAdd, param0, param1)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + Status 
status = checker().Run(module.get()).status(); + ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); + EXPECT_THAT(status.error_message(), + HasSubstr("GPU backend does not support")); + EXPECT_THAT(status.error_message(), + HasSubstr(ShapeUtil::HumanStringWithLayout(sparse_shape))); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 3d4080e353..290ea9b496 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -84,7 +84,7 @@ bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) { if (lhs.layout().format() != rhs.layout().format()) { return false; } - if (LayoutUtil::IsDense(lhs)) { + if (LayoutUtil::IsDenseArray(lhs)) { if (!ContainersEqual(LayoutUtil::MinorToMajor(lhs), LayoutUtil::MinorToMajor(rhs))) { VLOG(3) << "CompareShapes: lhs layout != rhs layout"; @@ -202,6 +202,17 @@ StatusOr<Shape> MakeShapeWithLayoutInternal( return MakeShapeWithLayout(element_type, dimensions, layout); } +/* static */ Shape ShapeUtil::MakeShapeWithSparseLayout( + PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions, + int64 max_sparse_elements) { + DCHECK_NE(TUPLE, element_type); + DCHECK_NE(OPAQUE, element_type); + Shape shape = ShapeUtil::MakeShape(element_type, dimensions); + *shape.mutable_layout() = LayoutUtil::MakeSparseLayout(max_sparse_elements); + TF_DCHECK_OK(ShapeUtil::ValidateShape(shape)); + return shape; +} + /* static */ Shape ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( const Shape& shape) { @@ -249,7 +260,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( } /* static */ void ShapeUtil::AppendMajorDimension(int bound, Shape* shape) { - CHECK(LayoutUtil::IsDense(*shape)); + CHECK(LayoutUtil::IsDenseArray(*shape)); shape->mutable_layout()->add_minor_to_major(Rank(*shape)); shape->add_dimensions(bound); TF_DCHECK_OK(ValidateShape(*shape)); @@ -658,23 +669,55 
@@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) { TF_DCHECK_OK(ValidateShape(shape)); DCHECK_NE(OPAQUE, shape.element_type()); if (shape.element_type() == TUPLE) { - CHECK_GT(pointer_size, 0); - return pointer_size * shape.tuple_shapes_size(); + return ByteSizeOfTupleIndexTable(shape, pointer_size); + } + int64 byte_size = ByteSizeOfElements(shape); + if (LayoutUtil::IsSparseArray(shape)) { + byte_size += ByteSizeOfSparseIndices(shape); } + return byte_size; +} + +/* static */ int64 ShapeUtil::ByteSizeOfTupleIndexTable(const Shape& shape, + int64 pointer_size) { + TF_DCHECK_OK(ValidateShape(shape)); + DCHECK_EQ(TUPLE, shape.element_type()); + CHECK_GT(pointer_size, 0); + return pointer_size * shape.tuple_shapes_size(); +} + +/* static */ int64 ShapeUtil::ByteSizeOfElements(const Shape& shape) { + TF_DCHECK_OK(ValidateShape(shape)); + DCHECK(ShapeUtil::IsArray(shape)); int64 allocated_element_count; - if (shape.layout().padded_dimensions_size() > 0) { - CHECK_EQ(Rank(shape), shape.layout().padded_dimensions_size()); - allocated_element_count = 1; - for (int64 dimension_size : shape.layout().padded_dimensions()) { - allocated_element_count *= dimension_size; - } + + if (LayoutUtil::IsSparseArray(shape)) { + allocated_element_count = LayoutUtil::MaxSparseElements(shape.layout()); } else { - allocated_element_count = ElementsIn(shape); + CHECK(LayoutUtil::IsDenseArray(shape)); + tensorflow::gtl::ArraySlice<int64> padded_dimensions = + LayoutUtil::PaddedDimensions(shape); + if (!padded_dimensions.empty()) { + CHECK_EQ(Rank(shape), padded_dimensions.size()); + allocated_element_count = 1; + for (int64 dimension_size : padded_dimensions) { + allocated_element_count *= dimension_size; + } + } else { + allocated_element_count = ElementsIn(shape); + } } return allocated_element_count * ByteSizeOfPrimitiveType(shape.element_type()); } +/* static */ int64 ShapeUtil::ByteSizeOfSparseIndices(const Shape& shape) { + TF_DCHECK_OK(ValidateShape(shape)); + 
DCHECK(LayoutUtil::IsSparseArray(shape)); + return LayoutUtil::MaxSparseElements(shape.layout()) * + ShapeUtil::Rank(shape) * sizeof(int64); +} + /* static */ Status ShapeUtil::ValidateShapeWithOptionalLayoutInternal( const Shape& shape) { if (shape.element_type() == TUPLE) { @@ -900,7 +943,7 @@ Status ForEachMutableSubshapeHelper( new_shape.add_dimensions(dim); } if (shape.has_layout()) { - CHECK(LayoutUtil::IsDense(shape)); + CHECK(LayoutUtil::IsDenseArray(shape)); Layout* new_layout = new_shape.mutable_layout(); new_layout->set_format(DENSE); new_layout->clear_minor_to_major(); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 59bdffee5a..453d4ec047 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -143,7 +143,10 @@ std::ostream& operator<<(std::ostream& out, const ShapeIndexView& shape_index); class ShapeUtil { public: // Returns the number of elements are contained within the provided shape; - // e.g. for rank 0 (scalars) the result is always 1. + // e.g. for rank 0 (scalars) the result is always 1. Note that sparse shapes + // may not actually be able to store this number of elements. See + // LayoutUtil::MaxSparseElements(shape) to obtain the maximum number of + // elements that can be stored in a sparse shape. // Precondition: !IsTuple(shape) static int64 ElementsIn(const Shape& shape); @@ -164,6 +167,27 @@ class ShapeUtil { // Precondition: !ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape) static int64 ByteSizeOfPrimitiveType(PrimitiveType primitive_type); + // Returns the number of bytes required to store the tuple member pointers for + // a allocation of shape. The `shape` must be a TUPLE shape, and + // `pointer_size` must be larger than zero. + static int64 ByteSizeOfTupleIndexTable(const Shape& shape, + int64 pointer_size); + + // Returns the number of bytes required for the elements in an allocation of + // `shape`, which must be an array shape. 
The return value does not include + // the bytes needed to store sparse indices. Dense shapes use a separate + // memory location for each element, and so for these shapes, + // `ByteSizeOf(shape) == ByteSizeOfElements(shape)`. For dense shapes, this + // size also includes padding if present in the layout. For sparse shapes, + // `ByteSizeOf(shape) == ByteSizeOfElements(shape) + + // ByteSizeOfSparseIndices(shape)`. + static int64 ByteSizeOfElements(const Shape& shape); + + // Returns the number of bytes required for the sparse indices in an + // allocation of shape. The shape must be an array shape. The return value + // does not include the bytes needed to store the array elements. + static int64 ByteSizeOfSparseIndices(const Shape& shape); + // Returns a human-readable string that represents the given shape, with or // without layout. e.g. "f32[42x12] {0, 1}" or "f32[64]". static string HumanString(const Shape& shape); @@ -269,6 +293,10 @@ class ShapeUtil { PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions, tensorflow::gtl::ArraySlice<int64> minor_to_major); + static Shape MakeShapeWithSparseLayout( + PrimitiveType element_type, tensorflow::gtl::ArraySlice<int64> dimensions, + int64 max_sparse_elements); + // Constructs a new shape with major-first layout (i.e. {n, n-1, ..., 0}). static Shape MakeShapeWithDescendingLayout( PrimitiveType element_type, diff --git a/tensorflow/compiler/xla/sparse_index_array.cc b/tensorflow/compiler/xla/sparse_index_array.cc new file mode 100644 index 0000000000..e7738e6790 --- /dev/null +++ b/tensorflow/compiler/xla/sparse_index_array.cc @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/sparse_index_array.h" + +#include "tensorflow/compiler/xla/index_util.h" +#include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/shape_util.h" + +namespace xla { + +SparseIndexArray::SparseIndexArray() : rank_(0), max_indices_(0) {} + +SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank, + std::vector<int64> indices) + : indices_(std::move(indices)), rank_(rank), max_indices_(max_indices) { + CHECK_GT(rank_, 0); + CHECK_EQ(indices_.size() % rank_, 0) + << "indices_.size(): " << indices_.size() << ", rank_: " << rank_; + CHECK_LT(index_count(), max_indices_); +} + +SparseIndexArray::SparseIndexArray(int64 max_indices, int64 rank, + tensorflow::gtl::ArraySlice<int64> indices) + : SparseIndexArray(max_indices, rank, + std::vector<int64>(indices.begin(), indices.end())) {} + +SparseIndexArray::SparseIndexArray(int64 max_indices, + const Array2D<int64>& indices) + : SparseIndexArray(max_indices, indices.n2(), + std::vector<int64>(indices.begin(), indices.end())) {} + +int64 SparseIndexArray::index_count() const { + CHECK_GT(rank_, 0); + CHECK_EQ(indices_.size() % rank_, 0); + return indices_.size() / rank_; +} + +tensorflow::gtl::ArraySlice<int64> SparseIndexArray::At( + int64 sparse_index_number) const { + CHECK_GT(rank_, 0); + CHECK_GE(sparse_index_number, 0); + CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size()); + return tensorflow::gtl::ArraySlice<int64>( + indices_.data() + rank_ * sparse_index_number, 
rank_); +} + +tensorflow::gtl::MutableArraySlice<int64> SparseIndexArray::At( + int64 sparse_index_number) { + CHECK_GT(rank_, 0); + CHECK_GE(sparse_index_number, 0); + CHECK_LE(rank_ * sparse_index_number + rank_, indices_.size()); + return tensorflow::gtl::MutableArraySlice<int64>( + indices_.data() + rank_ * sparse_index_number, rank_); +} + +void SparseIndexArray::Append(tensorflow::gtl::ArraySlice<int64> index) { + CHECK_GT(rank_, 0); + CHECK_EQ(index.size(), rank_); + indices_.insert(indices_.end(), index.begin(), index.end()); +} + +void SparseIndexArray::Clear() { indices_.clear(); } + +void SparseIndexArray::Resize(int64 num_indices) { + CHECK_GT(rank_, 0); + indices_.resize(rank_ * num_indices); +} + +bool SparseIndexArray::Validate(const Shape& shape) const { + if (rank_ == 0 || rank_ != ShapeUtil::Rank(shape)) { + return false; + } + int64 num_indices = index_count(); + if (num_indices > LayoutUtil::MaxSparseElements(shape.layout())) { + return false; + } + if (num_indices < 2) { + return true; + } + tensorflow::gtl::ArraySlice<int64> last = At(0); + if (!IndexUtil::IndexInBounds(shape, last)) { + return false; + } + for (int64 n = 1; n < num_indices; ++n) { + tensorflow::gtl::ArraySlice<int64> next = At(n); + if (!IndexUtil::IndexInBounds(shape, next)) { + return false; + } + if (IndexUtil::CompareIndices(last, next) >= 0) { + return false; + } + last = next; + } + return true; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/sparse_index_array.h b/tensorflow/compiler/xla/sparse_index_array.h new file mode 100644 index 0000000000..f67f34760e --- /dev/null +++ b/tensorflow/compiler/xla/sparse_index_array.h @@ -0,0 +1,176 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Utility class for managing sparse array indices. + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ + +#include <vector> + +#include "tensorflow/compiler/xla/array2d.h" +#include "tensorflow/compiler/xla/index_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace xla { + +// Encapsulates the array of indices for a sparse array. A SparseIndexArray +// contains indices for up to `max_indices` elements of a sparse array. Each +// sparse index is an array of `rank` int64 values that give the location of a +// value within a sparse array. Note that the dimensions of the array are not +// checked (except for the rank). To avoid confusion, we refer to the position +// of an index within a SparseIndexArray as a sparse index number. +class SparseIndexArray { + public: + SparseIndexArray(); + SparseIndexArray(const SparseIndexArray&) = default; + SparseIndexArray(SparseIndexArray&&) = default; + SparseIndexArray& operator=(const SparseIndexArray&) = default; + SparseIndexArray& operator=(SparseIndexArray&&) = default; + + // Constructs a SparseIndexArray that can hold up to `max_indices` sparse + // indices, with initial contents obtained from the given array. The rank + // is taken from the minor dimension of the array. The major dimension of the + // array must not exceed `max_indices`.
+ SparseIndexArray(int64 max_indices, const Array2D<int64>& indices); + + // Like above, but the array is flattened. For example, the following are + // equivalent: + // + // SparseIndexArray(10, 3, + // Array2D{ + // {0, 1, 2}, + // {3, 4, 5}, + // {6, 7, 8}, + // {9, 10, 11}, + // }) + // + // SparseIndexArray(10, 3, + // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}) + // + SparseIndexArray(int64 max_indices, int64 rank, + std::vector<int64> indices = {}); + SparseIndexArray(int64 max_indices, int64 rank, + tensorflow::gtl::ArraySlice<int64> indices); + + // Returns the number of elements represented by the indices stored in the + // array. + int64 index_count() const; + + // Returns a slice that refers to the given sparse index number. The argument + // must be in the range [0, index_count()). + tensorflow::gtl::ArraySlice<int64> At(int64 sparse_index_number) const; + tensorflow::gtl::MutableArraySlice<int64> At(int64 sparse_index_number); + + // Adds the given index at the end of the array. The new size of the + // SparseIndexArray must not exceed `max_indices`. + void Append(tensorflow::gtl::ArraySlice<int64> index); + + // Removes all indices from the array. + void Clear(); + + // Resizes the array to contain the given number of sparse indices. The new + // size must be smaller than `max_indices`. If the new size is larger than + // the old size, the value of the new indices is not specified. + void Resize(int64 num_indices); + + // Returns true iff all indices are unique and occur in sorted order, and are + // valid for the given shape. + bool Validate(const Shape& shape) const; + + int64 rank() const { return rank_; } + int64 max_indices() const { return max_indices_; } + + // Returns a pointer to the int64 array that holds the sparse indices.
+ tensorflow::gtl::MutableArraySlice<int64> mutable_data() { return &indices_; } + tensorflow::gtl::ArraySlice<int64> data() const { return indices_; } + + // Sorts this sparse index array along with the set of corresponding values. + // The indices and values are sorted in the lexicographic order of the + // indices, from smallest to largest. + // + // For example: + // + // std::vector<float> v{10.0, 11.0, 12.0}; + // SparseIndexArray a(10, 3, + // {{3, 4, 5}, + // {1, 2, 3}, + // {2, 3, 4}}); + // a.SortWithValues(&v); + // // Prints "11.0, 12.0, 10.0": + // std::cout << v[0] << ", " << v[1] << ", " << v[2] << std::endl; + // + template <typename NativeT> + void SortWithValues(tensorflow::gtl::MutableArraySlice<NativeT> values); + + private: + std::vector<int64> indices_; + int64 rank_; + int64 max_indices_; +}; + +template <typename NativeT> +void SparseIndexArray::SortWithValues( + tensorflow::gtl::MutableArraySlice<NativeT> values) { + int64 num_elements = index_count(); + CHECK_EQ(values.size(), num_elements); + std::vector<int64> sort_order; + sort_order.reserve(num_elements); + for (int64 i = 0; i < num_elements; ++i) { + sort_order.push_back(i); + } + auto sort_order_less = [this](int64 lhs, int64 rhs) { + return IndexUtil::CompareIndices(At(lhs), At(rhs)) < 0; + }; + std::sort(sort_order.begin(), sort_order.end(), sort_order_less); + + // Reorder the array elements according to sort_order. Work through the array + // and follow cycles so we can do the reorder in-place. + tensorflow::gtl::InlinedVector<int64, 8> saved_index(rank()); + for (int64 i = 0; i < num_elements; ++i) { + // sort_order[i] == -1 indicates the element has already been copied. + if (sort_order[i] < 0) { + continue; + } else if (i == sort_order[i]) { + // The element is already in sorted order. 
+ sort_order[i] = -1; + continue; + } + + std::copy_n(At(i).begin(), rank(), saved_index.begin()); + NativeT saved_value = values[i]; + int64 j = i; + for (;;) { + if (sort_order[j] == i) { + std::copy_n(saved_index.begin(), rank(), At(j).begin()); + values[j] = saved_value; + sort_order[j] = -1; + break; + } + + std::copy_n(At(sort_order[j]).begin(), rank(), At(j).begin()); + values[j] = values[sort_order[j]]; + + int64 k = sort_order[j]; + sort_order[j] = -1; + j = k; + } + } +} + +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SPARSE_INDEX_ARRAY_H_ diff --git a/tensorflow/compiler/xla/sparse_index_array_test.cc b/tensorflow/compiler/xla/sparse_index_array_test.cc new file mode 100644 index 0000000000..7377f88958 --- /dev/null +++ b/tensorflow/compiler/xla/sparse_index_array_test.cc @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/sparse_index_array.h" + +#include <vector> + +#include "tensorflow/compiler/xla/test.h" + +namespace xla { +namespace { + +TEST(SparseIndexArrayTest, Sort) { + SparseIndexArray a(10, 3); + a.Append({2, 3, 4}); + a.Append({3, 4, 5}); + a.Append({1, 2, 3}); + a.Append({5, 6, 7}); + a.Append({4, 5, 6}); + a.Append({6, 7, 8}); + std::vector<double> values = { + 12.0, 13.0, 11.0, 15.0, 14.0, 16.0, + }; + a.SortWithValues<double>(&values); + ASSERT_EQ(a.data(), std::vector<int64>({1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5, + 6, 7, 6, 7, 8})); + ASSERT_EQ(values, std::vector<double>({11.0, 12.0, 13.0, 14.0, 15.0, 16.0})); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index e34f138b6e..3aea021753 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -120,6 +120,9 @@ enum Format { // The default layout, with exactly one storage location per element (ignoring // padding). DENSE = 1; + // A sparsely encoded layout, providing only the index/value pairs of non-zero + // elements. + SPARSE = 2; } // A layout describes how the array is placed in (1D) memory space. This @@ -151,6 +154,11 @@ message Layout { // field must be unset unless the format is DENSE. PaddingValue padding_value = 3; + // The maximum number of elements that can be stored for SPARSE formats. This + // can be used to determine the maximum size in bytes of arrays stored in + // memory. This field must be unset unless the format is SPARSE. + int64 max_sparse_elements = 5; + // Important: if any field is added, be sure to modify ShapeUtil::Equal() // appropriately to account for the new field. 
} @@ -333,7 +341,8 @@ message LiteralProto { // The F16s and BF16s are encoded in little endian byte order bytes f16s = 11; bytes bf16s = 13; - // Next = 14 + repeated int64 sparse_indices = 14; + // Next = 15 } message WindowDimension { |