Diffstat (limited to 'tensorflow/core/kernels/deserialize_sparse_variant_op.cc')
-rw-r--r-- | tensorflow/core/kernels/deserialize_sparse_variant_op.cc | 372 |
1 file changed, 372 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/deserialize_sparse_variant_op.cc b/tensorflow/core/kernels/deserialize_sparse_variant_op.cc
new file mode 100644
index 0000000000..fce3029e4e
--- /dev/null
+++ b/tensorflow/core/kernels/deserialize_sparse_variant_op.cc
@@ -0,0 +1,372 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/lib/gtl/inlined_vector.h"
+
+namespace tensorflow {
+
+namespace {
+
+class DeserializeSparseOp : public OpKernel {
+ public:
+  explicit DeserializeSparseOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("dtype", &dtype_));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+
+    OP_REQUIRES(
+        context, input.dims() > 0,
+        errors::InvalidArgument("Serialized sparse should have non-zero rank ",
+                                input.shape().DebugString()));
+    OP_REQUIRES(context, input.shape().dim_size(input.dims() - 1) == 3,
+                errors::InvalidArgument(
+                    "Serialized sparse should have 3 as the last dimension ",
+                    input.shape().DebugString()));
+
+    // `input_dims_to_stack` is the number of dimensions that will be added to
+    // each of the elements before they are concatenated into the output.
+    const int64 input_dims_to_stack = input.dims() - 1;
+    int num_sparse_tensors = 1;
+    for (int i = 0; i < input_dims_to_stack; ++i) {
+      num_sparse_tensors *= input.shape().dim_size(i);
+    }
+
+    if (num_sparse_tensors == 1 && input_dims_to_stack == 0) {
+      // Special case with a single sparse tensor, and no dimensions to add
+      // to the output indices. We can return the boxed tensors directly
+      // (after validating them).
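+      //
+      // Each serialized SparseTensor occupies three Variant elements,
+      // holding the int64 indices matrix, the values vector, and the int64
+      // dense-shape vector, in that order; hence the (0), (1), and (2)
+      // accessors below.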
+      const Tensor* output_indices;
+      const Tensor* output_values;
+      const Tensor* output_shape;
+      const auto& input_as_vec = input.vec<Variant>();
+      int64 total_non_zeros;
+      OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
+                                  input_as_vec(1), input_as_vec(2), 0,
+                                  &output_shape, &total_non_zeros));
+      OP_REQUIRES_OK(context, GetAndValidateSparseTensorIndicesAndValues(
+                                  input_as_vec(0), input_as_vec(1), 0,
+                                  output_shape->NumElements(), &output_indices,
+                                  &output_values));
+      context->set_output(0, *output_indices);
+      context->set_output(1, *output_values);
+      context->set_output(2, *output_shape);
+      return;
+    }
+
+    OP_REQUIRES(
+        context, num_sparse_tensors > 0,
+        errors::InvalidArgument(
+            "Serialized sparse should have at least 1 serialized tensor, "
+            "but has a zero dimension ",
+            input.shape().DebugString()));
+
+    const auto& input_as_matrix = input.flat_inner_dims<Variant, 2>();
+
+    // Compute the output "dense shape" of, and the number of non-zero
+    // elements in, the stacked sparse tensors. Given an input of shape
+    // (S_0, ..., S_{input_dims_to_stack-1}, 3), and an element of dense shape
+    // (E_0, ..., E_n), the output dense shape will be
+    // (S_0, ..., S_{input_dims_to_stack-1}, E_0, ..., E_n).
+    Tensor* output_shape;
+    int64 total_non_zeros = 0;
+
+    // Allocate and build the initial output shape based on the element shape
+    // of the 0th sparse tensor in the input.
+    //
+    // NOTE(mrry): We define `element_shape` as a `const Tensor*` rather than a
+    // `Tensor` to avoid the overhead of allocating and deallocating a `Tensor`
+    // on the stack. While the per-`Tensor` cost is small, this op can unbox a
+    // large number of tensors (3 per batch element) and these fixed overheads
+    // dominate when the number of non-zeros per element is small.
+    const Tensor* element_shape;
+    OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
+                                input_as_matrix(0, 1), input_as_matrix(0, 2),
+                                0, &element_shape, &total_non_zeros));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(
+                       2, {input_dims_to_stack + element_shape->NumElements()},
+                       &output_shape));
+    const auto element_shape_vec = element_shape->vec<int64>();
+    auto output_shape_vec = output_shape->vec<int64>();
+    output_shape_vec(0) = num_sparse_tensors;
+    for (int64 j = 0; j < input_dims_to_stack; ++j) {
+      output_shape_vec(j) = input.dim_size(j);
+    }
+    for (int64 j = 0; j < element_shape->NumElements(); ++j) {
+      output_shape_vec(j + input_dims_to_stack) = element_shape_vec(j);
+    }
+
+    // Accumulate the number of non-zero elements from the remaining sparse
+    // tensors, and validate that they have compatible dense shapes.
+    //
+    // NOTE(mrry): For compatibility with the implementation of
+    // DeserializeManySparse, and with the many ops that generate
+    // SparseTensors to batch but do not produce a fixed dense_shape
+    // (e.g. `tf.parse_single_example()`), we compute the maximum in each
+    // dimension to find the smallest dense_shape that bounds all of the
+    // input SparseTensors.
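+    //
+    // For example, stacking three 2-D elements with dense shapes (2, 3),
+    // (4, 1), and (3, 3) yields the bounding element shape (4, 3); with
+    // `input_dims_to_stack == 1`, the final output dense shape is (3, 4, 3).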
+    for (int i = 1; i < num_sparse_tensors; ++i) {
+      int64 num_non_zeros;
+      OP_REQUIRES_OK(context, GetAndValidateSparseTensorShape(
+                                  input_as_matrix(i, 1), input_as_matrix(i, 2),
+                                  i, &element_shape, &num_non_zeros));
+      total_non_zeros += num_non_zeros;
+      OP_REQUIRES(
+          context,
+          output_shape->NumElements() - input_dims_to_stack ==
+              element_shape->NumElements(),
+          errors::InvalidArgument(
+              "Inconsistent shape across SparseTensors: rank prior to "
+              "SparseTensor[",
+              i, "] was: ", output_shape->NumElements() - input_dims_to_stack,
+              " but rank of SparseTensor[", i,
+              "] is: ", element_shape->NumElements()));
+      const auto element_shape_vec = element_shape->vec<int64>();
+      for (int j = 0; j < element_shape->NumElements(); ++j) {
+        output_shape_vec(j + input_dims_to_stack) = std::max(
+            output_shape_vec(j + input_dims_to_stack), element_shape_vec(j));
+      }
+    }
+
+    // Compute the output "indices" matrix and "values" vector.
+    Tensor* output_indices;
+    Tensor* output_values;
+
+    const int output_rank = output_shape->NumElements();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(
+                       0, {static_cast<int64>(total_non_zeros), output_rank},
+                       &output_indices));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(
+                     1, {static_cast<int64>(total_non_zeros)},
+                     &output_values));
+
+    // The bulk of the work in this method involves building the output
+    // indices in a tight loop. For cache friendliness, we generate the
+    // indices in the order that they will be laid out in memory, and we use
+    // raw pointers into the underlying index buffer (instead of Eigen
+    // element/slice indexing methods) to minimize the amount of work in that
+    // tight loop.
+    int64* output_indices_data = output_indices->matrix<int64>().data();
+    size_t current_row = 0;
+
+    for (int i = 0; i < num_sparse_tensors; ++i) {
+      const Tensor* element_indices;
+      const Tensor* element_values;
+      OP_REQUIRES_OK(context, this->GetAndValidateSparseTensorIndicesAndValues(
+                                  input_as_matrix(i, 0), input_as_matrix(i, 1),
+                                  i, output_rank - input_dims_to_stack,
+                                  &element_indices, &element_values));
+
+      const size_t num_index_rows = element_values->NumElements();
+
+      // An empty sparse tensor in the input will generate no data in the
+      // output. We short-circuit the rest of the iteration to avoid
+      // triggering assertions in Eigen when manipulating empty tensors (or
+      // slices of tensors).
+      if (num_index_rows == 0) continue;
+
+      const size_t start_row = current_row;
+      const size_t next_start_row = current_row + num_index_rows;
+
+      // NOTE(mrry): If the element is a scalar SparseTensor,
+      // `element_indices` will be an empty tensor, and this pointer will not
+      // be valid. However, we will not dereference the pointer in that case,
+      // because `input_dims_to_stack == output_rank`.
+      const int64* element_indices_data =
+          element_indices->matrix<int64>().data();
+
+      // Build the submatrix of `output_indices` for the i^th sparse tensor
+      // in the input.
+      //
+      // Each row of `output_indices` comprises `input_dims_to_stack` indices
+      // based on the position of the i^th sparse tensor in the input tensor,
+      // followed by the indices from the corresponding row in
+      // `element_indices`.
+      if (input_dims_to_stack == 1 && output_rank == 2) {
+        // We specialize this case because the compiler can generate more
+        // efficient code when the number of indices for each element is
+        // known statically. Since the most common use of this op is to
+        // deserialize batches of SparseTensors, and the most common source of
+        // SparseTensors is the `tf.parse_single_example()` op, which
+        // generates 1-D SparseTensors, we statically unroll the loop for the
+        // rank 2 output case.
+        for (; current_row < next_start_row; ++current_row) {
+          *output_indices_data++ = i;
+          *output_indices_data++ = *element_indices_data++;
+        }
+      } else {
+        // `sparse_tensor_index` is the tuple of indices that correspond to
+        // mapping the flat element index (`i`) back onto the stacked
+        // coordinates implied by the position of the i^th sparse tensor in
+        // the input tensor.
+        //
+        // We build `sparse_tensor_index` in reverse (innermost/minor
+        // dimension to outermost/major dimension). The `cumulative_product`
+        // represents the size of the inner subtensor for which
+        // `sparse_tensor_index` has already been built. For example, with
+        // `input` of shape (2, 3, 3) (so the stacked dimensions are (2, 3)),
+        // the element at flat index i == 4 maps to
+        // `sparse_tensor_index == {1, 1}`.
+        gtl::InlinedVector<int64, 4> sparse_tensor_index(input_dims_to_stack);
+        int cumulative_product = 1;
+        for (size_t j = 0; j < sparse_tensor_index.size(); ++j) {
+          size_t reverse_index = sparse_tensor_index.size() - j - 1;
+          sparse_tensor_index[reverse_index] =
+              (i / cumulative_product) % input.dim_size(reverse_index);
+          cumulative_product *= input.dim_size(reverse_index);
+        }
+        for (; current_row < next_start_row; ++current_row) {
+          for (int64 sparse_tensor_index_component : sparse_tensor_index) {
+            *output_indices_data++ = sparse_tensor_index_component;
+          }
+          for (size_t k = input_dims_to_stack; k < output_rank; ++k) {
+            *output_indices_data++ = *element_indices_data++;
+          }
+        }
+      }
+
+      // Build the subvector of `output_values` for the i^th sparse tensor
+      // in the input.
+      //
+      // NOTE(mrry): There is a potential optimization here where we use a T*
+      // to represent the current position in `output_values`, but it would
+      // require some rejigging of the template parameters.
+      // NOTE(mrry): Another potential optimization: if we know that this
+      // operation consumes its input, we could std::move non-primitive
+      // elements into the output and avoid a copy.
+      Eigen::DSizes<Eigen::DenseIndex, 1> values_start(start_row);
+      Eigen::DSizes<Eigen::DenseIndex, 1> values_sizes(num_index_rows);
+
+#define HANDLE_TYPE(T)                                                \
+  case DataTypeToEnum<T>::value: {                                    \
+    output_values->vec<T>().slice(values_start, values_sizes) =       \
+        element_values->vec<T>();                                     \
+    break;                                                            \
+  }
+      switch (dtype_) {
+        TF_CALL_ALL_TYPES(HANDLE_TYPE);
+        TF_CALL_QUANTIZED_TYPES(HANDLE_TYPE);
+#undef HANDLE_TYPE
+        default:
+          OP_REQUIRES_OK(
+              context, errors::Unimplemented(
+                           "DeserializeSparse Unhandled data type: ", dtype_));
+      }
+    }
+  }
+
+ private:
+  Status GetAndValidateSparseTensorShape(const Variant& serialized_values,
+                                         const Variant& serialized_shape,
+                                         int index, const Tensor** output_shape,
+                                         int64* output_num_non_zeros) {
+    // Deserialize and validate the shape.
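+    //
+    // NOTE: `Variant::get<Tensor>()` returns nullptr if the Variant does not
+    // currently hold a `Tensor`, so each accessor must be null-checked
+    // before it is dereferenced.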
+    *output_shape = serialized_shape.get<Tensor>();
+    if (*output_shape == nullptr) {
+      return errors::InvalidArgument(
+          "Could not get a tensor from serialized_sparse[", index, ", 2]");
+    }
+    if ((*output_shape)->dtype() != DT_INT64) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 2] to be a vector of DT_INT64 but received dtype ",
+          DataTypeString((*output_shape)->dtype()));
+    }
+    if (!TensorShapeUtils::IsVector((*output_shape)->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 2] to be a shape vector but its shape is ",
+          (*output_shape)->shape().DebugString());
+    }
+    const Tensor* values = serialized_values.get<Tensor>();
+    if (values == nullptr) {
+      return errors::InvalidArgument(
+          "Could not get a tensor from serialized_sparse[", index, ", 1]");
+    }
+    *output_num_non_zeros = values->NumElements();
+    return Status::OK();
+  }
+
+  Status GetAndValidateSparseTensorIndicesAndValues(
+      const Variant& serialized_indices, const Variant& serialized_values,
+      int index, int expected_rank, const Tensor** output_indices,
+      const Tensor** output_values) {
+    // Deserialize and validate the indices.
+    *output_indices = serialized_indices.get<Tensor>();
+    if (*output_indices == nullptr) {
+      return errors::InvalidArgument(
+          "Could not get a tensor from serialized_sparse[", index, ", 0]");
+    }
+    if ((*output_indices)->dtype() != DT_INT64) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 0] to be a matrix of DT_INT64 but received dtype ",
+          DataTypeString((*output_indices)->dtype()));
+    }
+    if (!TensorShapeUtils::IsMatrix((*output_indices)->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 0] to represent an index matrix but received shape ",
+          (*output_indices)->shape().DebugString());
+    }
+    int64 num_entries = (*output_indices)->dim_size(0);
+    int rank = (*output_indices)->dim_size(1);
+    if (rank != expected_rank) {
+      return errors::InvalidArgument(
+          "Expected column counts of SparseTensor[", index,
+          "].indices to match size of SparseTensor[", index,
+          "].shape but they do not: ", rank, " vs. ", expected_rank);
+    }
+
+    // Deserialize and validate the values.
+    *output_values = serialized_values.get<Tensor>();
+    if (*output_values == nullptr) {
+      return errors::InvalidArgument(
+          "Could not get a tensor from serialized_sparse[", index, ", 1]");
+    }
+    if (!TensorShapeUtils::IsVector((*output_values)->shape())) {
+      return errors::InvalidArgument(
+          "Expected serialized_sparse[", index,
+          ", 1] to represent a values vector but received shape ",
+          (*output_values)->shape().DebugString());
+    }
+    if (dtype_ != (*output_values)->dtype()) {
+      return errors::InvalidArgument(
+          "Requested SparseTensor of type ", DataTypeString(dtype_),
+          " but SparseTensor[", index,
+          "].values.dtype() == ", DataTypeString((*output_values)->dtype()));
+    }
+    if (num_entries != (*output_values)->dim_size(0)) {
+      return errors::InvalidArgument(
+          "Expected row counts of SparseTensor[", index,
+          "].indices and SparseTensor[", index,
+          "].values to match but they do not: ", num_entries, " vs. ",
+          (*output_values)->dim_size(0));
+    }
+
+    return Status::OK();
+  }
+
+  DataType dtype_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("DeserializeSparse")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<Variant>("Tserialized"),
+                        DeserializeSparseOp);
+
+}  // namespace
+
+}  // namespace tensorflow