aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--tensorflow/core/api_def/base_api/api_def_ParseSequenceExample.pbtxt112
-rw-r--r--tensorflow/core/api_def/python_api/api_def_ParseSequenceExample.pbtxt4
-rw-r--r--tensorflow/core/kernels/example_parsing_ops.cc165
-rw-r--r--tensorflow/core/ops/parsing_ops.cc93
-rw-r--r--tensorflow/core/ops/parsing_ops_test.cc82
-rw-r--r--tensorflow/core/util/example_proto_fast_parsing.cc228
-rw-r--r--tensorflow/core/util/example_proto_fast_parsing.h3
-rw-r--r--tensorflow/core/util/example_proto_helper.cc53
-rw-r--r--tensorflow/core/util/example_proto_helper.h61
-rw-r--r--tensorflow/python/kernel_tests/parsing_ops_test.py1158
-rw-r--r--tensorflow/python/ops/parsing_ops.py346
-rw-r--r--tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt4
-rw-r--r--tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt4
13 files changed, 1726 insertions, 587 deletions
diff --git a/tensorflow/core/api_def/base_api/api_def_ParseSequenceExample.pbtxt b/tensorflow/core/api_def/base_api/api_def_ParseSequenceExample.pbtxt
new file mode 100644
index 0000000000..b1cb9a696d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ParseSequenceExample.pbtxt
@@ -0,0 +1,112 @@
+op {
+ graph_op_name: "ParseSequenceExample"
+ in_arg {
+ name: "serialized"
+ description: <<END
+A vector containing binary serialized SequenceExample protos.
+END
+ }
+ in_arg {
+ name: "debug_name"
+ description: <<END
+A vector containing the names of the serialized protos.
+May contain, for example, table key (descriptive) name for the
+corresponding serialized proto. This is purely useful for debugging
+purposes, and the presence of values here has no effect on the output.
+May also be an empty vector if no name is available.
+END
+ }
+ in_arg {
+ name: "context_dense_defaults"
+ description: <<END
+A list of Ncontext_dense Tensors (some may be empty).
+context_dense_defaults[j] provides default values
+when the SequenceExample's context map lacks context_dense_key[j].
+If an empty Tensor is provided for context_dense_defaults[j],
+then the Feature context_dense_keys[j] is required.
+The input type is inferred from context_dense_defaults[j], even when it's
+empty. If context_dense_defaults[j] is not empty, its shape must match
+context_dense_shapes[j].
+END
+ }
+ attr {
+ name: "feature_list_dense_missing_assumed_empty"
+ description: <<END
+A vector listing the
+FeatureList keys which may be missing from the SequenceExamples. If the
+associated FeatureList is missing, it is treated as empty. By default,
+any FeatureList not listed in this vector must exist in the SequenceExamples.
+END
+ }
+ attr {
+ name: "context_sparse_keys"
+ description: <<END
+A list of Ncontext_sparse string Tensors (scalars).
+The keys expected in the SequenceExamples' context features associated with
+sparse values.
+END
+ }
+ attr {
+ name: "context_dense_keys"
+ description: <<END
+A list of Ncontext_dense string Tensors (scalars).
+The keys expected in the SequenceExamples' context features associated with
+dense values.
+END
+ }
+ attr {
+ name: "feature_list_sparse_keys"
+ description: <<END
+A list of Nfeature_list_sparse string Tensors
+(scalars). The keys expected in the FeatureLists associated with sparse
+values.
+END
+ }
+ attr {
+ name: "feature_list_dense_keys"
+ description: <<END
+A list of Nfeature_list_dense string Tensors (scalars).
+The keys expected in the SequenceExamples' feature_lists associated
+with lists of dense values.
+END
+ }
+ attr {
+ name: "context_sparse_types"
+ description: <<END
+A list of Ncontext_sparse types; the data types of data in
+each context Feature given in context_sparse_keys.
+Currently the ParseSequenceExample supports DT_FLOAT (FloatList),
+DT_INT64 (Int64List), and DT_STRING (BytesList).
+END
+ }
+ attr {
+ name: "context_dense_shapes"
+ description: <<END
+A list of Ncontext_dense shapes; the shapes of data in
+each context Feature given in context_dense_keys.
+The number of elements in the Feature corresponding to context_dense_key[j]
+must always equal context_dense_shapes[j].NumEntries().
+The shape of context_dense_values[j] will match context_dense_shapes[j].
+END
+ }
+ attr {
+ name: "feature_list_sparse_types"
+ description: <<END
+A list of Nfeature_list_sparse types; the data types
+of data in each FeatureList given in feature_list_sparse_keys.
+Currently the ParseSequenceExample supports DT_FLOAT (FloatList),
+DT_INT64 (Int64List), and DT_STRING (BytesList).
+END
+ }
+ attr {
+ name: "feature_list_dense_shapes"
+ description: <<END
+A list of Nfeature_list_dense shapes; the shapes of
+data in each FeatureList given in feature_list_dense_keys.
+The shape of each Feature in the FeatureList corresponding to
+feature_list_dense_key[j] must always equal
+feature_list_dense_shapes[j].NumEntries().
+END
+ }
+ summary: "Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors."
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_ParseSequenceExample.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParseSequenceExample.pbtxt
new file mode 100644
index 0000000000..4a7e75ba0e
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_ParseSequenceExample.pbtxt
@@ -0,0 +1,4 @@
+op {
+ graph_op_name: "ParseSequenceExample"
+ visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/example_parsing_ops.cc b/tensorflow/core/kernels/example_parsing_ops.cc
index 83cd0e9b47..528b3c6bf0 100644
--- a/tensorflow/core/kernels/example_parsing_ops.cc
+++ b/tensorflow/core/kernels/example_parsing_ops.cc
@@ -264,9 +264,168 @@ class ParseSingleExampleOp : public OpKernel {
REGISTER_KERNEL_BUILDER(Name("ParseSingleExample").Device(DEVICE_CPU),
ParseSingleExampleOp);
-class SingleSequenceExampleParserOp : public OpKernel {
+class ParseSequenceExampleOp : public OpKernel {
public:
- explicit SingleSequenceExampleParserOp(OpKernelConstruction* ctx)
+ explicit ParseSequenceExampleOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
+ OP_REQUIRES_OK(ctx, attrs_.Init(ctx));
+ }
+
+ void Compute(OpKernelContext* ctx) override {
+ const Tensor* debug_name;
+ const Tensor* serialized;
+ OpInputList context_dense_defaults;
+
+ OP_REQUIRES_OK(ctx, ctx->input("debug_name", &debug_name));
+ OP_REQUIRES_OK(ctx, ctx->input("serialized", &serialized));
+ OP_REQUIRES_OK(ctx, ctx->input_list("context_dense_defaults",
+ &context_dense_defaults));
+
+ bool has_debug_name = (debug_name->NumElements() > 0);
+ if (has_debug_name) {
+ OP_REQUIRES(ctx, TensorShapeUtils::IsVector(debug_name->shape()),
+ errors::InvalidArgument(
+ "Expected debug_name to be a vector, got shape: ",
+ debug_name->shape().DebugString()));
+ }
+
+ OP_REQUIRES(ctx, TensorShapeUtils::IsVector(serialized->shape()),
+ errors::InvalidArgument(
+ "Expected serialized to be a vector, got shape: ",
+ serialized->shape().DebugString()));
+
+ OP_REQUIRES(ctx, context_dense_defaults.size() == attrs_.num_context_dense,
+ errors::InvalidArgument("Expected len(context_dense_defaults) "
+ "== len(context_dense_keys) but got: ",
+ context_dense_defaults.size(), " vs. ",
+ attrs_.num_context_dense));
+
+ std::vector<bool> required(attrs_.num_context_dense);
+ for (int d = 0; d < attrs_.num_context_dense; ++d) {
+ const Tensor& def_value = context_dense_defaults[d];
+ required[d] = (def_value.NumElements() == 0); // No default provided.
+
+ if (def_value.NumElements() > 0) {
+ OP_REQUIRES(ctx, def_value.shape() == attrs_.context_dense_shapes[d],
+ errors::InvalidArgument(
+ "default_value[", d,
+ "].shape() == ", def_value.shape().DebugString(),
+ " != context_dense_shapes[", d,
+ "] == ", attrs_.context_dense_shapes[d].DebugString()));
+ OP_REQUIRES(
+ ctx, def_value.dtype() == attrs_.context_dense_types[d],
+ errors::InvalidArgument(
+ "context_dense_defaults[", d, "].dtype() == ",
+ DataTypeString(def_value.dtype()), " != context_dense_types[",
+ d, "] == ", DataTypeString(attrs_.context_dense_types[d])));
+ }
+ }
+
+ example::Result context_result, feature_list_result;
+ std::vector<Tensor> dense_feature_lengths;
+
+ example::FastParseExampleConfig context_config;
+ for (int d = 0; d < attrs_.num_context_dense; ++d) {
+ context_config.dense.push_back(
+ {attrs_.context_dense_keys[d], attrs_.context_dense_types[d],
+ attrs_.context_dense_shapes[d], context_dense_defaults[d],
+ false /* attrs_.context_variable_length[d] */,
+ 0 /*attrs_.context_elements_per_stride[d] */});
+ }
+ for (int d = 0; d < attrs_.num_context_sparse; ++d) {
+ context_config.sparse.push_back(
+ {attrs_.context_sparse_keys[d], attrs_.context_sparse_types[d]});
+ }
+ example::FastParseExampleConfig feature_list_config;
+ for (int d = 0; d < attrs_.num_feature_list_dense; ++d) {
+ DataType dtype = attrs_.feature_list_dense_types[d];
+ Tensor default_value = Tensor(dtype, TensorShape({}));
+ feature_list_config.dense.push_back(
+ {attrs_.feature_list_dense_keys[d], dtype,
+ attrs_.feature_list_dense_shapes[d], default_value,
+ (attrs_.feature_list_dense_missing_assumed_empty.count(
+ attrs_.feature_list_dense_keys[d]) > 0),
+ 0 /*attrs_.context_elements_per_stride[d] */});
+ }
+ for (int d = 0; d < attrs_.num_feature_list_sparse; ++d) {
+ feature_list_config.sparse.push_back(
+ {attrs_.feature_list_sparse_keys[d],
+ attrs_.feature_list_sparse_types[d]});
+ }
+
+ auto serialized_t = serialized->flat<string>();
+ auto debug_name_t = debug_name->flat<string>();
+ gtl::ArraySlice<string> slice(serialized_t.data(), serialized_t.size());
+ gtl::ArraySlice<string> names_slice(debug_name_t.data(),
+ debug_name_t.size());
+
+ OP_REQUIRES_OK(
+ ctx,
+ FastParseSequenceExample(
+ context_config, feature_list_config, slice, names_slice,
+ ctx->device()->tensorflow_cpu_worker_threads()->workers,
+ &context_result, &feature_list_result, &dense_feature_lengths));
+
+ OpOutputList context_sparse_indices;
+ OpOutputList context_sparse_values;
+ OpOutputList context_sparse_shapes;
+ OpOutputList context_dense_values;
+ OpOutputList feature_list_sparse_indices;
+ OpOutputList feature_list_sparse_values;
+ OpOutputList feature_list_sparse_shapes;
+ OpOutputList feature_list_dense_values;
+ OpOutputList feature_list_dense_lengths;
+
+ OP_REQUIRES_OK(ctx, ctx->output_list("context_sparse_indices",
+ &context_sparse_indices));
+ OP_REQUIRES_OK(
+ ctx, ctx->output_list("context_sparse_values", &context_sparse_values));
+ OP_REQUIRES_OK(
+ ctx, ctx->output_list("context_sparse_shapes", &context_sparse_shapes));
+ OP_REQUIRES_OK(
+ ctx, ctx->output_list("context_dense_values", &context_dense_values));
+ OP_REQUIRES_OK(ctx, ctx->output_list("context_sparse_indices",
+ &context_sparse_indices));
+ OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_indices",
+ &feature_list_sparse_indices));
+ OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_values",
+ &feature_list_sparse_values));
+ OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_shapes",
+ &feature_list_sparse_shapes));
+ OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_dense_values",
+ &feature_list_dense_values));
+ OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_dense_lengths",
+ &feature_list_dense_lengths));
+ for (int d = 0; d < attrs_.num_context_dense; ++d) {
+ context_dense_values.set(d, context_result.dense_values[d]);
+ }
+ TensorShape lengths_shape;
+ lengths_shape.AddDim(serialized_t.size());
+ for (int d = 0; d < attrs_.num_feature_list_dense; ++d) {
+ feature_list_dense_values.set(d, feature_list_result.dense_values[d]);
+ feature_list_dense_lengths.set(d, dense_feature_lengths[d]);
+ }
+ for (int d = 0; d < attrs_.num_context_sparse; ++d) {
+ context_sparse_indices.set(d, context_result.sparse_indices[d]);
+ context_sparse_values.set(d, context_result.sparse_values[d]);
+ context_sparse_shapes.set(d, context_result.sparse_shapes[d]);
+ }
+ for (int d = 0; d < attrs_.num_feature_list_sparse; ++d) {
+ feature_list_sparse_indices.set(d, feature_list_result.sparse_indices[d]);
+ feature_list_sparse_values.set(d, feature_list_result.sparse_values[d]);
+ feature_list_sparse_shapes.set(d, feature_list_result.sparse_shapes[d]);
+ }
+ }
+
+ protected:
+ ParseSequenceExampleAttrs attrs_;
+};
+
+REGISTER_KERNEL_BUILDER(Name("ParseSequenceExample").Device(DEVICE_CPU),
+ ParseSequenceExampleOp);
+
+class ParseSingleSequenceExampleOp : public OpKernel {
+ public:
+ explicit ParseSingleSequenceExampleOp(OpKernelConstruction* ctx)
: OpKernel(ctx) {
OP_REQUIRES_OK(ctx, attrs_.Init(ctx));
}
@@ -658,7 +817,7 @@ class SingleSequenceExampleParserOp : public OpKernel {
};
REGISTER_KERNEL_BUILDER(Name("ParseSingleSequenceExample").Device(DEVICE_CPU),
- SingleSequenceExampleParserOp);
+ ParseSingleSequenceExampleOp);
#ifndef IS_MOBILE_PLATFORM
// when using lite protos on mobile, decoding JSON is not available.
diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc
index ddb714b4e9..79ca96d249 100644
--- a/tensorflow/core/ops/parsing_ops.cc
+++ b/tensorflow/core/ops/parsing_ops.cc
@@ -132,6 +132,99 @@ REGISTER_OP("ParseSingleExample")
return Status::OK();
});
+REGISTER_OP("ParseSequenceExample")
+ .Input("serialized: string")
+ .Input("debug_name: string")
+ .Input("context_dense_defaults: Tcontext_dense")
+ .Output("context_sparse_indices: Ncontext_sparse * int64")
+ .Output("context_sparse_values: context_sparse_types")
+ .Output("context_sparse_shapes: Ncontext_sparse * int64")
+ .Output("context_dense_values: Tcontext_dense")
+ .Output("feature_list_sparse_indices: Nfeature_list_sparse * int64")
+ .Output("feature_list_sparse_values: feature_list_sparse_types")
+ .Output("feature_list_sparse_shapes: Nfeature_list_sparse * int64")
+ .Output("feature_list_dense_values: feature_list_dense_types")
+ .Output("feature_list_dense_lengths: Nfeature_list_dense * int64")
+ .Attr("feature_list_dense_missing_assumed_empty: list(string) >= 0")
+ .Attr("context_sparse_keys: list(string) >= 0")
+ .Attr("context_dense_keys: list(string) >= 0")
+ .Attr("feature_list_sparse_keys: list(string) >= 0")
+ .Attr("feature_list_dense_keys: list(string) >= 0")
+ .Attr("Ncontext_sparse: int >= 0 = 0")
+ .Attr("Ncontext_dense: int >= 0 = 0")
+ .Attr("Nfeature_list_sparse: int >= 0 = 0")
+ .Attr("Nfeature_list_dense: int >= 0 = 0")
+ .Attr("context_sparse_types: list({float,int64,string}) >= 0 = []")
+ .Attr("Tcontext_dense: list({float,int64,string}) >= 0 = []")
+ .Attr("feature_list_dense_types: list({float,int64,string}) >= 0 = []")
+ .Attr("context_dense_shapes: list(shape) >= 0 = []")
+ .Attr("feature_list_sparse_types: list({float,int64,string}) >= 0 = []")
+ .Attr("feature_list_dense_shapes: list(shape) >= 0 = []")
+ .SetShapeFn([](InferenceContext* c) {
+ ParseSequenceExampleAttrs attrs;
+ TF_RETURN_IF_ERROR(attrs.Init(c));
+
+ // Verify that the input is a vector, and carry the shape if known.
+ ShapeHandle input;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &input));
+ shape_inference::DimensionHandle num_examples = c->Dim(input, 0);
+
+ ShapeHandle unused;
+ TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); // debug_name
+
+ int output_idx = 0;
+
+ // Output context_sparse_indices, context_sparse_values, and
+ // context_sparse_shapes.
+ for (int i = 0; i < attrs.num_context_sparse; ++i) {
+ c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 2));
+ }
+ for (int i = 0; i < attrs.num_context_sparse; ++i) {
+ c->set_output(output_idx++, c->Vector(c->UnknownDim()));
+ }
+ for (int i = 0; i < attrs.num_context_sparse; ++i) {
+ c->set_output(output_idx++, c->Vector(2));
+ }
+
+ // Output context_dense_values.
+ for (int i = 0; i < attrs.num_context_dense; ++i) {
+ ShapeHandle s;
+ TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+ attrs.context_dense_shapes[i], &s));
+ TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(num_examples), s, &s));
+ c->set_output(output_idx++, s);
+ }
+
+ // Output feature_list_sparse_indices, feature_list_sparse_values,
+ // feature_list_sparse_shapes.
+ for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
+ c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 3));
+ }
+ for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
+ c->set_output(output_idx++, c->Vector(c->UnknownDim()));
+ }
+ for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
+ c->set_output(output_idx++, c->Vector(3));
+ }
+
+ // Output feature_list_dense_values.
+ for (int i = 0; i < attrs.num_feature_list_dense; ++i) {
+ ShapeHandle s;
+ TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
+ attrs.feature_list_dense_shapes[i], &s));
+ TF_RETURN_IF_ERROR(
+ c->Concatenate(c->Matrix(num_examples, c->UnknownDim()), s, &s));
+ c->set_output(output_idx++, s);
+ }
+
+ // Output feature_list_dense_lengths.
+ for (int i = 0; i < attrs.num_feature_list_dense; ++i) {
+ c->set_output(output_idx++, c->Vector(num_examples));
+ }
+
+ return Status::OK();
+ });
+
REGISTER_OP("ParseSingleSequenceExample")
.Input("serialized: string")
.Input("feature_list_dense_missing_assumed_empty: string")
diff --git a/tensorflow/core/ops/parsing_ops_test.cc b/tensorflow/core/ops/parsing_ops_test.cc
index 9121d7ae92..c65e66d1a8 100644
--- a/tensorflow/core/ops/parsing_ops_test.cc
+++ b/tensorflow/core/ops/parsing_ops_test.cc
@@ -143,6 +143,88 @@ TEST(ParsingOpsTest, ParseExample_ShapeFn) {
"?;?;?;?;?;?;?;?;?;?");
}
+TEST(ParsingOpsTest, ParseSequenceExample_ShapeFn) {
+ ShapeInferenceTestOp op("ParseSequenceExample");
+ auto set_outputs = [&op](int num_context_sparse, int num_context_dense,
+ int num_feature_list_sparse,
+ int num_feature_list_dense,
+ bool add_extra_shape = false) {
+ using NodeOutList = std::vector<NodeDefBuilder::NodeOut>;
+ using DataTypeList = std::vector<DataType>;
+ string string_in("test");
+ NodeDefBuilder::NodeOut node_in{"a", 0, DT_STRING};
+ TF_ASSERT_OK(
+ NodeDefBuilder("test", "ParseSequenceExample")
+ .Input("serialized", 0, DT_STRING)
+ .Input("debug_name", 0, DT_STRING)
+ .Input(NodeOutList(num_context_dense, node_in))
+ .Attr("Ncontext_sparse", num_context_sparse)
+ .Attr("Ncontext_dense", num_context_dense)
+ .Attr("Nfeature_list_sparse", num_feature_list_sparse)
+ .Attr("Nfeature_list_dense", num_feature_list_dense)
+ .Attr("feature_list_dense_missing_assumed_empty",
+ std::vector<string>(num_feature_list_dense, string_in))
+ .Attr("context_sparse_keys",
+ std::vector<string>(num_context_sparse, string_in))
+ .Attr("context_dense_keys",
+ std::vector<string>(num_context_dense, string_in))
+ .Attr("feature_list_sparse_keys",
+ std::vector<string>(num_feature_list_sparse, string_in))
+ .Attr("feature_list_dense_keys",
+ std::vector<string>(num_feature_list_dense, string_in))
+ .Attr("context_sparse_types",
+ DataTypeList(num_context_sparse, DT_FLOAT))
+ .Attr("context_dense_types",
+ DataTypeList(num_context_dense, DT_FLOAT))
+ .Attr("context_dense_shapes",
+ MakeDenseShapes(num_context_dense, add_extra_shape, 0))
+ .Attr("feature_list_sparse_types",
+ DataTypeList(num_feature_list_sparse, DT_FLOAT))
+ .Attr("feature_list_dense_types",
+ DataTypeList(num_feature_list_dense, DT_FLOAT))
+ .Attr("feature_list_dense_shapes",
+ MakeDenseShapes(num_feature_list_dense, add_extra_shape, 0))
+ .Finalize(&op.node_def));
+ };
+
+ // Verify inputs 'serialized' and 'debug_name'.
+ set_outputs(0, 0, 0, 0);
+ INFER_OK(op, "[?];[?]", "");
+ INFER_OK(op, "[8];[8]", "");
+ INFER_ERROR("must be rank 1", op, "[];[?]");
+ INFER_ERROR("must be rank 1", op, "[?];[]");
+
+ // context inputs with no feature_list inputs.
+ set_outputs(2 /* num_context_sparse */, 3 /* num_context_dense */, 0, 0);
+ INFER_OK(op, "[?];[?];?;?;?",
+ ("[?,2];[?,2];[?];[?];[2];[2];" // context sparse
+ "[d0_0,1];[d0_0,1,2];[d0_0,1,2,3]")); // context dense
+
+ // feature_list inputs with no context inputs.
+ set_outputs(0, 0, 2 /* num_feature_list_sparse */,
+ 3 /* num_feature_list_dense */);
+ INFER_OK(op, "[?];[?]",
+ ("[?,3];[?,3];[?];[?];[3];[3];" // feature_list sparse
+ "[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3];" // feature_list dense
+ "[d0_0];[d0_0];[d0_0]")); // feature_list length
+
+ // Combine previous two test cases.
+ set_outputs(2, 3, 2, 3);
+ INFER_OK(op, "[7];[7];?;?;?",
+ ("[?,2];[?,2];[?];[?];[2];[2];" // context sparse
+ "[d0_0,1];[d0_0,1,2];[d0_0,1,2,3];" // context dense
+ "[?,3];[?,3];[?];[?];[3];[3];" // feature_list sparse
+ "[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3];" // feature_list dense
+ "[d0_0];[d0_0];[d0_0]")); // feature_list length
+
+ // Confirm an error from ParseSequenceExampleAttrs.Init().
+ set_outputs(1, 1, 1, 1, true /* add_extra_shape */);
+ INFER_ERROR(
+ "num_context_dense (1) must match the size of context_dense_keys (1), "
+ "context_dense_types (1) and context_dense_shapes (2)",
+ op, "[?];[?];?");
+}
+
TEST(ParsingOpsTest, ParseSingleSequenceExample_ShapeFn) {
ShapeInferenceTestOp op("ParseSingleSequenceExample");
auto set_outputs = [&op](int num_context_sparse, int num_context_dense,
diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc
index a38cd1d09f..e52d55e2ff 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.cc
+++ b/tensorflow/core/util/example_proto_fast_parsing.cc
@@ -1722,10 +1722,11 @@ Status FastParseSequenceExample(
const FastParseExampleConfig& feature_list_config,
gtl::ArraySlice<string> serialized, gtl::ArraySlice<string> example_names,
thread::ThreadPool* thread_pool, Result* context_result,
- Result* feature_list_result) {
+ Result* feature_list_result, std::vector<Tensor>* dense_feature_lengths) {
int num_examples = serialized.size();
DCHECK(context_result != nullptr);
DCHECK(feature_list_result != nullptr);
+ DCHECK(dense_feature_lengths != nullptr);
std::map<StringPiece, bool> context_is_sparse;
std::map<StringPiece, std::pair<DataType, size_t>>
context_feature_type_and_lengths;
@@ -1740,9 +1741,22 @@ Status FastParseSequenceExample(
context_is_sparse[c.feature_name] = true;
}
for (auto& c : context_config.dense) {
+ if (context_is_sparse[c.feature_name]) {
+ return errors::InvalidArgument("Context feature " + c.feature_name +
+ " cannot be both dense and sparse");
+ }
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
context_feature_type_and_lengths[c.feature_name] =
- std::make_pair(c.dtype, 0);
+ std::make_pair(c.dtype, c.default_value.NumElements());
+ if (c.default_value.NumElements() > 0) {
+ if (!c.shape.IsCompatibleWith(c.default_value.shape())) {
+ return errors::InvalidArgument("Default value for context feature ",
+ c.feature_name,
+ " has an incorrect shape: saw ",
+ c.default_value.shape().DebugString(),
+ " but expected ", c.shape.DebugString());
+ }
+ }
context_is_sparse[c.feature_name] = false;
}
std::map<StringPiece, bool> sequence_is_sparse;
@@ -1755,6 +1769,10 @@ Status FastParseSequenceExample(
sequence_is_sparse[c.feature_name] = true;
}
for (auto& c : feature_list_config.dense) {
+ if (sequence_is_sparse[c.feature_name]) {
+ return errors::InvalidArgument("Sequence feature " + c.feature_name +
+ " cannot be both dense and sparse");
+ }
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
sequence_feature_type_and_lengths[c.feature_name] =
std::make_pair(c.dtype, 0);
@@ -1792,14 +1810,14 @@ Status FastParseSequenceExample(
features = sequence_features;
config = &sequence_feature_type_and_lengths;
} else if (!SkipExtraneousTag(&stream)) {
- return errors::InvalidArgument(strings::StrCat(
- "Invalid protocol message input, example id: ", example_name));
+ return errors::InvalidArgument(
+ "Invalid protocol message input, example id: ", example_name);
}
if (features != nullptr) {
uint32 length;
if (!stream.ReadVarint32(&length)) {
- return errors::InvalidArgument(strings::StrCat(
- "Invalid protocol message input, example id: ", example_name));
+ return errors::InvalidArgument(
+ "Invalid protocol message input, example id: ", example_name);
}
auto limit = stream.PushLimit(length);
while (!stream.ExpectAtEnd()) {
@@ -1807,16 +1825,16 @@ Status FastParseSequenceExample(
uint32 length;
if (!stream.ExpectTag(kDelimitedTag(1)) ||
!stream.ReadVarint32(&length)) {
- return errors::InvalidArgument(strings::StrCat(
- "Invalid protocol message input, example id: ", example_name));
+ return errors::InvalidArgument(
+ "Invalid protocol message input, example id: ", example_name);
}
auto limit = stream.PushLimit(length);
if (!stream.ExpectTag(kDelimitedTag(1)) ||
!ParseString(&stream, &key) ||
!stream.ExpectTag(kDelimitedTag(2)) ||
!ParseString(&stream, &value) || !stream.ExpectAtEnd()) {
- return errors::InvalidArgument(strings::StrCat(
- "Invalid protocol message input, example id: ", example_name));
+ return errors::InvalidArgument(
+ "Invalid protocol message input, example id: ", example_name);
}
stream.PopLimit(limit);
// Only save if this feature was requested.
@@ -1851,9 +1869,8 @@ Status FastParseSequenceExample(
break;
}
if (num == -1) {
- return errors::InvalidArgument(
- strings::StrCat("Error in context feature ", c.first,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in context feature ", c.first,
+ " in example ", example_name);
}
num_elements += num;
}
@@ -1876,9 +1893,9 @@ Status FastParseSequenceExample(
uint32 feature_length;
if (!stream.ExpectTag(kDelimitedTag(1)) ||
!stream.ReadVarint32(&feature_length)) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.first,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.first, " in example ",
+ example_name);
}
if (feature_length > 2) {
auto limit = stream.PushLimit(feature_length);
@@ -1898,22 +1915,22 @@ Status FastParseSequenceExample(
break;
}
if (num == -1) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.first,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.first, " in example ",
+ example_name);
}
num_elements += num;
stream.PopLimit(limit);
} else if (feature_length == 2) {
if (!SkipEmptyFeature(&stream, dtype)) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.first,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.first, " in example ",
+ example_name);
}
} else if (feature_length != 0) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.first,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.first, " in example ",
+ example_name);
}
}
}
@@ -1936,15 +1953,19 @@ Status FastParseSequenceExample(
feature_list_result->sparse_indices.resize(feature_list_config.sparse.size());
feature_list_result->sparse_shapes.resize(feature_list_config.sparse.size());
feature_list_result->dense_values.resize(feature_list_config.dense.size());
+ dense_feature_lengths->resize(feature_list_config.dense.size());
+
int t = 0;
for (const auto& c : context_config.dense) {
- TensorShape dense_shape;
+ TensorShape dense_shape, example_shape;
DataType dtype = c.dtype;
- size_t expected_max_elements =
+ const size_t expected_max_elements =
context_feature_type_and_lengths[c.feature_name].second;
- if (expected_max_elements != dense_shape.num_elements()) {
- return errors::InvalidArgument(strings::StrCat(
- "Inconsistent number of elements for feature ", c.feature_name));
+ if (!c.shape.AsTensorShape(&example_shape) ||
+ expected_max_elements != example_shape.num_elements()) {
+ return errors::InvalidArgument(
+ "Inconsistent number of elements for feature ", c.feature_name, ": ",
+ expected_max_elements, " vs ", dense_shape.num_elements());
}
dense_shape.AddDim(num_examples);
for (const int dim : c.shape.dim_sizes()) {
@@ -1968,18 +1989,58 @@ Status FastParseSequenceExample(
out_int64 = context_result->dense_values[t].flat<int64>().data();
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in feature ", c.feature_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in feature ", c.feature_name);
}
t++;
// Fill in the values.
for (int e = 0; e < num_examples; e++) {
size_t num_elements = 0;
- const auto& feature = all_context_features[e][c.feature_name];
+ const auto feature_iter = all_context_features[e].find(c.feature_name);
const string& example_name =
example_names.empty() ? kUnknown : example_names[e];
- if (!feature.empty()) {
+ if (feature_iter == all_context_features[e].end()) {
+ // Copy the default value, if present. If not, return an error.
+ if (c.default_value.NumElements() == 0) {
+ return errors::InvalidArgument(
+ "Feature: ", c.feature_name,
+ " (data type: ", DataTypeString(c.dtype), ")",
+ " is required but could not be found.");
+ }
+ const string* in_bytes = nullptr;
+ const float* in_float = nullptr;
+ const int64* in_int64 = nullptr;
+ size_t num = 0;
+ switch (dtype) {
+ case DT_STRING:
+ in_bytes = c.default_value.flat<string>().data();
+ num = c.default_value.NumElements();
+ for (int p = 0; p < num; p++) {
+ *out_bytes++ = *in_bytes++;
+ }
+ break;
+ case DT_FLOAT:
+ in_float = c.default_value.flat<float>().data();
+ num = c.default_value.NumElements();
+ for (int p = 0; p < num; p++) {
+ *out_float++ = *in_float++;
+ }
+ break;
+ case DT_INT64:
+ in_int64 = c.default_value.flat<int64>().data();
+ num = c.default_value.NumElements();
+ for (int p = 0; p < num; p++) {
+ *out_int64++ = *in_int64++;
+ }
+ break;
+ default:
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
+ }
+ num_elements += num;
+ } else if (!feature_iter->second.empty()) {
+ const auto& feature = feature_iter->second;
protobuf::io::CodedInputStream stream(
reinterpret_cast<const uint8*>(feature.data()), feature.size());
EnableAliasing(&stream);
@@ -1998,14 +2059,14 @@ Status FastParseSequenceExample(
out_int64 += num_added;
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in example ", example_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
}
num_elements += num_added;
}
if (num_elements != expected_max_elements) {
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected number of elements in example ", example_name));
+ return errors::InvalidArgument(
+ "Unexpected number of elements in example ", example_name);
}
}
}
@@ -2037,8 +2098,8 @@ Status FastParseSequenceExample(
out_int64 = context_result->sparse_values[t].flat<int64>().data();
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in feature ", c.feature_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in feature ", c.feature_name);
}
int64* out_indices = context_result->sparse_indices[t].flat<int64>().data();
auto out_shape = context_result->sparse_shapes[t].vec<int64>();
@@ -2070,8 +2131,8 @@ Status FastParseSequenceExample(
out_int64 += num_added;
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in example ", example_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
}
num_elements += num_added;
max_num_cols = std::max(max_num_cols, num_added);
@@ -2082,30 +2143,35 @@ Status FastParseSequenceExample(
}
}
if (num_elements != expected_num_elements) {
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected total number of elements in feature ", c.feature_name));
+ return errors::InvalidArgument(
+ "Unexpected total number of elements in feature ", c.feature_name);
}
out_shape(0) = num_examples;
out_shape(1) = max_num_cols;
}
t = 0;
+ TensorShape dense_length_shape({num_examples});
for (const auto& c : feature_list_config.dense) {
TensorShape dense_shape, row_shape;
DataType dtype = c.dtype;
- size_t expected_max_elements =
+ const size_t expected_max_elements =
sequence_feature_type_and_lengths[c.feature_name].second;
- int64 expected_max_rows = expected_max_elements / row_shape.num_elements();
if (!c.shape.AsTensorShape(&row_shape) ||
- expected_max_elements != expected_max_rows * row_shape.num_elements()) {
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected shape error in feature ", c.feature_name));
+ expected_max_elements !=
+ (expected_max_elements / row_shape.num_elements()) *
+ row_shape.num_elements()) {
+ return errors::InvalidArgument("Unexpected shape error in feature ",
+ c.feature_name);
}
+ int64 expected_max_rows = expected_max_elements / row_shape.num_elements();
dense_shape.AddDim(num_examples);
dense_shape.AddDim(expected_max_rows);
for (const int dim : feature_list_config.dense[t].shape.dim_sizes()) {
dense_shape.AddDim(dim);
}
feature_list_result->dense_values[t] = Tensor(dtype, dense_shape);
+ (*dense_feature_lengths)[t] = Tensor(DT_INT64, dense_length_shape);
+ int64* out_lengths = (*dense_feature_lengths)[t].flat<int64>().data();
string* out_bytes = nullptr;
float* out_float = nullptr;
@@ -2121,18 +2187,26 @@ Status FastParseSequenceExample(
out_int64 = feature_list_result->dense_values[t].flat<int64>().data();
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in feature ", c.feature_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in feature ", c.feature_name);
}
t++;
// Fill in the values.
for (int e = 0; e < num_examples; e++) {
- size_t num_elements = 0;
- const auto& feature = all_sequence_features[e][c.feature_name];
+ size_t num_elements = 0, num_rows = 0;
+ const auto feature_iter = all_sequence_features[e].find(c.feature_name);
const string& example_name =
example_names.empty() ? kUnknown : example_names[e];
- if (!feature.empty()) {
+ if (feature_iter == all_sequence_features[e].end()) {
+ // Return an error if this feature was not allowed to be missing.
+ // Otherwise, we'll pad as needed below.
+ if (!c.variable_length) {
+ return errors::InvalidArgument("Missing feature ", c.feature_name,
+ " in example ", example_name);
+ }
+ } else if (!feature_iter->second.empty()) {
+ const auto& feature = feature_iter->second;
protobuf::io::CodedInputStream stream(
reinterpret_cast<const uint8*>(feature.data()), feature.size());
EnableAliasing(&stream);
@@ -2140,9 +2214,9 @@ Status FastParseSequenceExample(
uint32 feature_length;
if (!stream.ExpectTag(kDelimitedTag(1)) ||
!stream.ReadVarint32(&feature_length)) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.feature_name,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.feature_name, " in example ",
+ example_name);
}
auto limit = stream.PushLimit(feature_length);
size_t num_added;
@@ -2160,10 +2234,11 @@ Status FastParseSequenceExample(
out_int64 += num_added;
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in example ", example_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
}
num_elements += num_added;
+ num_rows++;
if (num_added != row_shape.num_elements()) {
return errors::InvalidArgument(
"Unexpected number of elements in feature ", c.feature_name,
@@ -2172,6 +2247,7 @@ Status FastParseSequenceExample(
stream.PopLimit(limit);
}
}
+ *out_lengths++ = num_rows;
// Pad as necessary.
int num_to_pad = expected_max_elements - num_elements;
switch (dtype) {
@@ -2187,8 +2263,8 @@ Status FastParseSequenceExample(
out_int64 += num_to_pad;
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in example ", example_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
}
}
}
@@ -2219,8 +2295,8 @@ Status FastParseSequenceExample(
out_int64 = feature_list_result->sparse_values[t].flat<int64>().data();
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in feature ", c.feature_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in feature ", c.feature_name);
}
int64* out_indices =
feature_list_result->sparse_indices[t].flat<int64>().data();
@@ -2244,9 +2320,9 @@ Status FastParseSequenceExample(
uint32 feature_length;
if (!stream.ExpectTag(kDelimitedTag(1)) ||
!stream.ReadVarint32(&feature_length)) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.feature_name,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.feature_name, " in example ",
+ example_name);
}
if (feature_length > 2) {
auto limit = stream.PushLimit(feature_length);
@@ -2265,8 +2341,8 @@ Status FastParseSequenceExample(
out_int64 += num_added;
break;
default:
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected dtype ", dtype, " in example ", example_name));
+ return errors::InvalidArgument("Unexpected dtype ", dtype,
+ " in example ", example_name);
}
num_elements += num_added;
max_num_cols = std::max(max_num_cols, num_added);
@@ -2278,14 +2354,14 @@ Status FastParseSequenceExample(
stream.PopLimit(limit);
} else if (feature_length == 2) {
if (!SkipEmptyFeature(&stream, dtype)) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.feature_name,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.feature_name, " in example ",
+ example_name);
}
} else if (feature_length != 0) {
- return errors::InvalidArgument(
- strings::StrCat("Error in sequence feature ", c.feature_name,
- " in example ", example_name));
+ return errors::InvalidArgument("Error in sequence feature ",
+ c.feature_name, " in example ",
+ example_name);
}
num_rows++;
}
@@ -2293,8 +2369,8 @@ Status FastParseSequenceExample(
}
}
if (num_elements != expected_num_elements) {
- return errors::InvalidArgument(strings::StrCat(
- "Unexpected number of elements in feature ", c.feature_name));
+ return errors::InvalidArgument(
+ "Unexpected number of elements in feature ", c.feature_name);
}
out_shape(0) = num_examples;
out_shape(1) = max_num_rows;
diff --git a/tensorflow/core/util/example_proto_fast_parsing.h b/tensorflow/core/util/example_proto_fast_parsing.h
index db5b5ff929..055d9c2c30 100644
--- a/tensorflow/core/util/example_proto_fast_parsing.h
+++ b/tensorflow/core/util/example_proto_fast_parsing.h
@@ -118,7 +118,8 @@ Status FastParseSequenceExample(
const example::FastParseExampleConfig& feature_list_config,
gtl::ArraySlice<string> serialized, gtl::ArraySlice<string> example_names,
thread::ThreadPool* thread_pool, example::Result* context_result,
- example::Result* feature_list_result);
+ example::Result* feature_list_result,
+ std::vector<Tensor>* dense_feature_lengths);
// This function parses serialized Example and populates given example.
// It uses the same specialized parser as FastParseExample which is efficient.
diff --git a/tensorflow/core/util/example_proto_helper.cc b/tensorflow/core/util/example_proto_helper.cc
index e156a3bc8f..41fb20c00a 100644
--- a/tensorflow/core/util/example_proto_helper.cc
+++ b/tensorflow/core/util/example_proto_helper.cc
@@ -443,6 +443,59 @@ Status ParseSingleExampleAttrs::FinishInit() {
return Status::OK();
}
+Status ParseSequenceExampleAttrs::FinishInit() {
+ if (num_context_sparse != context_sparse_keys.size() ||
+ num_context_sparse != context_sparse_types.size()) {
+ return errors::InvalidArgument(
+ "num_context_sparse (", num_context_sparse,
+ ") must match the size of context_sparse_keys (",
+ context_sparse_keys.size(), ") and context_sparse_types (",
+ context_sparse_types.size(), ")");
+ }
+ if (num_context_dense != context_dense_keys.size() ||
+ num_context_dense != context_dense_types.size() ||
+ num_context_dense != context_dense_shapes.size()) {
+ return errors::InvalidArgument(
+ "num_context_dense (", num_context_dense,
+ ") must match the size of context_dense_keys (",
+ context_dense_keys.size(), "), context_dense_types (",
+ context_dense_types.size(), ") and context_dense_shapes (",
+ context_dense_shapes.size(), ")");
+ }
+ if (num_feature_list_sparse != feature_list_sparse_keys.size() ||
+ num_feature_list_sparse != feature_list_sparse_types.size()) {
+ return errors::InvalidArgument(
+ "num_feature_list_sparse (", num_feature_list_sparse,
+ ") must match the size of feature_list_sparse_keys (",
+ feature_list_sparse_keys.size(), ") and feature_list_sparse_types (",
+ feature_list_sparse_types.size(), ")");
+ }
+ if (num_feature_list_dense != feature_list_dense_keys.size() ||
+ num_feature_list_dense != feature_list_dense_types.size() ||
+ num_feature_list_dense != feature_list_dense_shapes.size()) {
+ return errors::InvalidArgument(
+ "num_feature_list_dense (", num_feature_list_dense,
+ ") must match the size of feature_list_dense_keys (",
+ feature_list_dense_keys.size(), "), feature_list_dense_types (",
+ feature_list_dense_types.size(), ") and feature_list_dense_shapes (",
+ feature_list_dense_shapes.size(), ")");
+ }
+ for (const DataType& type : context_dense_types) {
+ TF_RETURN_IF_ERROR(CheckValidType(type));
+ }
+ for (const DataType& type : context_sparse_types) {
+ TF_RETURN_IF_ERROR(CheckValidType(type));
+ }
+ for (const DataType& type : feature_list_dense_types) {
+ TF_RETURN_IF_ERROR(CheckValidType(type));
+ }
+ for (const DataType& type : feature_list_sparse_types) {
+ TF_RETURN_IF_ERROR(CheckValidType(type));
+ }
+
+ return Status::OK();
+}
+
Status ParseSingleSequenceExampleAttrs::FinishInit() {
if (static_cast<size_t>(num_context_sparse) != context_sparse_types.size()) {
return errors::InvalidArgument(
diff --git a/tensorflow/core/util/example_proto_helper.h b/tensorflow/core/util/example_proto_helper.h
index e511704962..c183ee4d96 100644
--- a/tensorflow/core/util/example_proto_helper.h
+++ b/tensorflow/core/util/example_proto_helper.h
@@ -26,6 +26,7 @@ limitations under the License.
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/util/sparse/sparse_tensor.h"
@@ -271,6 +272,66 @@ class ParseSingleExampleAttrs {
Status FinishInit(); // for context-independent parts of Init.
};
+// Parses the attributes passed to ParseSequenceExample.
+// REQUIRES: Init must be called after construction.
+class ParseSequenceExampleAttrs {
+ public:
+ template <typename ContextType>
+ Status Init(ContextType* ctx) {
+ std::vector<string> feature_list_dense_missing_assumed_empty_tmp;
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_dense_missing_assumed_empty",
+ &feature_list_dense_missing_assumed_empty_tmp));
+ for (const string& feature : feature_list_dense_missing_assumed_empty_tmp) {
+ feature_list_dense_missing_assumed_empty.insert(feature);
+ }
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("context_sparse_keys", &context_sparse_keys));
+ TF_RETURN_IF_ERROR(ctx->GetAttr("context_dense_keys", &context_dense_keys));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_sparse_keys", &feature_list_sparse_keys));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_dense_keys", &feature_list_dense_keys));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("context_sparse_types", &context_sparse_types));
+ TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense));
+ TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse));
+ TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("context_dense_shapes", &context_dense_shapes));
+ TF_RETURN_IF_ERROR(
+ ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes));
+ return FinishInit();
+ }
+
+ std::unordered_set<string> feature_list_dense_missing_assumed_empty;
+ int64 num_context_sparse;
+ int64 num_context_dense;
+ int64 num_feature_list_sparse;
+ int64 num_feature_list_dense;
+ std::vector<string> context_sparse_keys;
+ std::vector<string> context_dense_keys;
+ std::vector<string> feature_list_sparse_keys;
+ std::vector<string> feature_list_dense_keys;
+ std::vector<DataType> context_sparse_types;
+ std::vector<DataType> context_dense_types;
+ std::vector<TensorShape> context_dense_shapes;
+ std::vector<DataType> feature_list_sparse_types;
+ std::vector<DataType> feature_list_dense_types;
+ std::vector<TensorShape> feature_list_dense_shapes;
+
+ private:
+ Status FinishInit(); // for context-independent parts of Init.
+};
+
// Parses the attributes passed to ParseSingleSequenceExample.
// REQUIRES: Init must be called after construction.
class ParseSingleSequenceExampleAttrs {
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
index 59b3ee2013..7dff4501cc 100644
--- a/tensorflow/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -60,8 +60,9 @@ def flatten(list_of_lists):
def flatten_values_tensors_or_sparse(tensors_list):
"""Flatten each SparseTensor object into 3 Tensors for session.run()."""
return list(
- flatten([[v.indices, v.values, v.dense_shape] if isinstance(
- v, sparse_tensor.SparseTensor) else [v] for v in tensors_list]))
+ flatten([[v.indices, v.values, v.dense_shape]
+ if isinstance(v, sparse_tensor.SparseTensor) else [v]
+ for v in tensors_list]))
def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
@@ -106,8 +107,9 @@ class ParseExampleTest(test.TestCase):
# Check shapes; if serialized is a Tensor we need its size to
# properly check.
serialized = kwargs["serialized"]
- batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
- else np.asarray(serialized).size)
+ batch_size = (
+ serialized.eval().size if isinstance(serialized, ops.Tensor) else
+ np.asarray(serialized).size)
for k, f in kwargs["features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
self.assertEqual(
@@ -129,12 +131,9 @@ class ParseExampleTest(test.TestCase):
c_default = np.random.rand(2).astype(np.float32)
expected_st_a = ( # indices, values, shape
- np.empty(
- (0, 2), dtype=np.int64), # indices
- np.empty(
- (0,), dtype=np.int64), # sp_a is DT_INT64
- np.array(
- [2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
+ np.empty((0, 2), dtype=np.int64), # indices
+ np.empty((0,), dtype=np.int64), # sp_a is DT_INT64
+ np.array([2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
expected_output = {
sparse_name: expected_st_a,
@@ -143,28 +142,23 @@ class ParseExampleTest(test.TestCase):
c_name: np.array(2 * [c_default]),
}
- self._test(
- {
- "example_names":
- np.empty(
- (0,), dtype=bytes),
- "serialized":
- ops.convert_to_tensor(["", ""]),
- "features": {
- sparse_name:
- parsing_ops.VarLenFeature(dtypes.int64),
- a_name:
- parsing_ops.FixedLenFeature(
- (1, 3), dtypes.int64, default_value=a_default),
- b_name:
- parsing_ops.FixedLenFeature(
- (3, 3), dtypes.string, default_value=b_default),
- c_name:
- parsing_ops.FixedLenFeature(
- (2,), dtypes.float32, default_value=c_default),
- }
- },
- expected_output)
+ self._test({
+ "example_names": np.empty((0,), dtype=bytes),
+ "serialized": ops.convert_to_tensor(["", ""]),
+ "features": {
+ sparse_name:
+ parsing_ops.VarLenFeature(dtypes.int64),
+ a_name:
+ parsing_ops.FixedLenFeature(
+ (1, 3), dtypes.int64, default_value=a_default),
+ b_name:
+ parsing_ops.FixedLenFeature(
+ (3, 3), dtypes.string, default_value=b_default),
+ c_name:
+ parsing_ops.FixedLenFeature(
+ (2,), dtypes.float32, default_value=c_default),
+ }
+ }, expected_output)
def testEmptySerializedWithoutDefaultsShouldFail(self):
input_features = {
@@ -180,8 +174,7 @@ class ParseExampleTest(test.TestCase):
default_value=np.random.rand(3, 3).astype(bytes)),
# Feature "c" is missing a default, this gap will cause failure.
"c":
- parsing_ops.FixedLenFeature(
- (2,), dtype=dtypes.float32),
+ parsing_ops.FixedLenFeature((2,), dtype=dtypes.float32),
}
# Edge case where the key is there but the feature value is empty
@@ -211,7 +204,8 @@ class ParseExampleTest(test.TestCase):
original = [
example(features=features({
"a": float_feature([1, 1, 3]),
- })), example(features=features({
+ })),
+ example(features=features({
"a": float_feature([-1, -1]),
}))
]
@@ -231,7 +225,11 @@ class ParseExampleTest(test.TestCase):
"Name: failing, Key: a, Index: 1. Number of float val"))
def testDenseDefaultNoShapeShouldFail(self):
- original = [example(features=features({"a": float_feature([1, 1, 3]),})),]
+ original = [
+ example(features=features({
+ "a": float_feature([1, 1, 3]),
+ })),
+ ]
serialized = [m.SerializeToString() for m in original]
@@ -250,31 +248,31 @@ class ParseExampleTest(test.TestCase):
example(features=features({
"st_c": float_feature([3, 4])
})),
- example(features=features({
- "st_c": float_feature([]), # empty float list
- })),
- example(features=features({
- "st_d": feature(), # feature with nothing in it
- })),
- example(features=features({
- "st_c": float_feature([1, 2, -1]),
- "st_d": bytes_feature([b"hi"])
- }))
+ example(
+ features=features({
+ "st_c": float_feature([]), # empty float list
+ })),
+ example(
+ features=features({
+ "st_d": feature(), # feature with nothing in it
+ })),
+ example(
+ features=features({
+ "st_c": float_feature([1, 2, -1]),
+ "st_d": bytes_feature([b"hi"])
+ }))
]
serialized = [m.SerializeToString() for m in original]
expected_st_c = ( # indices, values, shape
- np.array(
- [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array(
- [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array(
- [4, 3], dtype=np.int64)) # batch == 2, max_elems = 3
+ np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
+ np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
+        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3
expected_st_d = ( # indices, values, shape
- np.array(
- [[3, 0]], dtype=np.int64), np.array(
- ["hi"], dtype=bytes), np.array(
- [4, 1], dtype=np.int64)) # batch == 2, max_elems = 1
+ np.array([[3, 0]], dtype=np.int64), np.array(["hi"], dtype=bytes),
+ np.array([4, 1], dtype=np.int64)) # batch == 2, max_elems = 1
expected_output = {
"st_c": expected_st_c,
@@ -291,70 +289,74 @@ class ParseExampleTest(test.TestCase):
def testSerializedContainingSparseFeature(self):
original = [
- example(features=features({
- "val": float_feature([3, 4]),
- "idx": int64_feature([5, 10])
- })),
- example(features=features({
- "val": float_feature([]), # empty float list
- "idx": int64_feature([])
- })),
- example(features=features({
- "val": feature(), # feature with nothing in it
- # missing idx feature
- })),
- example(features=features({
- "val": float_feature([1, 2, -1]),
- "idx":
- int64_feature([0, 9, 3]) # unsorted
- }))
+ example(
+ features=features({
+ "val": float_feature([3, 4]),
+ "idx": int64_feature([5, 10])
+ })),
+ example(
+ features=features({
+ "val": float_feature([]), # empty float list
+ "idx": int64_feature([])
+ })),
+ example(
+ features=features({
+ "val": feature(), # feature with nothing in it
+ # missing idx feature
+ })),
+ example(
+ features=features({
+ "val": float_feature([1, 2, -1]),
+ "idx":
+ int64_feature([0, 9, 3]) # unsorted
+ }))
]
serialized = [m.SerializeToString() for m in original]
expected_sp = ( # indices, values, shape
- np.array(
- [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
- np.array(
- [3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), np.array(
- [4, 13], dtype=np.int64)) # batch == 4, max_elems = 13
+ np.array([[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
+ np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
+ np.array([4, 13], dtype=np.int64)) # batch == 4, max_elems = 13
- expected_output = {"sp": expected_sp,}
+ expected_output = {
+ "sp": expected_sp,
+ }
self._test({
"serialized": ops.convert_to_tensor(serialized),
"features": {
- "sp": parsing_ops.SparseFeature(
- ["idx"], "val", dtypes.float32, [13])
+ "sp":
+ parsing_ops.SparseFeature(["idx"], "val", dtypes.float32, [13])
}
}, expected_output)
def testSerializedContainingSparseFeatureReuse(self):
original = [
- example(features=features({
- "val1": float_feature([3, 4]),
- "val2": float_feature([5, 6]),
- "idx": int64_feature([5, 10])
- })),
- example(features=features({
- "val1": float_feature([]), # empty float list
- "idx": int64_feature([])
- })),
+ example(
+ features=features({
+ "val1": float_feature([3, 4]),
+ "val2": float_feature([5, 6]),
+ "idx": int64_feature([5, 10])
+ })),
+ example(
+ features=features({
+ "val1": float_feature([]), # empty float list
+ "idx": int64_feature([])
+ })),
]
serialized = [m.SerializeToString() for m in original]
expected_sp1 = ( # indices, values, shape
- np.array(
- [[0, 5], [0, 10]], dtype=np.int64), np.array(
- [3.0, 4.0], dtype=np.float32), np.array(
- [2, 13], dtype=np.int64)) # batch == 2, max_elems = 13
+ np.array([[0, 5], [0, 10]], dtype=np.int64),
+ np.array([3.0, 4.0], dtype=np.float32), np.array(
+ [2, 13], dtype=np.int64)) # batch == 2, max_elems = 13
expected_sp2 = ( # indices, values, shape
- np.array(
- [[0, 5], [0, 10]], dtype=np.int64), np.array(
- [5.0, 6.0], dtype=np.float32), np.array(
- [2, 7], dtype=np.int64)) # batch == 2, max_elems = 13
+ np.array([[0, 5], [0, 10]], dtype=np.int64),
+ np.array([5.0, 6.0], dtype=np.float32), np.array(
+            [2, 7], dtype=np.int64))  # batch == 2, max_elems = 7
expected_output = {
"sp1": expected_sp1,
@@ -374,25 +376,29 @@ class ParseExampleTest(test.TestCase):
def testSerializedContaining3DSparseFeature(self):
original = [
- example(features=features({
- "val": float_feature([3, 4]),
- "idx0": int64_feature([5, 10]),
- "idx1": int64_feature([0, 2]),
- })),
- example(features=features({
- "val": float_feature([]), # empty float list
- "idx0": int64_feature([]),
- "idx1": int64_feature([]),
- })),
- example(features=features({
- "val": feature(), # feature with nothing in it
- # missing idx feature
- })),
- example(features=features({
- "val": float_feature([1, 2, -1]),
- "idx0": int64_feature([0, 9, 3]), # unsorted
- "idx1": int64_feature([1, 0, 2]),
- }))
+ example(
+ features=features({
+ "val": float_feature([3, 4]),
+ "idx0": int64_feature([5, 10]),
+ "idx1": int64_feature([0, 2]),
+ })),
+ example(
+ features=features({
+ "val": float_feature([]), # empty float list
+ "idx0": int64_feature([]),
+ "idx1": int64_feature([]),
+ })),
+ example(
+ features=features({
+ "val": feature(), # feature with nothing in it
+ # missing idx feature
+ })),
+ example(
+ features=features({
+ "val": float_feature([1, 2, -1]),
+ "idx0": int64_feature([0, 9, 3]), # unsorted
+ "idx1": int64_feature([1, 0, 2]),
+ }))
]
serialized = [m.SerializeToString() for m in original]
@@ -407,13 +413,16 @@ class ParseExampleTest(test.TestCase):
# shape batch == 4, max_elems = 13
np.array([4, 13, 3], dtype=np.int64))
- expected_output = {"sp": expected_sp,}
+ expected_output = {
+ "sp": expected_sp,
+ }
self._test({
"serialized": ops.convert_to_tensor(serialized),
"features": {
- "sp": parsing_ops.SparseFeature(
- ["idx0", "idx1"], "val", dtypes.float32, [13, 3])
+ "sp":
+ parsing_ops.SparseFeature(["idx0", "idx1"], "val",
+ dtypes.float32, [13, 3])
}
}, expected_output)
@@ -421,41 +430,37 @@ class ParseExampleTest(test.TestCase):
aname = "a"
bname = "b*has+a:tricky_name"
original = [
- example(features=features({
- aname: float_feature([1, 1]),
- bname: bytes_feature([b"b0_str"]),
- })), example(features=features({
- aname: float_feature([-1, -1]),
- bname: bytes_feature([b""]),
- }))
+ example(
+ features=features({
+ aname: float_feature([1, 1]),
+ bname: bytes_feature([b"b0_str"]),
+ })),
+ example(
+ features=features({
+ aname: float_feature([-1, -1]),
+ bname: bytes_feature([b""]),
+ }))
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
aname:
- np.array(
- [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
+ np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
bname:
- np.array(
- ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
+ np.array(["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
}
# No defaults, values required
- self._test(
- {
- "serialized":
- ops.convert_to_tensor(serialized),
- "features": {
- aname:
- parsing_ops.FixedLenFeature(
- (1, 2, 1), dtype=dtypes.float32),
- bname:
- parsing_ops.FixedLenFeature(
- (1, 1, 1, 1), dtype=dtypes.string),
- }
- },
- expected_output)
+ self._test({
+ "serialized": ops.convert_to_tensor(serialized),
+ "features": {
+ aname:
+ parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
+ bname:
+ parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string),
+ }
+ }, expected_output)
# This test is identical as the previous one except
# for the creation of 'serialized'.
@@ -466,18 +471,22 @@ class ParseExampleTest(test.TestCase):
original = [
(example(features=features({
aname: float_feature([10, 10]),
- })), example(features=features({
- aname: float_feature([1, 1]),
- bname: bytes_feature([b"b0_str"]),
- }))),
+ })),
+ example(
+ features=features({
+ aname: float_feature([1, 1]),
+ bname: bytes_feature([b"b0_str"]),
+ }))),
(
example(features=features({
bname: bytes_feature([b"b100"]),
})),
- example(features=features({
- aname: float_feature([-1, -1]),
- bname: bytes_feature([b"b1"]),
- })),),
+ example(
+ features=features({
+ aname: float_feature([-1, -1]),
+ bname: bytes_feature([b"b1"]),
+ })),
+ ),
]
serialized = [
@@ -486,55 +495,45 @@ class ParseExampleTest(test.TestCase):
expected_output = {
aname:
- np.array(
- [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
+ np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
bname:
- np.array(
- ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
+ np.array(["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
}
# No defaults, values required
- self._test(
- {
- "serialized":
- ops.convert_to_tensor(serialized),
- "features": {
- aname:
- parsing_ops.FixedLenFeature(
- (1, 2, 1), dtype=dtypes.float32),
- bname:
- parsing_ops.FixedLenFeature(
- (1, 1, 1, 1), dtype=dtypes.string),
- }
- },
- expected_output)
+ self._test({
+ "serialized": ops.convert_to_tensor(serialized),
+ "features": {
+ aname:
+ parsing_ops.FixedLenFeature((1, 2, 1), dtype=dtypes.float32),
+ bname:
+ parsing_ops.FixedLenFeature((1, 1, 1, 1), dtype=dtypes.string),
+ }
+ }, expected_output)
def testSerializedContainingDenseScalar(self):
original = [
example(features=features({
"a": float_feature([1]),
- })), example(features=features({}))
+ })),
+ example(features=features({}))
]
serialized = [m.SerializeToString() for m in original]
expected_output = {
"a":
- np.array(
- [[1], [-1]], dtype=np.float32) # 2x1 (column vector)
+ np.array([[1], [-1]], dtype=np.float32) # 2x1 (column vector)
}
- self._test(
- {
- "serialized":
- ops.convert_to_tensor(serialized),
- "features": {
- "a":
- parsing_ops.FixedLenFeature(
- (1,), dtype=dtypes.float32, default_value=-1),
- }
- },
- expected_output)
+ self._test({
+ "serialized": ops.convert_to_tensor(serialized),
+ "features": {
+ "a":
+ parsing_ops.FixedLenFeature(
+ (1,), dtype=dtypes.float32, default_value=-1),
+ }
+ }, expected_output)
def testSerializedContainingDenseWithDefaults(self):
original = [
@@ -553,58 +552,48 @@ class ParseExampleTest(test.TestCase):
expected_output = {
"a":
- np.array(
- [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2,
- 1),
+ np.array([[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(
+ 3, 1, 2, 1),
"b":
- np.array(
- ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1,
- 1),
+ np.array(["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(
+ 3, 1, 1, 1, 1),
}
- self._test(
- {
- "serialized":
- ops.convert_to_tensor(serialized),
- "features": {
- "a":
- parsing_ops.FixedLenFeature(
- (1, 2, 1),
- dtype=dtypes.float32,
- default_value=[3.0, -3.0]),
- "b":
- parsing_ops.FixedLenFeature(
- (1, 1, 1, 1),
- dtype=dtypes.string,
- default_value="tmp_str"),
- }
- },
- expected_output)
+ self._test({
+ "serialized": ops.convert_to_tensor(serialized),
+ "features": {
+ "a":
+ parsing_ops.FixedLenFeature(
+ (1, 2, 1), dtype=dtypes.float32, default_value=[3.0, -3.0]),
+ "b":
+ parsing_ops.FixedLenFeature(
+ (1, 1, 1, 1), dtype=dtypes.string, default_value="tmp_str"),
+ }
+ }, expected_output)
def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
expected_st_a = ( # indices, values, shape
- np.empty(
- (0, 2), dtype=np.int64), # indices
- np.empty(
- (0,), dtype=np.int64), # sp_a is DT_INT64
- np.array(
- [2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
+ np.empty((0, 2), dtype=np.int64), # indices
+ np.empty((0,), dtype=np.int64), # sp_a is DT_INT64
+ np.array([2, 0], dtype=np.int64)) # batch == 2, max_elems = 0
expected_sp = ( # indices, values, shape
- np.array(
- [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array(
- ["a", "b", "c"], dtype="|S"), np.array(
- [2, 13], dtype=np.int64)) # batch == 4, max_elems = 13
+ np.array([[0, 0], [0, 3], [1, 7]], dtype=np.int64),
+ np.array(["a", "b", "c"], dtype="|S"), np.array(
+            [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13
original = [
- example(features=features({
- "c": float_feature([3, 4]),
- "val": bytes_feature([b"a", b"b"]),
- "idx": int64_feature([0, 3])
- })), example(features=features({
- "c": float_feature([1, 2]),
- "val": bytes_feature([b"c"]),
- "idx": int64_feature([7])
- }))
+ example(
+ features=features({
+ "c": float_feature([3, 4]),
+ "val": bytes_feature([b"a", b"b"]),
+ "idx": int64_feature([0, 3])
+ })),
+ example(
+ features=features({
+ "c": float_feature([1, 2]),
+ "val": bytes_feature([b"c"]),
+ "idx": int64_feature([7])
+ }))
]
names = ["in1", "in2"]
@@ -617,16 +606,13 @@ class ParseExampleTest(test.TestCase):
"sp": expected_sp,
"a": np.array(2 * [[a_default]]),
"b": np.array(2 * [b_default]),
- "c": np.array(
- [[3, 4], [1, 2]], dtype=np.float32),
+ "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
}
self._test(
{
- "example_names":
- names,
- "serialized":
- ops.convert_to_tensor(serialized),
+ "example_names": names,
+ "serialized": ops.convert_to_tensor(serialized),
"features": {
"st_a":
parsing_ops.VarLenFeature(dtypes.int64),
@@ -647,25 +633,26 @@ class ParseExampleTest(test.TestCase):
def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
expected_idx = ( # indices, values, shape
- np.array(
- [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
- np.array([0, 3, 7, 1]), np.array(
- [2, 2], dtype=np.int64)) # batch == 4, max_elems = 2
+ np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
+ np.array([0, 3, 7, 1]),
+        np.array([2, 2], dtype=np.int64))  # batch == 2, max_elems = 2
expected_sp = ( # indices, values, shape
- np.array(
- [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array(
- ["a", "b", "d", "c"], dtype="|S"), np.array(
- [2, 13], dtype=np.int64)) # batch == 4, max_elems = 13
+ np.array([[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64),
+ np.array(["a", "b", "d", "c"], dtype="|S"),
+        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13
original = [
- example(features=features({
- "val": bytes_feature([b"a", b"b"]),
- "idx": int64_feature([0, 3])
- })), example(features=features({
- "val": bytes_feature([b"c", b"d"]),
- "idx": int64_feature([7, 1])
- }))
+ example(
+ features=features({
+ "val": bytes_feature([b"a", b"b"]),
+ "idx": int64_feature([0, 3])
+ })),
+ example(
+ features=features({
+ "val": bytes_feature([b"c", b"d"]),
+ "idx": int64_feature([7, 1])
+ }))
]
names = ["in1", "in2"]
@@ -680,9 +667,10 @@ class ParseExampleTest(test.TestCase):
"example_names": names,
"serialized": ops.convert_to_tensor(serialized),
"features": {
- "idx": parsing_ops.VarLenFeature(dtypes.int64),
- "sp": parsing_ops.SparseFeature(
- ["idx"], "val", dtypes.string, [13]),
+ "idx":
+ parsing_ops.VarLenFeature(dtypes.int64),
+ "sp":
+ parsing_ops.SparseFeature(["idx"], "val", dtypes.string, [13]),
}
}, expected_output)
@@ -720,10 +708,11 @@ class ParseExampleTest(test.TestCase):
}
original = [
- example(features=features(
- {"a": int64_feature([truth_int[i]]),
- "b": bytes_feature(truth_str[i])}))
- for i in range(batch_size)
+ example(
+ features=features({
+ "a": int64_feature([truth_int[i]]),
+ "b": bytes_feature(truth_str[i])
+ })) for i in range(batch_size)
]
serialized = [m.SerializeToString() for m in original]
@@ -731,12 +720,18 @@ class ParseExampleTest(test.TestCase):
self._test({
"serialized": ops.convert_to_tensor(serialized, dtype=dtypes.string),
"features": {
- "a": parsing_ops.FixedLenSequenceFeature(
- shape=(), dtype=dtypes.int64, allow_missing=True,
- default_value=-1),
- "b": parsing_ops.FixedLenSequenceFeature(
- shape=[], dtype=dtypes.string, allow_missing=True,
- default_value="default"),
+ "a":
+ parsing_ops.FixedLenSequenceFeature(
+ shape=(),
+ dtype=dtypes.int64,
+ allow_missing=True,
+ default_value=-1),
+ "b":
+ parsing_ops.FixedLenSequenceFeature(
+ shape=[],
+ dtype=dtypes.string,
+ allow_missing=True,
+ default_value="default"),
}
}, expected_output)
@@ -755,18 +750,21 @@ class ParseExampleTest(test.TestCase):
example(features=features({
cname: int64_feature([2]),
})),
- example(features=features({
- aname: float_feature([1, 1]),
- bname: bytes_feature([b"b0_str", b"b1_str"]),
- })),
- example(features=features({
- aname: float_feature([-1, -1, 2, 2]),
- bname: bytes_feature([b"b1"]),
- })),
- example(features=features({
- aname: float_feature([]),
- cname: int64_feature([3]),
- })),
+ example(
+ features=features({
+ aname: float_feature([1, 1]),
+ bname: bytes_feature([b"b0_str", b"b1_str"]),
+ })),
+ example(
+ features=features({
+ aname: float_feature([-1, -1, 2, 2]),
+ bname: bytes_feature([b"b1"]),
+ })),
+ example(
+ features=features({
+ aname: float_feature([]),
+ cname: int64_feature([3]),
+ })),
]
serialized = [m.SerializeToString() for m in original]
@@ -827,7 +825,9 @@ class ParseExampleTest(test.TestCase):
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
- (2, 1), dtype=dtypes.float32, allow_missing=True,
+ (2, 1),
+ dtype=dtypes.float32,
+ allow_missing=True,
default_value=-2.0),
bname:
parsing_ops.FixedLenSequenceFeature(
@@ -867,7 +867,9 @@ class ParseExampleTest(test.TestCase):
"features": {
aname:
parsing_ops.FixedLenSequenceFeature(
- (2, 1), dtype=dtypes.float32, allow_missing=True,
+ (2, 1),
+ dtype=dtypes.float32,
+ allow_missing=True,
default_value=[]),
bname:
parsing_ops.FixedLenSequenceFeature(
@@ -908,26 +910,28 @@ class ParseExampleTest(test.TestCase):
"All dimensions of shape for feature c need to be known "
r"but received \(1, None\)."))
- self._test({
- "example_names": example_names,
- "serialized": ops.convert_to_tensor(serialized),
- "features": {
- aname:
- parsing_ops.FixedLenSequenceFeature(
- (2, 1), dtype=dtypes.float32, allow_missing=True),
- bname:
- parsing_ops.FixedLenSequenceFeature(
- (1, 1, 1), dtype=dtypes.string, allow_missing=True),
- cname:
- parsing_ops.FixedLenSequenceFeature(
- shape=[], dtype=dtypes.int64, allow_missing=False),
- dname:
- parsing_ops.FixedLenSequenceFeature(
- shape=[], dtype=dtypes.string, allow_missing=True),
- }
- }, expected_err=(ValueError,
- "Unsupported: FixedLenSequenceFeature requires "
- "allow_missing to be True."))
+ self._test(
+ {
+ "example_names": example_names,
+ "serialized": ops.convert_to_tensor(serialized),
+ "features": {
+ aname:
+ parsing_ops.FixedLenSequenceFeature(
+ (2, 1), dtype=dtypes.float32, allow_missing=True),
+ bname:
+ parsing_ops.FixedLenSequenceFeature(
+ (1, 1, 1), dtype=dtypes.string, allow_missing=True),
+ cname:
+ parsing_ops.FixedLenSequenceFeature(
+ shape=[], dtype=dtypes.int64, allow_missing=False),
+ dname:
+ parsing_ops.FixedLenSequenceFeature(
+ shape=[], dtype=dtypes.string, allow_missing=True),
+ }
+ },
+ expected_err=(ValueError,
+ "Unsupported: FixedLenSequenceFeature requires "
+ "allow_missing to be True."))
class ParseSingleExampleTest(test.TestCase):
@@ -949,8 +953,8 @@ class ParseSingleExampleTest(test.TestCase):
# Check shapes.
for k, f in kwargs["features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
- self.assertEqual(tuple(out[k].get_shape()),
- tensor_shape.as_shape(f.shape))
+ self.assertEqual(
+ tuple(out[k].get_shape()), tensor_shape.as_shape(f.shape))
elif isinstance(f, parsing_ops.VarLenFeature):
self.assertEqual(
tuple(out[k].indices.get_shape().as_list()), (None, 1))
@@ -959,29 +963,25 @@ class ParseSingleExampleTest(test.TestCase):
tuple(out[k].dense_shape.get_shape().as_list()), (1,))
def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
- original = example(features=features({
- "c": float_feature([3, 4]),
- "d": float_feature([0.0, 1.0]),
- "val": bytes_feature([b"a", b"b"]),
- "idx": int64_feature([0, 3]),
- "st_a": float_feature([3.0, 4.0])
- }))
+ original = example(
+ features=features({
+ "c": float_feature([3, 4]),
+ "d": float_feature([0.0, 1.0]),
+ "val": bytes_feature([b"a", b"b"]),
+ "idx": int64_feature([0, 3]),
+ "st_a": float_feature([3.0, 4.0])
+ }))
serialized = original.SerializeToString()
expected_st_a = (
- np.array(
- [[0], [1]], dtype=np.int64), # indices
- np.array(
- [3.0, 4.0], dtype=np.float32), # values
- np.array(
- [2], dtype=np.int64)) # shape: max_values = 2
+ np.array([[0], [1]], dtype=np.int64), # indices
+ np.array([3.0, 4.0], dtype=np.float32), # values
+ np.array([2], dtype=np.int64)) # shape: max_values = 2
expected_sp = ( # indices, values, shape
- np.array(
- [[0], [3]], dtype=np.int64), np.array(
- ["a", "b"], dtype="|S"), np.array(
- [13], dtype=np.int64)) # max_values = 13
+ np.array([[0], [3]], dtype=np.int64), np.array(["a", "b"], dtype="|S"),
+ np.array([13], dtype=np.int64)) # max_values = 13
a_default = [1, 2, 3]
b_default = np.random.rand(3, 3).astype(bytes)
@@ -996,16 +996,14 @@ class ParseSingleExampleTest(test.TestCase):
self._test(
{
- "example_names":
- ops.convert_to_tensor("in1"),
- "serialized":
- ops.convert_to_tensor(serialized),
+ "example_names": ops.convert_to_tensor("in1"),
+ "serialized": ops.convert_to_tensor(serialized),
"features": {
"st_a":
parsing_ops.VarLenFeature(dtypes.float32),
"sp":
- parsing_ops.SparseFeature(
- ["idx"], "val", dtypes.string, [13]),
+ parsing_ops.SparseFeature(["idx"], "val", dtypes.string,
+ [13]),
"a":
parsing_ops.FixedLenFeature(
(1, 3), dtypes.int64, default_value=a_default),
@@ -1016,9 +1014,8 @@ class ParseSingleExampleTest(test.TestCase):
"c":
parsing_ops.FixedLenFeature(2, dtypes.float32),
"d":
- parsing_ops.FixedLenSequenceFeature([],
- dtypes.float32,
- allow_missing=True)
+ parsing_ops.FixedLenSequenceFeature(
+ [], dtypes.float32, allow_missing=True)
}
},
expected_output)
@@ -1050,43 +1047,71 @@ class ParseSequenceExampleTest(test.TestCase):
kwargs,
expected_context_values=None,
expected_feat_list_values=None,
- expected_err=None):
+ expected_length_values=None,
+ expected_err=None,
+ batch=False):
expected_context_values = expected_context_values or {}
expected_feat_list_values = expected_feat_list_values or {}
+ expected_length_values = expected_length_values or {}
with self.test_session() as sess:
if expected_err:
with self.assertRaisesWithPredicateMatch(expected_err[0],
expected_err[1]):
- c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
+ if batch:
+ c_out, fl_out, _ = parsing_ops.parse_sequence_example(**kwargs)
+ else:
+ c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
if c_out:
sess.run(flatten_values_tensors_or_sparse(c_out.values()))
if fl_out:
sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
else:
# Returns dicts w/ Tensors and SparseTensors.
- context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
- **kwargs)
+ if batch:
+ (context_out, feat_list_out,
+ lengths_out) = parsing_ops.parse_sequence_example(**kwargs)
+ else:
+ (context_out,
+ feat_list_out) = parsing_ops.parse_single_sequence_example(**kwargs)
+ lengths_out = {}
+
context_result = sess.run(
- flatten_values_tensors_or_sparse(context_out.values(
- ))) if context_out else []
+ flatten_values_tensors_or_sparse(
+ context_out.values())) if context_out else []
feat_list_result = sess.run(
- flatten_values_tensors_or_sparse(feat_list_out.values(
- ))) if feat_list_out else []
+ flatten_values_tensors_or_sparse(
+ feat_list_out.values())) if feat_list_out else []
+ lengths_result = sess.run(
+ flatten_values_tensors_or_sparse(
+ lengths_out.values())) if lengths_out else []
# Check values.
_compare_output_to_expected(self, context_out, expected_context_values,
context_result)
_compare_output_to_expected(self, feat_list_out,
expected_feat_list_values, feat_list_result)
+ _compare_output_to_expected(self, lengths_out, expected_length_values,
+ lengths_result)
# Check shapes; if serialized is a Tensor we need its size to
# properly check.
if "context_features" in kwargs:
for k, f in kwargs["context_features"].items():
if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
+ if batch:
+ self.assertEqual(
+ tuple(context_out[k].get_shape().as_list()[1:]), f.shape)
+ else:
+ self.assertEqual(
+ tuple(context_out[k].get_shape().as_list()), f.shape)
+ elif isinstance(f, parsing_ops.VarLenFeature) and batch:
self.assertEqual(
- tuple(context_out[k].get_shape().as_list()), f.shape)
- elif isinstance(f, parsing_ops.VarLenFeature):
+ tuple(context_out[k].indices.get_shape().as_list()), (None, 2))
+ self.assertEqual(
+ tuple(context_out[k].values.get_shape().as_list()), (None,))
+ self.assertEqual(
+ tuple(context_out[k].dense_shape.get_shape().as_list()), (2,))
+ elif isinstance(f, parsing_ops.VarLenFeature) and not batch:
self.assertEqual(
tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
self.assertEqual(
@@ -1094,38 +1119,94 @@ class ParseSequenceExampleTest(test.TestCase):
self.assertEqual(
tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))
+ def _testBoth(self,
+ kwargs,
+ expected_context_values=None,
+ expected_feat_list_values=None,
+ expected_err=None):
+ # Test using tf.parse_single_sequence_example
+ self._test(
+ kwargs,
+ expected_context_values=expected_context_values,
+ expected_feat_list_values=expected_feat_list_values,
+ expected_err=expected_err,
+ batch=False)
+
+ # Convert the input to a batch of size 1, and test using
+ # tf.parse_sequence_example.
+
+ # Some replacements are needed for the batch version.
+ kwargs["serialized"] = [kwargs.pop("serialized")]
+ kwargs["example_names"] = [kwargs.pop("example_name")
+ ] if "example_name" in kwargs else None
+ # Disable error string matching; it's not consistent for batch mode.
+ if expected_err:
+ expected_err = (expected_err[0], "")
+
+ # Add a batch dimension to expected output
+ if expected_context_values:
+ new_values = {}
+ for k in expected_context_values:
+ v = expected_context_values[k]
+ if isinstance(kwargs["context_features"][k],
+ parsing_ops.FixedLenFeature):
+ new_values[k] = np.expand_dims(v, axis=0)
+ else:
+ # Sparse tensor.
+ new_values[k] = (np.insert(v[0], 0, 0, axis=1), v[1],
+ np.insert(v[2], 0, 1))
+ expected_context_values = new_values
+
+ expected_length_values = {}
+ if expected_feat_list_values:
+ new_values = {}
+ for k in expected_feat_list_values:
+ v = expected_feat_list_values[k]
+ if isinstance(kwargs["sequence_features"][k],
+ parsing_ops.FixedLenSequenceFeature):
+ expected_length_values[k] = [np.shape(v)[0]]
+ new_values[k] = np.expand_dims(v, axis=0)
+ else:
+ # Sparse tensor.
+ new_values[k] = (np.insert(v[0], 0, 0, axis=1), v[1],
+ np.insert(v[2], 0, 1))
+ expected_feat_list_values = new_values
+
+ self._test(
+ kwargs,
+ expected_context_values=expected_context_values,
+ expected_feat_list_values=expected_feat_list_values,
+ expected_length_values=expected_length_values,
+ expected_err=expected_err,
+ batch=True)
+
def testSequenceExampleWithSparseAndDenseContext(self):
- original = sequence_example(context=features({
- "c": float_feature([3, 4]),
- "st_a": float_feature([3.0, 4.0])
- }))
+ original = sequence_example(
+ context=features({
+ "c": float_feature([3, 4]),
+ "st_a": float_feature([3.0, 4.0])
+ }))
serialized = original.SerializeToString()
expected_st_a = (
- np.array(
- [[0], [1]], dtype=np.int64), # indices
- np.array(
- [3.0, 4.0], dtype=np.float32), # values
- np.array(
- [2], dtype=np.int64)) # shape: num_features = 2
+ np.array([[0], [1]], dtype=np.int64), # indices
+ np.array([3.0, 4.0], dtype=np.float32), # values
+ np.array([2], dtype=np.int64)) # shape: num_features = 2
- a_default = [1, 2, 3]
+ a_default = [[1, 2, 3]]
b_default = np.random.rand(3, 3).astype(bytes)
expected_context_output = {
"st_a": expected_st_a,
- "a": [a_default],
+ "a": a_default,
"b": b_default,
- "c": np.array(
- [3, 4], dtype=np.float32),
+ "c": np.array([3, 4], dtype=np.float32),
}
- self._test(
+ self._testBoth(
{
- "example_name":
- "in1",
- "serialized":
- ops.convert_to_tensor(serialized),
+ "example_name": "in1",
+ "serialized": ops.convert_to_tensor(serialized),
"context_features": {
"st_a":
parsing_ops.VarLenFeature(dtypes.float32),
@@ -1143,51 +1224,54 @@ class ParseSequenceExampleTest(test.TestCase):
expected_context_values=expected_context_output)
def testSequenceExampleWithMultipleSizeFeatureLists(self):
- original = sequence_example(feature_lists=feature_lists({
- "a":
- feature_list([
- int64_feature([-1, 0, 1]),
- int64_feature([2, 3, 4]),
- int64_feature([5, 6, 7]),
- int64_feature([8, 9, 10]),
- ]),
- "b":
- feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
- "c":
- feature_list([float_feature([3, 4]), float_feature([-1, 2])]),
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([
+ int64_feature([-1, 0, 1]),
+ int64_feature([2, 3, 4]),
+ int64_feature([5, 6, 7]),
+ int64_feature([8, 9, 10]),
+ ]),
+ "b":
+ feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
+ "c":
+ feature_list([float_feature([3, 4]),
+ float_feature([-1, 2])]),
+ }))
serialized = original.SerializeToString()
expected_feature_list_output = {
- "a": np.array(
- [ # outer dimension is time.
- [[-1, 0, 1]], # inside are 1x3 matrices
- [[2, 3, 4]],
- [[5, 6, 7]],
- [[8, 9, 10]]
- ],
- dtype=np.int64),
- "b": np.array(
- [ # outer dimension is time, inside are 2x2 matrices
- [[b"r00", b"r01"], [b"r10", b"r11"]]
- ],
- dtype=bytes),
- "c": np.array(
- [ # outer dimension is time, inside are 2-vectors
- [3, 4], [-1, 2]
- ],
- dtype=np.float32),
- "d": np.empty(
- shape=(0, 5), dtype=np.float32), # empty_allowed_missing
+ "a":
+ np.array(
+ [ # outer dimension is time.
+ [[-1, 0, 1]], # inside are 1x3 matrices
+ [[2, 3, 4]],
+ [[5, 6, 7]],
+ [[8, 9, 10]]
+ ],
+ dtype=np.int64),
+ "b":
+ np.array(
+ [ # outer dimension is time, inside are 2x2 matrices
+ [[b"r00", b"r01"], [b"r10", b"r11"]]
+ ],
+ dtype=bytes),
+ "c":
+ np.array(
+ [ # outer dimension is time, inside are 2-vectors
+ [3, 4], [-1, 2]
+ ],
+ dtype=np.float32),
+ "d":
+ np.empty(shape=(0, 5), dtype=np.float32), # empty_allowed_missing
}
- self._test(
+ self._testBoth(
{
- "example_name":
- "in1",
- "serialized":
- ops.convert_to_tensor(serialized),
+ "example_name": "in1",
+ "serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
"a":
parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
@@ -1203,56 +1287,51 @@ class ParseSequenceExampleTest(test.TestCase):
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithoutDebugName(self):
- original = sequence_example(feature_lists=feature_lists({
- "a":
- feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
- "st_a":
- feature_list([
- float_feature([3.0, 4.0]), float_feature([5.0]),
- float_feature([])
- ]),
- "st_b":
- feature_list([
- bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
- bytes_feature([b"b", b"c"])
- ])
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([int64_feature([3, 4]),
+ int64_feature([1, 0])]),
+ "st_a":
+ feature_list([
+ float_feature([3.0, 4.0]),
+ float_feature([5.0]),
+ float_feature([])
+ ]),
+ "st_b":
+ feature_list([
+ bytes_feature([b"a"]),
+ bytes_feature([]),
+ bytes_feature([]),
+ bytes_feature([b"b", b"c"])
+ ])
+ }))
serialized = original.SerializeToString()
expected_st_a = (
- np.array(
- [[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
- np.array(
- [3.0, 4.0, 5.0], dtype=np.float32), # values
- np.array(
- [3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
+ np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
+ np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
+ np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_st_b = (
- np.array(
- [[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
- np.array(
- ["a", "b", "c"], dtype="|S"), # values
- np.array(
- [4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
+ np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
+ np.array(["a", "b", "c"], dtype="|S"), # values
+ np.array([4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
expected_st_c = (
- np.empty(
- (0, 2), dtype=np.int64), # indices
- np.empty(
- (0,), dtype=np.int64), # values
- np.array(
- [0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
+ np.empty((0, 2), dtype=np.int64), # indices
+ np.empty((0,), dtype=np.int64), # values
+ np.array([0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
expected_feature_list_output = {
- "a": np.array(
- [[3, 4], [1, 0]], dtype=np.int64),
+ "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
"st_a": expected_st_a,
"st_b": expected_st_b,
"st_c": expected_st_c,
}
- self._test(
+ self._testBoth(
{
"serialized": ops.convert_to_tensor(serialized),
"sequence_features": {
@@ -1265,56 +1344,51 @@ class ParseSequenceExampleTest(test.TestCase):
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithSparseAndDenseFeatureLists(self):
- original = sequence_example(feature_lists=feature_lists({
- "a":
- feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
- "st_a":
- feature_list([
- float_feature([3.0, 4.0]), float_feature([5.0]),
- float_feature([])
- ]),
- "st_b":
- feature_list([
- bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
- bytes_feature([b"b", b"c"])
- ])
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([int64_feature([3, 4]),
+ int64_feature([1, 0])]),
+ "st_a":
+ feature_list([
+ float_feature([3.0, 4.0]),
+ float_feature([5.0]),
+ float_feature([])
+ ]),
+ "st_b":
+ feature_list([
+ bytes_feature([b"a"]),
+ bytes_feature([]),
+ bytes_feature([]),
+ bytes_feature([b"b", b"c"])
+ ])
+ }))
serialized = original.SerializeToString()
expected_st_a = (
- np.array(
- [[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
- np.array(
- [3.0, 4.0, 5.0], dtype=np.float32), # values
- np.array(
- [3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
+ np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64), # indices
+ np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
+ np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_st_b = (
- np.array(
- [[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
- np.array(
- ["a", "b", "c"], dtype="|S"), # values
- np.array(
- [4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
+ np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64), # indices
+ np.array(["a", "b", "c"], dtype="|S"), # values
+ np.array([4, 2], dtype=np.int64)) # shape: num_time = 4, max_feat = 2
expected_st_c = (
- np.empty(
- (0, 2), dtype=np.int64), # indices
- np.empty(
- (0,), dtype=np.int64), # values
- np.array(
- [0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
+ np.empty((0, 2), dtype=np.int64), # indices
+ np.empty((0,), dtype=np.int64), # values
+ np.array([0, 0], dtype=np.int64)) # shape: num_time = 0, max_feat = 0
expected_feature_list_output = {
- "a": np.array(
- [[3, 4], [1, 0]], dtype=np.int64),
+ "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
"st_a": expected_st_a,
"st_b": expected_st_b,
"st_c": expected_st_c,
}
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1328,30 +1402,28 @@ class ParseSequenceExampleTest(test.TestCase):
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
- original = sequence_example(feature_lists=feature_lists({
- "st_a":
- feature_list([
- float_feature([3.0, 4.0]),
- feature(),
- float_feature([5.0]),
- ]),
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "st_a":
+ feature_list([
+ float_feature([3.0, 4.0]),
+ feature(),
+ float_feature([5.0]),
+ ]),
+ }))
serialized = original.SerializeToString()
expected_st_a = (
- np.array(
- [[0, 0], [0, 1], [2, 0]], dtype=np.int64), # indices
- np.array(
- [3.0, 4.0, 5.0], dtype=np.float32), # values
- np.array(
- [3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
+ np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64), # indices
+ np.array([3.0, 4.0, 5.0], dtype=np.float32), # values
+ np.array([3, 2], dtype=np.int64)) # shape: num_time = 3, max_feat = 2
expected_feature_list_output = {
"st_a": expected_st_a,
}
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1362,13 +1434,15 @@ class ParseSequenceExampleTest(test.TestCase):
expected_feat_list_values=expected_feature_list_output)
def testSequenceExampleListWithInconsistentDataFails(self):
- original = sequence_example(feature_lists=feature_lists({
- "a": feature_list([int64_feature([-1, 0]), float_feature([2, 3])])
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a": feature_list([int64_feature([-1, 0]),
+ float_feature([2, 3])])
+ }))
serialized = original.SerializeToString()
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1380,13 +1454,14 @@ class ParseSequenceExampleTest(test.TestCase):
" Data types don't match. Expected type: int64"))
def testSequenceExampleListWithWrongDataTypeFails(self):
- original = sequence_example(feature_lists=feature_lists({
- "a": feature_list([float_feature([2, 3])])
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a": feature_list([float_feature([2, 3])])
+ }))
serialized = original.SerializeToString()
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1399,17 +1474,19 @@ class ParseSequenceExampleTest(test.TestCase):
" Expected type: int64"))
def testSequenceExampleListWithWrongSparseDataTypeFails(self):
- original = sequence_example(feature_lists=feature_lists({
- "a":
- feature_list([
- int64_feature([3, 4]), int64_feature([1, 2]),
- float_feature([2.0, 3.0])
- ])
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([
+ int64_feature([3, 4]),
+ int64_feature([1, 2]),
+ float_feature([2.0, 3.0])
+ ])
+ }))
serialized = original.SerializeToString()
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1423,13 +1500,16 @@ class ParseSequenceExampleTest(test.TestCase):
" Feature is: float_list"))
def testSequenceExampleListWithWrongShapeFails(self):
- original = sequence_example(feature_lists=feature_lists({
- "a": feature_list([int64_feature([2, 3]), int64_feature([2, 3, 4])]),
- }))
+ original = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([int64_feature([2, 3]),
+ int64_feature([2, 3, 4])]),
+ }))
serialized = original.SerializeToString()
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(serialized),
@@ -1446,7 +1526,7 @@ class ParseSequenceExampleTest(test.TestCase):
# Test fails because we didn't add:
# feature_list_dense_defaults = {"a": None}
- self._test(
+ self._testBoth(
{
"example_name": "in1",
"serialized": ops.convert_to_tensor(original.SerializeToString()),
@@ -1461,6 +1541,67 @@ class ParseSequenceExampleTest(test.TestCase):
" feature_list_dense_missing_assumed_empty or"
" feature_list_dense_defaults?"))
+ def testSequenceExampleBatch(self):
+ first = sequence_example(
+ feature_lists=feature_lists({
+ "a":
+ feature_list([
+ int64_feature([-1, 0, 1]),
+ int64_feature([2, 3, 4]),
+ int64_feature([5, 6, 7]),
+ int64_feature([8, 9, 10]),
+ ])
+ }))
+ second = sequence_example(
+ feature_lists=feature_lists({
+ "a": feature_list([
+ int64_feature([21, 2, 11]),
+ ])
+ }))
+
+ serialized = [first.SerializeToString(), second.SerializeToString()]
+
+ expected_feature_list_output = {
+ "a":
+ np.array(
+ [ # outermost dimension is example id
+ [ # middle dimension is time.
+ [[-1, 0, 1]], # inside are 1x3 matrices
+ [[2, 3, 4]],
+ [[5, 6, 7]],
+ [[8, 9, 10]]
+ ],
+ [ # middle dimension is time.
+ [[21, 2, 11]], # inside are 1x3 matrices
+ [[0, 0, 0]], # additional entries are padded with 0
+ [[0, 0, 0]],
+ [[0, 0, 0]]
+ ]
+ ],
+ dtype=np.int64),
+ "d":
+ np.empty(shape=(2, 0, 5), dtype=np.float32), # allowed_missing
+ }
+
+ self._test(
+ {
+ "example_names": ops.convert_to_tensor(["in1", "in2"]),
+ "serialized": ops.convert_to_tensor(serialized),
+ "sequence_features": {
+ "a":
+ parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
+ "d":
+ parsing_ops.FixedLenSequenceFeature(
+ (5,), dtypes.float32, allow_missing=True),
+ }
+ },
+ expected_feat_list_values=expected_feature_list_output,
+ expected_length_values={
+ "a": [4, 1],
+ "d": [0, 0]
+ },
+ batch=True)
+
class DecodeJSONExampleTest(test.TestCase):
@@ -1531,24 +1672,27 @@ class DecodeJSONExampleTest(test.TestCase):
example(features=features({
"st_d": feature()
})),
- example(features=features({
- "st_c": float_feature([1, 2, -1]),
- "st_d": bytes_feature([b"hi"])
- })),
+ example(
+ features=features({
+ "st_c": float_feature([1, 2, -1]),
+ "st_d": bytes_feature([b"hi"])
+ })),
])
def testSerializedContainingBytes(self):
aname = "a"
bname = "b*has+a:tricky_name"
self._testRoundTrip([
- example(features=features({
- aname: float_feature([1, 1]),
- bname: bytes_feature([b"b0_str"])
- })),
- example(features=features({
- aname: float_feature([-1, -1]),
- bname: bytes_feature([b"b1"])
- })),
+ example(
+ features=features({
+ aname: float_feature([1, 1]),
+ bname: bytes_feature([b"b0_str"])
+ })),
+ example(
+ features=features({
+ aname: float_feature([-1, -1]),
+ bname: bytes_feature([b"b1"])
+ })),
])
def testInvalidSyntax(self):
diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py
index 6041e2a0c5..8224097ac4 100644
--- a/tensorflow/python/ops/parsing_ops.py
+++ b/tensorflow/python/ops/parsing_ops.py
@@ -897,6 +897,352 @@ def _parse_single_example_raw(serialized,
return outputs
+@tf_export("io.parse_sequence_example")
+def parse_sequence_example(serialized,
+ context_features=None,
+ sequence_features=None,
+ example_names=None,
+ name=None):
+ # pylint: disable=line-too-long
+ """Parses a batch of `SequenceExample` protos.
+
+ Parses a vector of serialized
+ [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
+ protos given in `serialized`.
+
+ This op parses serialized sequence examples into a tuple of dictionaries
+ mapping keys to `Tensor` and `SparseTensor` objects respectively.
+ The first dictionary contains mappings for keys appearing in
+ `context_features`, and the second dictionary contains mappings for keys
+ appearing in `sequence_features`.
+
+ At least one of `context_features` and `sequence_features` must be provided
+ and non-empty.
+
+ The `context_features` keys are associated with a `SequenceExample` as a
+ whole, independent of time / frame. In contrast, the `sequence_features` keys
+ provide a way to access variable-length data within the `FeatureList` section
+ of the `SequenceExample` proto. While the shapes of `context_features` values
+ are fixed with respect to frame, the frame dimension (the first dimension)
+ of `sequence_features` values may vary between `SequenceExample` protos,
+ and even between `feature_list` keys within the same `SequenceExample`.
+
+ `context_features` contains `VarLenFeature` and `FixedLenFeature` objects.
+ Each `VarLenFeature` is mapped to a `SparseTensor`, and each `FixedLenFeature`
+ is mapped to a `Tensor`, of the specified type, shape, and default value.
+
+ `sequence_features` contains `VarLenFeature` and `FixedLenSequenceFeature`
+ objects. Each `VarLenFeature` is mapped to a `SparseTensor`, and each
+ `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
+ The shape will be `(B,T,) + df.dense_shape` for `FixedLenSequenceFeature`
+ `df`, where `B` is the batch size, and `T` is the length of the associated
+ `FeatureList` in the `SequenceExample`. For instance,
+ `FixedLenSequenceFeature([])` yields a scalar 2-D `Tensor` of static shape
+ `[None, None]` and dynamic shape `[B, T]`, while
+ `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 3-D matrix `Tensor`
+ of static shape `[None, None, k]` and dynamic shape `[B, T, k]`.
+
+ Like the input, the resulting output tensors have a batch dimension. This
+ means that the original per-example shapes of `VarLenFeature`s and
+ `FixedLenSequenceFeature`s can be lost. To handle that situation, this op also
+ provides dicts of shape tensors as part of the output. There is one dict for
+ the context features, and one for the feature_list features. Context features
+ of type `FixedLenFeature`s will not be present, since their shapes are already
+  known by the caller. In situations where the input `FixedLenFeature`s are of
+ different lengths across examples, the shorter examples will be padded with
+ default datatype values: 0 for numeric types, and the empty string for string
+ types.
+
+ Each `SparseTensor` corresponding to `sequence_features` represents a ragged
+ vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
+ entry and `index` is the value's index in the list of values associated with
+ that time.
+
+ `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
+ entries with `allow_missing=True` are optional; otherwise, we will fail if
+ that `Feature` or `FeatureList` is missing from any example in `serialized`.
+
+  `example_names` may contain descriptive names for the corresponding
+  serialized protos. These may be useful for debugging, but they have no effect
+  on the output. If not `None`, `example_names` must be a vector.
+
+ Args:
+ serialized: A vector (1-D Tensor) of type string containing binary
+ serialized `SequenceExample` protos.
+ context_features: A `dict` mapping feature keys to `FixedLenFeature` or
+ `VarLenFeature` values. These features are associated with a
+ `SequenceExample` as a whole.
+ sequence_features: A `dict` mapping feature keys to
+ `FixedLenSequenceFeature` or `VarLenFeature` values. These features are
+ associated with data within the `FeatureList` section of the
+ `SequenceExample` proto.
+ example_names: A vector (1-D Tensor) of strings (optional), the name of the
+ serialized protos.
+ name: A name for this operation (optional).
+
+  Returns:
+    A tuple of three `dict`s, each mapping keys to `Tensor`s and
+    `SparseTensor`s: the context key/values, the feature_list key/values, and
+    the lengths of any dense feature_list features, respectively.
+
+ Raises:
+ ValueError: if any feature is invalid.
+ """
+ if not (context_features or sequence_features):
+ raise ValueError("Missing features.")
+ (context_sparse_keys, context_sparse_types, context_dense_keys,
+ context_dense_types,
+ context_dense_defaults, context_dense_shapes) = _features_to_raw_params(
+ context_features, [VarLenFeature, FixedLenFeature])
+ (feature_list_sparse_keys, feature_list_sparse_types, feature_list_dense_keys,
+ feature_list_dense_types, feature_list_dense_defaults,
+ feature_list_dense_shapes) = _features_to_raw_params(
+ sequence_features, [VarLenFeature, FixedLenSequenceFeature])
+ return _parse_sequence_example_raw(
+ serialized, example_names, context_sparse_keys, context_sparse_types,
+ context_dense_keys, context_dense_types, context_dense_defaults,
+ context_dense_shapes, feature_list_sparse_keys, feature_list_sparse_types,
+ feature_list_dense_keys, feature_list_dense_types,
+ feature_list_dense_shapes, feature_list_dense_defaults, name)
+
+
+def _parse_sequence_example_raw(serialized,
+ debug_name=None,
+ context_sparse_keys=None,
+ context_sparse_types=None,
+ context_dense_keys=None,
+ context_dense_types=None,
+ context_dense_defaults=None,
+ context_dense_shapes=None,
+ feature_list_sparse_keys=None,
+ feature_list_sparse_types=None,
+ feature_list_dense_keys=None,
+ feature_list_dense_types=None,
+ feature_list_dense_shapes=None,
+ feature_list_dense_defaults=None,
+ name=None):
+ """Parses a vector of `SequenceExample` protos.
+
+ Args:
+ serialized: A vector (1-D Tensor) of type string, containing binary
+ serialized `SequenceExample` protos.
+ debug_name: A vector (1-D Tensor) of strings (optional), the names of the
+ serialized protos.
+ context_sparse_keys: A list of string keys in the `SequenceExample`'s
+ features. The results for these keys will be returned as `SparseTensor`
+ objects.
+    context_sparse_types: A list of `DTypes`, same length as `context_sparse_keys`.
+ Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
+ (`BytesList`) are supported.
+ context_dense_keys: A list of string keys in the examples' features. The
+ results for these keys will be returned as `Tensor`s
+ context_dense_types: A list of DTypes, same length as `context_dense_keys`.
+ Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
+ (`BytesList`) are supported.
+ context_dense_defaults: A dict mapping string keys to `Tensor`s. The keys of
+ the dict must match the context_dense_keys of the feature.
+ context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
+ The shape of the data for each context_dense feature referenced by
+ `context_dense_keys`. Required for any input tensors identified by
+ `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
+ feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
+ feature_lists. The results for these keys will be returned as
+ `SparseTensor` objects.
+    feature_list_sparse_types: A list of `DTypes`, same length as `feature_list_sparse_keys`.
+ Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), and `tf.string`
+ (`BytesList`) are supported.
+ feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
+    feature_lists. The results for these keys will be returned as `Tensor`s.
+ feature_list_dense_types: A list of `DTypes`, same length as
+ `feature_list_dense_keys`. Only `tf.float32` (`FloatList`), `tf.int64`
+ (`Int64List`), and `tf.string` (`BytesList`) are supported.
+ feature_list_dense_shapes: A list of tuples, same length as
+ `feature_list_dense_keys`. The shape of the data for each `FeatureList`
+ feature referenced by `feature_list_dense_keys`.
+ feature_list_dense_defaults: A dict mapping key strings to values. The only
+ currently allowed value is `None`. Any key appearing in this dict with
+ value `None` is allowed to be missing from the `SequenceExample`. If
+ missing, the key is treated as zero-length.
+ name: A name for this operation (optional).
+
+ Returns:
+ A tuple of three `dict`s, each mapping keys to `Tensor`s and
+ `SparseTensor`s. The first dict contains the context key/values,
+ the second dict contains the feature_list key/values, and the final dict
+ contains the lengths of any dense feature_list features.
+
+ Raises:
+ ValueError: If context_sparse and context_dense key sets intersect,
+ if feature_list_sparse and feature_list_dense key sets intersect,
+ if input lengths do not match up, or if a value in
+ feature_list_dense_defaults is not None.
+ TypeError: if feature_list_dense_defaults is not either None or a dict.
+ """
+ with ops.name_scope(name, "ParseSequenceExample", [serialized]):
+ context_dense_defaults = ({} if context_dense_defaults is None else
+ context_dense_defaults)
+ context_sparse_keys = ([] if context_sparse_keys is None else
+ context_sparse_keys)
+ context_sparse_types = ([] if context_sparse_types is None else
+ context_sparse_types)
+ context_dense_keys = ([]
+ if context_dense_keys is None else context_dense_keys)
+ context_dense_types = ([] if context_dense_types is None else
+ context_dense_types)
+ context_dense_shapes = ([[]] * len(context_dense_keys)
+ if context_dense_shapes is None else
+ context_dense_shapes)
+ feature_list_sparse_keys = ([] if feature_list_sparse_keys is None else
+ feature_list_sparse_keys)
+ feature_list_sparse_types = ([] if feature_list_sparse_types is None else
+ feature_list_sparse_types)
+ feature_list_dense_keys = ([] if feature_list_dense_keys is None else
+ feature_list_dense_keys)
+ feature_list_dense_types = ([] if feature_list_dense_types is None else
+ feature_list_dense_types)
+ feature_list_dense_shapes = ([[]] * len(feature_list_dense_keys)
+ if feature_list_dense_shapes is None else
+ feature_list_dense_shapes)
+ feature_list_dense_defaults = (
+ dict()
+ if feature_list_dense_defaults is None else feature_list_dense_defaults)
+ debug_name = [] if debug_name is None else debug_name
+
+ # Internal
+ feature_list_dense_missing_assumed_empty = []
+
+ num_context_dense = len(context_dense_keys)
+ num_feature_list_dense = len(feature_list_dense_keys)
+ num_context_sparse = len(context_sparse_keys)
+ num_feature_list_sparse = len(feature_list_sparse_keys)
+
+ if len(context_dense_shapes) != num_context_dense:
+ raise ValueError(
+ "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d" %
+ (len(context_dense_shapes), num_context_dense))
+ if len(context_dense_types) != num_context_dense:
+ raise ValueError(
+ "len(context_dense_types) != len(num_context_dense): %d vs. %d" %
+ (len(context_dense_types), num_context_dense))
+ if len(feature_list_dense_shapes) != num_feature_list_dense:
+ raise ValueError(
+ "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
+ "%d vs. %d" % (len(feature_list_dense_shapes),
+ num_feature_list_dense))
+ if len(feature_list_dense_types) != num_feature_list_dense:
+ raise ValueError(
+ "len(feature_list_dense_types) != len(num_feature_list_dense):"
+ "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
+ if len(context_sparse_types) != num_context_sparse:
+ raise ValueError(
+ "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d" %
+ (len(context_sparse_types), num_context_sparse))
+ if len(feature_list_sparse_types) != num_feature_list_sparse:
+ raise ValueError(
+ "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
+ "%d vs. %d" % (len(feature_list_sparse_types),
+ num_feature_list_sparse))
+ if (num_context_dense + num_context_sparse + num_feature_list_dense +
+ num_feature_list_sparse) == 0:
+ raise ValueError(
+ "Must provide at least one context_sparse key, context_dense key, "
+ ", feature_list_sparse key, or feature_list_dense key")
+ if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
+ raise ValueError(
+ "context_dense and context_sparse keys must not intersect; "
+ "intersection: %s" % set(context_dense_keys).intersection(
+ set(context_sparse_keys)))
+ if not set(feature_list_dense_keys).isdisjoint(
+ set(feature_list_sparse_keys)):
+ raise ValueError(
+ "feature_list_dense and feature_list_sparse keys must not intersect; "
+ "intersection: %s" % set(feature_list_dense_keys).intersection(
+ set(feature_list_sparse_keys)))
+ if not isinstance(feature_list_dense_defaults, dict):
+ raise TypeError("feature_list_dense_defaults must be a dict")
+ for k, v in feature_list_dense_defaults.items():
+ if v is not None:
+ raise ValueError(
+ "Value feature_list_dense_defaults[%s] must be None" % k)
+ feature_list_dense_missing_assumed_empty.append(k)
+
+ context_dense_defaults_vec = []
+ for i, key in enumerate(context_dense_keys):
+ default_value = context_dense_defaults.get(key)
+ if default_value is None:
+ default_value = constant_op.constant([], dtype=context_dense_types[i])
+ elif not isinstance(default_value, ops.Tensor):
+ key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
+ default_value = ops.convert_to_tensor(
+ default_value, dtype=context_dense_types[i], name=key_name)
+
+ context_dense_defaults_vec.append(default_value)
+
+ context_dense_shapes = [
+ tensor_shape.as_shape(shape).as_proto()
+ for shape in context_dense_shapes
+ ]
+ feature_list_dense_shapes = [
+ tensor_shape.as_shape(shape).as_proto()
+ for shape in feature_list_dense_shapes
+ ]
+
+ # pylint: disable=protected-access
+ outputs = gen_parsing_ops.parse_sequence_example(
+ serialized=serialized,
+ debug_name=debug_name,
+ Ncontext_sparse=num_context_sparse,
+ Ncontext_dense=num_context_dense,
+ Nfeature_list_sparse=num_feature_list_sparse,
+ Nfeature_list_dense=num_feature_list_dense,
+ context_dense_defaults=context_dense_defaults_vec,
+ context_sparse_keys=context_sparse_keys,
+ context_sparse_types=context_sparse_types,
+ context_dense_keys=context_dense_keys,
+ context_dense_shapes=context_dense_shapes,
+ feature_list_sparse_keys=feature_list_sparse_keys,
+ feature_list_sparse_types=feature_list_sparse_types,
+ feature_list_dense_keys=feature_list_dense_keys,
+ feature_list_dense_types=feature_list_dense_types,
+ feature_list_dense_shapes=feature_list_dense_shapes,
+ feature_list_dense_missing_assumed_empty=(
+ feature_list_dense_missing_assumed_empty),
+ name=name)
+ # pylint: enable=protected-access
+
+ (context_sparse_indices, context_sparse_values, context_sparse_shapes,
+ context_dense_values, feature_list_sparse_indices,
+ feature_list_sparse_values, feature_list_sparse_shapes,
+ feature_list_dense_values, feature_list_dense_lengths) = outputs
+
+ context_sparse_tensors = [
+ sparse_tensor.SparseTensor(ix, val, shape)
+ for (ix, val,
+ shape) in zip(context_sparse_indices, context_sparse_values,
+ context_sparse_shapes)
+ ]
+
+ feature_list_sparse_tensors = [
+ sparse_tensor.SparseTensor(ix, val, shape)
+ for (ix, val, shape
+ ) in zip(feature_list_sparse_indices, feature_list_sparse_values,
+ feature_list_sparse_shapes)
+ ]
+
+ context_output = dict(
+ zip(context_sparse_keys + context_dense_keys,
+ context_sparse_tensors + context_dense_values))
+ feature_list_output = dict(
+ zip(feature_list_sparse_keys + feature_list_dense_keys,
+ feature_list_sparse_tensors + feature_list_dense_values))
+ feature_list_lengths = dict(
+ zip(feature_list_dense_keys, feature_list_dense_lengths))
+
+ return (context_output, feature_list_output, feature_list_lengths)
+
+
+# TODO(sundberg): rewrite this method to call the batch version, which is more
+# efficient especially for large inputs.
@tf_export("parse_single_sequence_example")
def parse_single_sequence_example(
serialized, context_features=None, sequence_features=None,
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
index 3a36c168aa..8938cf217b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.pbtxt
@@ -25,6 +25,10 @@ tf_module {
argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
+ name: "parse_sequence_example"
+ argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+ }
+ member_method {
name: "parse_tensor"
argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
index 3a36c168aa..8938cf217b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
@@ -25,6 +25,10 @@ tf_module {
argspec: "args=[\'pattern\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
+ name: "parse_sequence_example"
+ argspec: "args=[\'serialized\', \'context_features\', \'sequence_features\', \'example_names\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+ }
+ member_method {
name: "parse_tensor"
argspec: "args=[\'serialized\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}