about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/python/kernel_tests/parsing_ops_test.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/python/kernel_tests/parsing_ops_test.py')
-rw-r--r-- tensorflow/python/kernel_tests/parsing_ops_test.py 414
1 files changed, 414 insertions, 0 deletions
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
new file mode 100644
index 0000000000..fba7c705fb
--- /dev/null
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -0,0 +1,414 @@
+"""Tests for tensorflow.ops.parsing_ops."""
+
+import itertools
+
+import tensorflow.python.platform
+
+import numpy as np
+import tensorflow as tf
+
+# Helpers for creating Example objects.  `example` and `feature` are plain
+# aliases for the protobuf message classes; the functions below wrap a value
+# in the matching message so test data can be written compactly.
+example = tf.train.Example
+feature = tf.train.Feature
+
+
+def features(d):
+  """Return a tf.train.Features message built from the dict `d`.
+
+  In this file `d` maps feature names to Feature messages.
+  """
+  return tf.train.Features(feature=d)
+
+
+def bytes_feature(v):
+  """Return a Feature whose bytes_list holds the values in `v`."""
+  return feature(bytes_list=tf.train.BytesList(value=v))
+
+
+def int64_feature(v):
+  """Return a Feature whose int64_list holds the values in `v`."""
+  return feature(int64_list=tf.train.Int64List(value=v))
+
+
+def float_feature(v):
+  """Return a Feature whose float_list holds the values in `v`."""
+  return feature(float_list=tf.train.FloatList(value=v))
+
+
+def flatten(list_of_lists):
+  """Flatten one level of nesting.
+
+  Args:
+    list_of_lists: An iterable whose items are themselves iterable.
+
+  Returns:
+    A lazy `itertools.chain` iterator over the items of every inner iterable,
+    in order.  Wrap it in `list()` when a concrete list is needed.
+  """
+  return itertools.chain.from_iterable(list_of_lists)
+
+
+def flatten_values_tensors_or_sparse(tensors_list):
+  """Flatten each SparseTensor object into 3 Tensors for session.run().
+
+  Args:
+    tensors_list: An iterable of Tensor and/or SparseTensor objects.
+
+  Returns:
+    A list, in input order, where every SparseTensor has been replaced by its
+    three components (indices, values, shape) and every other item is kept
+    as-is.
+  """
+  flat = []
+  for v in tensors_list:
+    if isinstance(v, tf.SparseTensor):
+      # A SparseTensor contributes three consecutive entries.
+      flat.extend([v.indices, v.values, v.shape])
+    else:
+      flat.append(v)
+  return flat
+
+
+def _compare_output_to_expected(
+    tester, dict_tensors, expected_tensors, flat_output):
+  """Assert that flattened session.run() output matches the expected values.
+
+  Args:
+    tester: The test case whose assert* methods are used.
+    dict_tensors: Dict mapping output key to the Tensor or SparseTensor
+      returned by the parsing op.
+    expected_tensors: Dict with the same keys.  For a SparseTensor output the
+      value is an (indices, values, shape) triple; otherwise it is the
+      expected dense value.
+    flat_output: Sequence of evaluated results.  It must have been produced by
+      flattening `dict_tensors.values()` in the dict's iteration order, with
+      three consecutive entries per SparseTensor, because entries are matched
+      to keys purely by position.
+  """
+  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))
+
+  i = 0  # Index into the flattened output of session.run()
+  # items() instead of the Python-2-only iteritems(); for an unmodified dict
+  # it iterates in the same order as values().
+  for k, v in dict_tensors.items():
+    expected_v = expected_tensors[k]
+    tf.logging.info("Comparing key: %s", k)
+    if isinstance(v, tf.SparseTensor):
+      # Three outputs for SparseTensor : indices, values, shape.
+      # The key is included in both lists so a failure message names it.
+      tester.assertEqual([k, 3], [k, len(expected_v)])
+      tester.assertAllEqual(flat_output[i], expected_v[0])
+      tester.assertAllEqual(flat_output[i + 1], expected_v[1])
+      tester.assertAllEqual(flat_output[i + 2], expected_v[2])
+      i += 3
+    else:
+      # One output for standard Tensor.
+      tester.assertAllEqual(flat_output[i], expected_v)
+      i += 1
+
+
+class ParseExampleTest(tf.test.TestCase):
+  """Tests for tf.parse_example on batches of serialized Examples."""
+
+  def _test(self, kwargs, expected_values=None, expected_err_re=None):
+    """Run tf.parse_example(**kwargs) and check static shapes and values.
+
+    Args:
+      kwargs: Dict of keyword arguments forwarded verbatim to
+        tf.parse_example.  Must contain "serialized"; "dense_keys",
+        "sparse_keys" and "dense_shapes" are also read here (defaulting to
+        empty) to drive the shape checks.
+      expected_values: Dict mapping each output key to its expected value: the
+        dense value, or an (indices, values, shape) triple for sparse outputs.
+        Only used when expected_err_re is None.
+      expected_err_re: If not None, a regex the op error raised by sess.run()
+        must match; output values are not compared in that case.
+    """
+    with self.test_session() as sess:
+      # Pull out some keys to check shape inference
+      serialized = kwargs["serialized"]
+      dense_keys = kwargs.get("dense_keys", [])
+      sparse_keys = kwargs.get("sparse_keys", [])
+      dense_shapes = kwargs.get("dense_shapes", [])
+
+      # Returns dict w/ Tensors and SparseTensors
+      out = tf.parse_example(**kwargs)
+
+      # Check shapes; if serialized is a Tensor we need its size to
+      # properly check.
+      batch_size = (
+          serialized.eval().size if isinstance(serialized, tf.Tensor)
+          else np.asarray(serialized).size)
+      self.assertEqual(len(dense_keys), len(dense_shapes))
+      for (k, s) in zip(dense_keys, dense_shapes):
+        # Dense outputs gain a leading batch dimension.
+        self.assertEqual(
+            tuple(out[k].get_shape().as_list()), (batch_size,) + s)
+      for k in sparse_keys:
+        self.assertEqual(
+            tuple(out[k].indices.get_shape().as_list()), (None, 2))
+        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
+        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (2,))
+
+      # Check values
+      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
+      if expected_err_re is None:
+        tf_result = sess.run(result)
+        _compare_output_to_expected(self, out, expected_values, tf_result)
+      else:
+        with self.assertRaisesOpError(expected_err_re):
+          sess.run(result)
+
+  def testEmptySerializedWithAllDefaults(self):
+    """Empty inputs yield the defaults for every dense key."""
+    dense_keys = ["a", "b", "c"]
+    dense_shapes = [(1, 3), (3, 3), (2,)]
+    dense_types = [tf.int64, tf.string, tf.float32]
+    # `str` is used instead of the `np.str` alias, which newer NumPy releases
+    # no longer provide.
+    dense_defaults = {
+        "a": [0, 42, 0],
+        "b": np.random.rand(3, 3).astype(str),
+        "c": np.random.rand(2).astype(np.float32),
+    }
+
+    expected_st_a = (  # indices, values, shape
+        np.empty((0, 2), dtype=np.int64),  # indices
+        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
+        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
+
+    expected_output = {
+        "st_a": expected_st_a,
+        "a": np.array(2 * [[dense_defaults["a"]]]),
+        "b": np.array(2 * [dense_defaults["b"]]),
+        "c": np.array(2 * [dense_defaults["c"]]),
+    }
+
+    self._test(
+        {
+            "names": np.empty((0,), dtype=str),
+            # empty serialized input Examples
+            "serialized": tf.convert_to_tensor(["", ""]),
+            "dense_defaults": dense_defaults,
+            "sparse_keys": ["st_a"],
+            "sparse_types": [tf.int64],
+            "dense_keys": dense_keys,
+            "dense_types": dense_types,
+            "dense_shapes": dense_shapes
+        }, expected_output)
+
+  def testEmptySerializedWithoutDefaultsShouldFail(self):
+    """A dense key with neither a value nor a default raises an op error."""
+    dense_shapes = [(1, 3), (3, 3), (2,)]
+    dense_defaults = {
+        "a": [0, 42, 0],
+        "b": np.random.rand(3, 3).astype(str),
+        # Feature "c" has no default and the inputs are empty, so parsing
+        # must fail on it.
+    }
+    self._test(
+        {
+            "serialized": ["", ""],  # empty serialized input Examples
+            "names": ["in1", "in2"],
+            "dense_defaults": dense_defaults,
+            "sparse_keys": ["st_a"],
+            "sparse_types": [tf.int64],
+            "dense_keys": ["a", "b", "c"],
+            "dense_types": [tf.int64, tf.string, tf.float32],
+            "dense_shapes": dense_shapes
+        },
+        expected_err_re="Name: in1, Feature: c is required")
+
+  def testDenseNotMatchingShapeShouldFail(self):
+    """A dense feature with the wrong number of values raises an op error."""
+    dense_shapes = [(1, 3)]
+    dense_defaults = {
+        # no default!
+    }
+
+    original = [
+        example(features=features({
+            "a": float_feature([1, 1, 3]),
+        })),
+        # Only two values where dense_shapes asks for three.
+        example(features=features({
+            "a": float_feature([-1, -1]),
+        }))
+    ]
+
+    names = ["passing", "failing"]
+    serialized = [m.SerializeToString() for m in original]
+
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "names": names,
+            "dense_defaults": dense_defaults,
+            "dense_keys": ["a"],
+            "dense_types": [tf.float32],
+            "dense_shapes": dense_shapes,
+        },
+        expected_err_re="Name: failing, Key: a. Number of float values")
+
+  def testSerializedContainingSparse(self):
+    """Sparse keys are parsed across examples with missing/empty features."""
+    original = [
+        example(features=features({
+            "st_c": float_feature([3, 4])
+        })),
+        example(features=features({
+            "st_c": float_feature([]),  # empty float list
+        })),
+        example(features=features({
+            "st_d": feature(),  # feature with nothing in it
+        })),
+        example(features=features({
+            "st_c": float_feature([1, 2, -1]),
+            "st_d": bytes_feature(["hi"])
+        }))
+    ]
+
+    serialized = [m.SerializeToString() for m in original]
+
+    expected_st_c = (  # indices, values, shape
+        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
+        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
+        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3
+
+    expected_st_d = (  # indices, values, shape
+        np.array([[3, 0]], dtype=np.int64),
+        np.array(["hi"], dtype=str),
+        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1
+
+    expected_output = {
+        "st_c": expected_st_c,
+        "st_d": expected_st_d,
+    }
+
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "sparse_keys": ["st_c", "st_d"],
+            "sparse_types": [tf.float32, tf.string],
+        }, expected_output)
+
+  def testSerializedContainingDense(self):
+    """Dense keys present in every example are reshaped to dense_shapes."""
+    original = [
+        example(features=features({
+            "a": float_feature([1, 1]),
+            "b": bytes_feature(["b0_str"]),
+        })),
+        example(features=features({
+            "a": float_feature([-1, -1]),
+            "b": bytes_feature(["b1"]),
+        }))
+    ]
+
+    serialized = [m.SerializeToString() for m in original]
+
+    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]
+
+    expected_output = {
+        "a": np.array(
+            [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
+        "b": np.array(["b0_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
+    }
+
+    # No defaults, values required
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "dense_keys": ["a", "b"],
+            "dense_types": [tf.float32, tf.string],
+            "dense_shapes": dense_shapes,
+        }, expected_output)
+
+  def testSerializedContainingDenseScalar(self):
+    """A scalar default fills in a dense key missing from one example."""
+    original = [
+        example(features=features({
+            "a": float_feature([1]),
+        })),
+        example(features=features({}))
+    ]
+
+    serialized = [m.SerializeToString() for m in original]
+
+    expected_output = {
+        "a": np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
+    }
+
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "dense_defaults": {"a": -1},
+            "dense_shapes": [(1,)],
+            "dense_keys": ["a"],
+            "dense_types": [tf.float32],
+        }, expected_output)
+
+  def testSerializedContainingDenseWithDefaults(self):
+    """Each example omits one dense key, which takes its default value."""
+    original = [
+        example(features=features({
+            "a": float_feature([1, 1]),
+        })),
+        example(features=features({
+            "b": bytes_feature(["b1"]),
+        }))
+    ]
+
+    serialized = [m.SerializeToString() for m in original]
+
+    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]
+    dense_types = [tf.float32, tf.string]
+    dense_defaults = {
+        "a": [3.0, -3.0],
+        "b": "tmp_str",
+    }
+
+    expected_output = {
+        "a": np.array(
+            [[1, 1], [3, -3]], dtype=np.float32).reshape(2, 1, 2, 1),
+        "b": np.array(["tmp_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
+    }
+
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "dense_defaults": dense_defaults,
+            "dense_keys": ["a", "b"],
+            "dense_types": dense_types,
+            "dense_shapes": dense_shapes,
+        }, expected_output)
+
+  def testSerializedContainingSparseAndDenseWithNoDefault(self):
+    """Mixes an absent sparse key, defaulted dense keys and a required one."""
+    dense_defaults = {
+        "a": [1, 2, 3],
+        "b": np.random.rand(3, 3).astype(str),
+        # Feature "c" must be provided
+    }
+    dense_shapes = [(1, 3), (3, 3), (2,)]
+
+    expected_st_a = (  # indices, values, shape
+        np.empty((0, 2), dtype=np.int64),  # indices
+        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
+        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
+
+    original = [
+        example(features=features({
+            "c": float_feature([3, 4])
+        })),
+        example(features=features({
+            "c": float_feature([1, 2])
+        }))
+    ]
+
+    names = ["in1", "in2"]
+    serialized = [m.SerializeToString() for m in original]
+
+    expected_output = {
+        "st_a": expected_st_a,
+        "a": np.array(2 * [[dense_defaults["a"]]]),
+        "b": np.array(2 * [dense_defaults["b"]]),
+        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
+    }
+
+    self._test(
+        {
+            "names": names,
+            "serialized": tf.convert_to_tensor(serialized),
+            "dense_defaults": dense_defaults,
+            "sparse_keys": ["st_a"],
+            "sparse_types": [tf.int64],
+            "dense_keys": ["a", "b", "c"],
+            "dense_types": [tf.int64, tf.string, tf.float32],
+            "dense_shapes": dense_shapes
+        }, expected_output)
+
+
+class ParseSingleExampleTest(tf.test.TestCase):
+  """Tests for tf.parse_single_example on one serialized Example."""
+
+  def _test(self, kwargs, expected_values=None, expected_err_re=None):
+    """Run tf.parse_single_example(**kwargs) and check shapes and values.
+
+    Args:
+      kwargs: Dict of keyword arguments forwarded verbatim to
+        tf.parse_single_example.  "dense_keys", "sparse_keys" and
+        "dense_shapes" are also read here (defaulting to empty) to drive the
+        shape checks.
+      expected_values: Dict mapping each output key to its expected value: the
+        dense value, or an (indices, values, shape) triple for sparse outputs.
+        Only used when expected_err_re is None.
+      expected_err_re: If not None, a regex the op error raised by sess.run()
+        must match; output values are not compared in that case.
+    """
+    with self.test_session() as sess:
+      # Pull out some keys to check shape inference
+      dense_keys = kwargs.get("dense_keys", [])
+      sparse_keys = kwargs.get("sparse_keys", [])
+      dense_shapes = kwargs.get("dense_shapes", [])
+
+      # Returns dict w/ Tensors and SparseTensors
+      out = tf.parse_single_example(**kwargs)
+
+      # Check shapes.  There is no batch dimension here, so dense outputs
+      # have exactly the requested shape and sparse components are rank 1.
+      self.assertEqual(len(dense_keys), len(dense_shapes))
+      for (k, s) in zip(dense_keys, dense_shapes):
+        # as_list() gives plain ints, matching the other shape checks.
+        self.assertEqual(tuple(out[k].get_shape().as_list()), s)
+      for k in sparse_keys:
+        self.assertEqual(
+            tuple(out[k].indices.get_shape().as_list()), (None, 1))
+        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
+        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (1,))
+
+      # Check values
+      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
+      if expected_err_re is None:
+        tf_result = sess.run(result)
+        _compare_output_to_expected(self, out, expected_values, tf_result)
+      else:
+        with self.assertRaisesOpError(expected_err_re):
+          sess.run(result)
+
+  def testSingleExampleWithSparseAndDense(self):
+    """Parses one Example holding a sparse key and a required dense key."""
+    dense_types = [tf.int64, tf.string, tf.float32]
+    dense_shapes = [(1, 3), (3, 3), (2,)]
+    # `str` is used instead of the `np.str` alias, which newer NumPy releases
+    # no longer provide.
+    dense_defaults = {
+        "a": [1, 2, 3],
+        "b": np.random.rand(3, 3).astype(str),
+        # Feature "c" must be provided
+    }
+
+    original = example(features=features(
+        {"c": float_feature([3, 4]),
+         "st_a": float_feature([3.0, 4.0])}))
+
+    serialized = original.SerializeToString()
+
+    expected_st_a = (
+        np.array([[0], [1]], dtype=np.int64),  # indices
+        np.array([3.0, 4.0], dtype=np.float32),  # values
+        np.array([2], dtype=np.int64))  # shape: max_values = 2
+
+    expected_output = {
+        "st_a": expected_st_a,
+        "a": [dense_defaults["a"]],
+        "b": dense_defaults["b"],
+        "c": np.array([3, 4], dtype=np.float32),
+    }
+
+    self._test(
+        {
+            "names": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "dense_defaults": dense_defaults,
+            "dense_types": dense_types,
+            "sparse_keys": ["st_a"],
+            "sparse_types": [tf.float32],
+            "dense_keys": ["a", "b", "c"],
+            "dense_shapes": dense_shapes
+        }, expected_output)
+
+
+# Run the test cases above when this file is executed as a script.
+if __name__ == "__main__":
+  tf.test.main()