diff options
Diffstat (limited to 'tensorflow/python/kernel_tests/parsing_ops_test.py')
-rw-r--r-- | tensorflow/python/kernel_tests/parsing_ops_test.py | 414 |
1 files changed, 414 insertions, 0 deletions
"""Tests for tensorflow.ops.parsing_ops."""

import itertools

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

# Helpers for creating Example objects
example = tf.train.Example
feature = tf.train.Feature
features = lambda d: tf.train.Features(feature=d)
bytes_feature = lambda v: feature(bytes_list=tf.train.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=tf.train.Int64List(value=v))
float_feature = lambda v: feature(float_list=tf.train.FloatList(value=v))


def flatten(list_of_lists):
  """Flatten one level of nesting."""
  return itertools.chain.from_iterable(list_of_lists)


def flatten_values_tensors_or_sparse(tensors_list):
  """Flatten each SparseTensor object into 3 Tensors for session.run()."""
  return list(flatten([[v.indices, v.values, v.shape]
                       if isinstance(v, tf.SparseTensor) else [v]
                       for v in tensors_list]))


def _compare_output_to_expected(
    tester, dict_tensors, expected_tensors, flat_output):
  """Compare flattened session.run() output against expected numpy values.

  Args:
    tester: the tf.test.TestCase instance performing the assertions.
    dict_tensors: dict of key to Tensor or SparseTensor, as returned by
      parse_example / parse_single_example.
    expected_tensors: dict of key to expected value; for a SparseTensor the
      value is a 3-tuple (indices, values, shape).
    flat_output: flat list of numpy arrays from session.run(), in the same
      order produced by flatten_values_tensors_or_sparse(dict_tensors.values()).
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  # .items() (not the Python-2-only .iteritems()) keeps this file runnable
  # under Python 3; within an unmodified dict, .items() iterates in the same
  # order as the .values() call used to build flat_output.
  for k, v in dict_tensors.items():
    expected_v = expected_tensors[k]
    tf.logging.info("Comparing key: %s", k)
    if isinstance(v, tf.SparseTensor):
      # Three outputs for SparseTensor : indices, values, shape.
      tester.assertEqual([k, 3], [k, len(expected_v)])
      tester.assertAllEqual(flat_output[i], expected_v[0])
      tester.assertAllEqual(flat_output[i + 1], expected_v[1])
      tester.assertAllEqual(flat_output[i + 2], expected_v[2])
      i += 3
    else:
      # One output for standard Tensor.
      tester.assertAllEqual(flat_output[i], expected_v)
      i += 1


class ParseExampleTest(tf.test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_example(**kwargs) and check shapes and values.

    If expected_err_re is set, instead asserts that running the op raises an
    OpError matching that regex.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      serialized = kwargs["serialized"]
      dense_keys = kwargs["dense_keys"] if "dense_keys" in kwargs else []
      sparse_keys = kwargs["sparse_keys"] if "sparse_keys" in kwargs else []
      dense_shapes = kwargs["dense_shapes"] if "dense_shapes" in kwargs else []

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_example(**kwargs)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      batch_size = (
          serialized.eval().size if isinstance(serialized, tf.Tensor)
          else np.asarray(serialized).size)
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        self.assertEqual(tuple(out[k].get_shape().as_list()), (batch_size,) + s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 2))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (2,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testEmptySerializedWithAllDefaults(self):
    dense_keys = ["a", "b", "c"]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_defaults = {
        "a": [0, 42, 0],
        # np.str was just an alias for builtin str (removed in NumPy 1.20),
        # so .astype(str) is equivalent.
        "b": np.random.rand(3, 3).astype(str),
        "c": np.random.rand(2).astype(np.float32),
    }

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array(2 * [dense_defaults["c"]]),
    }

    self._test(
        {
            "names": np.empty((0,), dtype=str),
            # empty serialized input Examples
            "serialized": tf.convert_to_tensor(["", ""]),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": dense_keys,
            "dense_types": dense_types,
            "dense_shapes": dense_shapes
        }, expected_output)

  def testEmptySerializedWithoutDefaultsShouldFail(self):
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [0, 42, 0],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" is missing; since there are gaps it will cause failure.
    }
    self._test(
        {
            "serialized": ["", ""],  # empty serialized input Examples
            "names": ["in1", "in2"],
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        },
        expected_err_re="Name: in1, Feature: c is required")

  def testDenseNotMatchingShapeShouldFail(self):
    dense_shapes = [(1, 3)]
    dense_defaults = {
        # no default!
    }

    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
        }))
    ]

    names = ["passing", "failing"]
    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "names": names,
            "dense_defaults": dense_defaults,
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
            "dense_shapes": dense_shapes,
        },
        expected_err_re="Name: failing, Key: a. Number of float values")

  def testSerializedContainingSparse(self):
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature(["hi"])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array([[3, 0]], dtype=np.int64),
        np.array(["hi"], dtype=str),
        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "sparse_keys": ["st_c", "st_d"],
            "sparse_types": [tf.float32, tf.string],
        }, expected_output)

  def testSerializedContainingDense(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
            "b": bytes_feature(["b0_str"]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]

    expected_output = {
        "a": np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["b0_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_keys": ["a", "b"],
            "dense_types": [tf.float32, tf.string],
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingDenseScalar(self):
    original = [
        example(features=features({
            "a": float_feature([1]),
        })),
        example(features=features({}))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a": np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": {"a": -1},
            "dense_shapes": [(1,)],
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
        }, expected_output)

  def testSerializedContainingDenseWithDefaults(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]
    dense_types = [tf.float32, tf.string]
    dense_defaults = {
        "a": [3.0, -3.0],
        "b": "tmp_str",
    }

    expected_output = {
        "a": np.array([[1, 1], [3, -3]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["tmp_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_keys": ["a", "b"],
            "dense_types": dense_types,
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingSparseAndDenseWithNoDefault(self):
    dense_defaults = {
        "a": [1, 2, 3],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }
    dense_shapes = [(1, 3), (3, 3), (2,)]

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    original = [
        example(features=features({
            "c": float_feature([3, 4])
        })),
        example(features=features({
            "c": float_feature([1, 2])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        {
            "names": names,
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        }, expected_output)


class ParseSingleExampleTest(tf.test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_single_example(**kwargs) and check shapes and values.

    If expected_err_re is set, instead asserts that running the op raises an
    OpError matching that regex.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      dense_keys = kwargs["dense_keys"] if "dense_keys" in kwargs else []
      sparse_keys = kwargs["sparse_keys"] if "sparse_keys" in kwargs else []
      dense_shapes = kwargs["dense_shapes"] if "dense_shapes" in kwargs else []

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_single_example(**kwargs)

      # Check shapes
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        self.assertEqual(tuple(out[k].get_shape()), s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 1))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (1,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testSingleExampleWithSparseAndDense(self):
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [1, 2, 3],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }

    original = example(features=features(
        {"c": float_feature([3, 4]),
         "st_a": float_feature([3.0, 4.0])}))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: max_values = 2

    expected_output = {
        "st_a": expected_st_a,
        "a": [dense_defaults["a"]],
        "b": dense_defaults["b"],
        "c": np.array([3, 4], dtype=np.float32),
    }

    self._test(
        {
            "names": "in1",
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_types": dense_types,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.float32],
            "dense_keys": ["a", "b", "c"],
            "dense_shapes": dense_shapes
        }, expected_output)


if __name__ == "__main__":
  tf.test.main()