Fix tf.Example parsing when the Example feature name is provided, but no

value exists. In this case, if no default is given then the error should be correct. If a default *is* provided, it should be used. Also, reformatted the parsing_ops_test.py file (which doesn't lose history, since it hasn't changed much since my original version). Change: 129393762
author: Eugene Brevdo <ebrevdo@google.com> 2016-08-04 16:32:31 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-08-04 17:46:33 -0700
commit: d2589654ddc7bffc2f51eb5bebba2eb78f48a9a2 (patch)
tree: de82106f989305995b2d851a983a87bb7050e794 /tensorflow/python/kernel_tests/parsing_ops_test.py
parent: 6982c512b6491ff1424e67f16bc1ae68432c11ce (diff)
1 files changed, 421 insertions, 308 deletions
diff --git a/tensorflow/python/kernel_tests/parsing_ops_test.py b/tensorflow/python/kernel_tests/parsing_ops_test.py
index 58f6da9f97..52d3c0dde1 100644
--- a/tensorflow/python/kernel_tests/parsing_ops_test.py
+++ b/tensorflow/python/kernel_tests/parsing_ops_test.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """Tests for tensorflow.ops.parsing_ops."""
 
 from __future__ import absolute_import
@@ -46,13 +45,13 @@ def flatten(list_of_lists):
 
 def flatten_values_tensors_or_sparse(tensors_list):
   """Flatten each SparseTensor object into 3 Tensors for session.run()."""
-  return list(flatten([[v.indices, v.values, v.shape]
-                       if isinstance(v, tf.SparseTensor) else [v]
-                       for v in tensors_list]))
+  return list(
+      flatten([[v.indices, v.values, v.shape] if isinstance(v, tf.SparseTensor)
+               else [v] for v in tensors_list]))
 
 
-def _compare_output_to_expected(
-    tester, dict_tensors, expected_tensors, flat_output):
+def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
+                                flat_output):
   tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))
 
   i = 0  # Index into the flattened output of session.run()
@@ -74,11 +73,11 @@ def _compare_output_to_expected(
 
 class ParseExampleTest(tf.test.TestCase):
 
-  def _test(
-      self, kwargs, expected_values=None, expected_err=None):
+  def _test(self, kwargs, expected_values=None, expected_err=None):
     with self.test_session() as sess:
       if expected_err:
-        with self.assertRaisesRegexp(expected_err[0], expected_err[1]):
+        with self.assertRaisesWithPredicateMatch(
+            expected_err[0], expected_err[1]):
           out = tf.parse_example(**kwargs)
           sess.run(flatten_values_tensors_or_sparse(out.values()))
       else:
@@ -92,9 +91,8 @@ class ParseExampleTest(tf.test.TestCase):
       # Check shapes; if serialized is a Tensor we need its size to
       # properly check.
       serialized = kwargs["serialized"]
-      batch_size = (
-          serialized.eval().size if isinstance(serialized, tf.Tensor)
-          else np.asarray(serialized).size)
+      batch_size = (serialized.eval().size if isinstance(serialized, tf.Tensor)
+                    else np.asarray(serialized).size)
       for k, f in kwargs["features"].items():
         if isinstance(f, tf.FixedLenFeature) and f.shape is not None:
           self.assertEqual(
@@ -115,9 +113,12 @@ class ParseExampleTest(tf.test.TestCase):
     c_default = np.random.rand(2).astype(np.float32)
 
     expected_st_a = (  # indices, values, shape
-        np.empty((0, 2), dtype=np.int64),  # indices
-        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
-        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
+        np.empty(
+            (0, 2), dtype=np.int64),  # indices
+        np.empty(
+            (0,), dtype=np.int64),  # sp_a is DT_INT64
+        np.array(
+            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
 
     expected_output = {
         sparse_name: expected_st_a,
@@ -126,38 +127,63 @@ class ParseExampleTest(tf.test.TestCase):
         c_name: np.array(2 * [c_default]),
     }
 
-    self._test({
-        "example_names": np.empty((0,), dtype=bytes),
-        "serialized": tf.convert_to_tensor(["", ""]),
-        "features": {
-            sparse_name: tf.VarLenFeature(tf.int64),
-            a_name: tf.FixedLenFeature((1, 3), tf.int64, default_value=a_default),
-            b_name: tf.FixedLenFeature((3, 3), tf.string, default_value=b_default),
-            c_name: tf.FixedLenFeature((2,), tf.float32, default_value=c_default),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "example_names": np.empty(
+                (0,), dtype=bytes),
+            "serialized": tf.convert_to_tensor(["", ""]),
+            "features": {
+                sparse_name: tf.VarLenFeature(tf.int64),
+                a_name: tf.FixedLenFeature(
+                    (1, 3), tf.int64, default_value=a_default),
+                b_name: tf.FixedLenFeature(
+                    (3, 3), tf.string, default_value=b_default),
+                c_name: tf.FixedLenFeature(
+                    (2,), tf.float32, default_value=c_default),
+            }
+        },
+        expected_output)
 
   def testEmptySerializedWithoutDefaultsShouldFail(self):
-    self._test({
-        "example_names": ["in1", "in2"],
-        "serialized": ["", ""],
-        "features": {
-            "st_a": tf.VarLenFeature(tf.int64),
-            "a": tf.FixedLenFeature((1, 3), tf.int64, default_value=[0, 42, 0]),
-            "b": tf.FixedLenFeature(
-                (3, 3), tf.string,
-                default_value=np.random.rand(3, 3).astype(bytes)),
-            # Feature "c" is missing a default, this gap will cause failure.
-            "c": tf.FixedLenFeature((2,), dtype=tf.float32),
-        }
-    }, expected_err=(tf.OpError, "Name: in1, Feature: c is required"))
+    input_features = {
+        "st_a": tf.VarLenFeature(tf.int64),
+        "a": tf.FixedLenFeature(
+            (1, 3), tf.int64, default_value=[0, 42, 0]),
+        "b": tf.FixedLenFeature(
+            (3, 3),
+            tf.string,
+            default_value=np.random.rand(3, 3).astype(bytes)),
+        # Feature "c" is missing a default, this gap will cause failure.
+        "c": tf.FixedLenFeature(
+            (2,), dtype=tf.float32),
+    }
+
+    # Edge case where the key is there but the feature value is empty
+    original = example(features=features({
+        "c": feature()
+    }))
+    self._test(
+        {
+            "example_names": ["in1"],
+            "serialized": [original.SerializeToString()],
+            "features": input_features,
+        },
+        expected_err=(tf.OpError, "Name: in1, Feature: c is required"))
+
+    # Standard case of missing key and value.
+    self._test(
+        {
+            "example_names": ["in1", "in2"],
+            "serialized": ["", ""],
+            "features": input_features,
+        },
+        expected_err=(tf.OpError, "Name: in1, Feature: c is required"))
 
   def testDenseNotMatchingShapeShouldFail(self):
     original = [
         example(features=features({
             "a": float_feature([1, 1, 3]),
-        })),
-        example(features=features({
+        })), example(features=features({
             "a": float_feature([-1, -1]),
         }))
     ]
@@ -165,27 +191,27 @@ class ParseExampleTest(tf.test.TestCase):
     names = ["passing", "failing"]
     serialized = [m.SerializeToString() for m in original]
 
-    self._test({
-        "example_names": names,
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {"a": tf.FixedLenFeature((1, 3), tf.float32)}
-    }, expected_err=(
-        tf.OpError, "Name: failing, Key: a, Index: 1.  Number of float val"))
+    self._test(
+        {
+            "example_names": names,
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {"a": tf.FixedLenFeature((1, 3), tf.float32)}
+        },
+        expected_err=(tf.OpError,
+                      "Name: failing, Key: a, Index: 1.  Number of float val"))
 
   def testDenseDefaultNoShapeShouldFail(self):
-    original = [
-        example(features=features({
-            "a": float_feature([1, 1, 3]),
-        })),
-    ]
+    original = [example(features=features({"a": float_feature([1, 1, 3]),})),]
 
     serialized = [m.SerializeToString() for m in original]
 
-    self._test({
-        "example_names": ["failing"],
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {"a": tf.FixedLenFeature(None, tf.float32)}
-    }, expected_err=(ValueError, "Missing shape for feature a"))
+    self._test(
+        {
+            "example_names": ["failing"],
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {"a": tf.FixedLenFeature(None, tf.float32)}
+        },
+        expected_err=(ValueError, "Missing shape for feature a"))
 
   def testSerializedContainingSparse(self):
     original = [
@@ -207,14 +233,16 @@ class ParseExampleTest(tf.test.TestCase):
     serialized = [m.SerializeToString() for m in original]
 
     expected_st_c = (  # indices, values, shape
-        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
-        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
-        np.array([4, 3], dtype=np.int64))  # batch == 2, max_elems = 3
+        np.array(
+            [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array(
+                [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array(
+                    [4, 3], dtype=np.int64))  # batch == 2, max_elems = 3
 
     expected_st_d = (  # indices, values, shape
-        np.array([[3, 0]], dtype=np.int64),
-        np.array(["hi"], dtype=bytes),
-        np.array([4, 1], dtype=np.int64))  # batch == 2, max_elems = 1
+        np.array(
+            [[3, 0]], dtype=np.int64), np.array(
+                ["hi"], dtype=bytes), np.array(
+                    [4, 1], dtype=np.int64))  # batch == 2, max_elems = 1
 
     expected_output = {
         "st_c": expected_st_c,
@@ -236,8 +264,7 @@ class ParseExampleTest(tf.test.TestCase):
         example(features=features({
             aname: float_feature([1, 1]),
             bname: bytes_feature([b"b0_str"]),
-        })),
-        example(features=features({
+        })), example(features=features({
             aname: float_feature([-1, -1]),
             bname: bytes_feature([b"b1"]),
         }))
@@ -248,24 +275,28 @@ class ParseExampleTest(tf.test.TestCase):
     expected_output = {
         aname: np.array(
             [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
-        bname: np.array(["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
+        bname: np.array(
+            ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
     }
 
     # No defaults, values required
-    self._test({
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {
-            aname: tf.FixedLenFeature((1, 2, 1), dtype=tf.float32),
-            bname: tf.FixedLenFeature((1, 1, 1, 1), dtype=tf.string),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {
+                aname: tf.FixedLenFeature(
+                    (1, 2, 1), dtype=tf.float32),
+                bname: tf.FixedLenFeature(
+                    (1, 1, 1, 1), dtype=tf.string),
+            }
+        },
+        expected_output)
 
   def testSerializedContainingDenseScalar(self):
     original = [
         example(features=features({
             "a": float_feature([1]),
-        })),
-        example(features=features({}))
+        })), example(features=features({}))
     ]
 
     serialized = [m.SerializeToString() for m in original]
@@ -274,12 +305,15 @@ class ParseExampleTest(tf.test.TestCase):
         "a": np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
     }
 
-    self._test({
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {
-            "a": tf.FixedLenFeature((1,), dtype=tf.float32, default_value=-1),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {
+                "a": tf.FixedLenFeature(
+                    (1,), dtype=tf.float32, default_value=-1),
+            }
+        },
+        expected_output)
 
   def testSerializedContainingDenseWithDefaults(self):
     original = [
@@ -288,37 +322,46 @@ class ParseExampleTest(tf.test.TestCase):
         })),
         example(features=features({
             "b": bytes_feature([b"b1"]),
-        }))
+        })),
+        example(features=features({
+            "b": feature()
+        })),
     ]
 
     serialized = [m.SerializeToString() for m in original]
 
     expected_output = {
-        "a": np.array([[1, 1], [3, -3]], dtype=np.float32).reshape(2, 1, 2, 1),
-        "b": np.array(["tmp_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
+        "a": np.array(
+            [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2, 1),
+        "b": np.array(
+            ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1, 1),
     }
 
-    self._test({
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {
-            "a": tf.FixedLenFeature(
-                (1, 2, 1), dtype=tf.float32, default_value=[3.0, -3.0]),
-            "b": tf.FixedLenFeature(
-                (1, 1, 1, 1), dtype=tf.string, default_value="tmp_str"),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {
+                "a": tf.FixedLenFeature(
+                    (1, 2, 1), dtype=tf.float32, default_value=[3.0, -3.0]),
+                "b": tf.FixedLenFeature(
+                    (1, 1, 1, 1), dtype=tf.string, default_value="tmp_str"),
+            }
+        },
+        expected_output)
 
   def testSerializedContainingSparseAndDenseWithNoDefault(self):
     expected_st_a = (  # indices, values, shape
-        np.empty((0, 2), dtype=np.int64),  # indices
-        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
-        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
+        np.empty(
+            (0, 2), dtype=np.int64),  # indices
+        np.empty(
+            (0,), dtype=np.int64),  # sp_a is DT_INT64
+        np.array(
+            [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
 
     original = [
         example(features=features({
             "c": float_feature([3, 4])
-        })),
-        example(features=features({
+        })), example(features=features({
             "c": float_feature([1, 2])
         }))
     ]
@@ -332,20 +375,25 @@ class ParseExampleTest(tf.test.TestCase):
         "st_a": expected_st_a,
         "a": np.array(2 * [[a_default]]),
         "b": np.array(2 * [b_default]),
-        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
+        "c": np.array(
+            [[3, 4], [1, 2]], dtype=np.float32),
     }
 
-    self._test({
-        "example_names": names,
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {
-            "st_a": tf.VarLenFeature(tf.int64),
-            "a": tf.FixedLenFeature((1, 3), tf.int64, default_value=a_default),
-            "b": tf.FixedLenFeature((3, 3), tf.string, default_value=b_default),
-            # Feature "c" must be provided, since it has no default_value.
-            "c": tf.FixedLenFeature((2,), tf.float32),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "example_names": names,
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {
+                "st_a": tf.VarLenFeature(tf.int64),
+                "a": tf.FixedLenFeature(
+                    (1, 3), tf.int64, default_value=a_default),
+                "b": tf.FixedLenFeature(
+                    (3, 3), tf.string, default_value=b_default),
+                # Feature "c" must be provided, since it has no default_value.
+                "c": tf.FixedLenFeature((2,), tf.float32),
+            }
+        },
+        expected_output)
 
 
 class ParseSingleExampleTest(tf.test.TestCase):
@@ -353,7 +401,8 @@ class ParseSingleExampleTest(tf.test.TestCase):
   def _test(self, kwargs, expected_values=None, expected_err=None):
     with self.test_session() as sess:
       if expected_err:
-        with self.assertRaisesRegexp(expected_err[0], expected_err[1]):
+        with self.assertRaisesWithPredicateMatch(
+            expected_err[0], expected_err[1]):
           out = tf.parse_single_example(**kwargs)
           sess.run(flatten_values_tensors_or_sparse(out.values()))
       else:
@@ -374,16 +423,17 @@ class ParseSingleExampleTest(tf.test.TestCase):
           self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (1,))
 
   def testSingleExampleWithSparseAndDense(self):
-    original = example(features=features(
-        {"c": float_feature([3, 4]),
-         "st_a": float_feature([3.0, 4.0])}))
+    original = example(features=features({"c": float_feature([3, 4]),
+                                          "st_a": float_feature([3.0, 4.0])}))
 
     serialized = original.SerializeToString()
 
-    expected_st_a = (
-        np.array([[0], [1]], dtype=np.int64),  # indices
-        np.array([3.0, 4.0], dtype=np.float32),  # values
-        np.array([2], dtype=np.int64))  # shape: max_values = 2
+    expected_st_a = (np.array(
+        [[0], [1]], dtype=np.int64),  # indices
+                     np.array(
+                         [3.0, 4.0], dtype=np.float32),  # values
+                     np.array(
+                         [2], dtype=np.int64))  # shape: max_values = 2
 
     a_default = [1, 2, 3]
     b_default = np.random.rand(3, 3).astype(bytes)
@@ -391,20 +441,25 @@ class ParseSingleExampleTest(tf.test.TestCase):
         "st_a": expected_st_a,
         "a": [a_default],
         "b": b_default,
-        "c": np.array([3, 4], dtype=np.float32),
+        "c": np.array(
+            [3, 4], dtype=np.float32),
     }
 
-    self._test({
-        "example_names": tf.convert_to_tensor("in1"),
-        "serialized": tf.convert_to_tensor(serialized),
-        "features": {
-            "st_a": tf.VarLenFeature(tf.float32),
-            "a": tf.FixedLenFeature((1, 3), tf.int64, default_value=a_default),
-            "b": tf.FixedLenFeature((3, 3), tf.string, default_value=b_default),
-            # Feature "c" must be provided, since it has no default_value.
-            "c": tf.FixedLenFeature((2,), tf.float32),
-        }
-    }, expected_output)
+    self._test(
+        {
+            "example_names": tf.convert_to_tensor("in1"),
+            "serialized": tf.convert_to_tensor(serialized),
+            "features": {
+                "st_a": tf.VarLenFeature(tf.float32),
+                "a": tf.FixedLenFeature(
+                    (1, 3), tf.int64, default_value=a_default),
+                "b": tf.FixedLenFeature(
+                    (3, 3), tf.string, default_value=b_default),
+                # Feature "c" must be provided, since it has no default_value.
+                "c": tf.FixedLenFeature((2,), tf.float32),
+            }
+        },
+        expected_output)
 
 
 class ParseSequenceExampleTest(tf.test.TestCase):
@@ -413,26 +468,31 @@ class ParseSequenceExampleTest(tf.test.TestCase):
     value = sequence_example(
         context=features({
             "global_feature": float_feature([1, 2, 3]),
-            }),
+        }),
         feature_lists=feature_lists({
             "repeated_feature_2_frames": feature_list([
                 bytes_feature([b"a", b"b", b"c"]),
-                bytes_feature([b"a", b"d", b"e"])]),
+                bytes_feature([b"a", b"d", b"e"])
+            ]),
             "repeated_feature_3_frames": feature_list([
-                int64_feature([3, 4, 5, 6, 7]),
-                int64_feature([-1, 0, 0, 0, 0]),
-                int64_feature([1, 2, 3, 4, 5])])
-            }))
+                int64_feature([3, 4, 5, 6, 7]), int64_feature([-1, 0, 0, 0, 0]),
+                int64_feature([1, 2, 3, 4, 5])
+            ])
+        }))
     value.SerializeToString()  # Smoke test
 
-  def _test(self, kwargs, expected_context_values=None,
-            expected_feat_list_values=None, expected_err=None):
+  def _test(self,
+            kwargs,
+            expected_context_values=None,
+            expected_feat_list_values=None,
+            expected_err=None):
     expected_context_values = expected_context_values or {}
     expected_feat_list_values = expected_feat_list_values or {}
 
     with self.test_session() as sess:
       if expected_err:
-        with self.assertRaisesRegexp(expected_err[0], expected_err[1]):
+        with self.assertRaisesWithPredicateMatch(
+            expected_err[0], expected_err[1]):
           c_out, fl_out = tf.parse_single_sequence_example(**kwargs)
           if c_out:
             sess.run(flatten_values_tensors_or_sparse(c_out.values()))
@@ -442,16 +502,16 @@ class ParseSequenceExampleTest(tf.test.TestCase):
         # Returns dicts w/ Tensors and SparseTensors.
         context_out, feat_list_out = tf.parse_single_sequence_example(**kwargs)
         context_result = sess.run(
-            flatten_values_tensors_or_sparse(
-                context_out.values())) if context_out else []
+            flatten_values_tensors_or_sparse(context_out.values(
+            ))) if context_out else []
         feat_list_result = sess.run(
-            flatten_values_tensors_or_sparse(
-                feat_list_out.values())) if feat_list_out else []
+            flatten_values_tensors_or_sparse(feat_list_out.values(
+            ))) if feat_list_out else []
         # Check values.
-        _compare_output_to_expected(
-            self, context_out, expected_context_values, context_result)
-        _compare_output_to_expected(
-            self, feat_list_out, expected_feat_list_values, feat_list_result)
+        _compare_output_to_expected(self, context_out, expected_context_values,
+                                    context_result)
+        _compare_output_to_expected(self, feat_list_out,
+                                    expected_feat_list_values, feat_list_result)
 
       # Check shapes; if serialized is a Tensor we need its size to
       # properly check.
@@ -469,16 +529,18 @@ class ParseSequenceExampleTest(tf.test.TestCase):
                 tuple(context_out[k].shape.get_shape().as_list()), (1,))
 
   def testSequenceExampleWithSparseAndDenseContext(self):
-    original = sequence_example(context=features(
-        {"c": float_feature([3, 4]),
-         "st_a": float_feature([3.0, 4.0])}))
+    original = sequence_example(context=features({"c": float_feature([3, 4]),
+                                                  "st_a": float_feature(
+                                                      [3.0, 4.0])}))
 
     serialized = original.SerializeToString()
 
-    expected_st_a = (
-        np.array([[0], [1]], dtype=np.int64),  # indices
-        np.array([3.0, 4.0], dtype=np.float32),  # values
-        np.array([2], dtype=np.int64))  # shape: num_features = 2
+    expected_st_a = (np.array(
+        [[0], [1]], dtype=np.int64),  # indices
+                     np.array(
+                         [3.0, 4.0], dtype=np.float32),  # values
+                     np.array(
+                         [2], dtype=np.int64))  # shape: num_features = 2
 
     a_default = [1, 2, 3]
     b_default = np.random.rand(3, 3).astype(bytes)
@@ -486,20 +548,25 @@ class ParseSequenceExampleTest(tf.test.TestCase):
         "st_a": expected_st_a,
         "a": [a_default],
         "b": b_default,
-        "c": np.array([3, 4], dtype=np.float32),
+        "c": np.array(
+            [3, 4], dtype=np.float32),
     }
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "context_features": {
-            "st_a": tf.VarLenFeature(tf.float32),
-            "a": tf.FixedLenFeature((1, 3), tf.int64, default_value=a_default),
-            "b": tf.FixedLenFeature((3, 3), tf.string, default_value=b_default),
-            # Feature "c" must be provided, since it has no default_value.
-            "c": tf.FixedLenFeature((2,), tf.float32),
-        }
-    }, expected_context_values=expected_context_output)
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "context_features": {
+                "st_a": tf.VarLenFeature(tf.float32),
+                "a": tf.FixedLenFeature(
+                    (1, 3), tf.int64, default_value=a_default),
+                "b": tf.FixedLenFeature(
+                    (3, 3), tf.string, default_value=b_default),
+                # Feature "c" must be provided, since it has no default_value.
+                "c": tf.FixedLenFeature((2,), tf.float32),
+            }
+        },
+        expected_context_values=expected_context_output)
 
   def testSequenceExampleWithMultipleSizeFeatureLists(self):
     original = sequence_example(feature_lists=feature_lists({
@@ -507,229 +574,274 @@ class ParseSequenceExampleTest(tf.test.TestCase):
             int64_feature([-1, 0, 1]),
             int64_feature([2, 3, 4]),
             int64_feature([5, 6, 7]),
-            int64_feature([8, 9, 10]),]),
+            int64_feature([8, 9, 10]),
+        ]),
         "b": feature_list([
-            bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
+            bytes_feature([b"r00", b"r01", b"r10", b"r11"])
+        ]),
         "c": feature_list([
-            float_feature([3, 4]),
-            float_feature([-1, 2])]),
-        }))
+            float_feature([3, 4]), float_feature([-1, 2])
+        ]),
+    }))
 
     serialized = original.SerializeToString()
 
     expected_feature_list_output = {
-        "a": np.array([  # outer dimension is time.
-            [[-1, 0, 1]],  # inside are 1x3 matrices
-            [[2, 3, 4]],
-            [[5, 6, 7]],
-            [[8, 9, 10]]], dtype=np.int64),
-        "b": np.array([  # outer dimension is time, inside are 2x2 matrices
-            [[b"r00", b"r01"], [b"r10", b"r11"]]], dtype=bytes),
-        "c": np.array([  # outer dimension is time, inside are 2-vectors
-            [3, 4],
-            [-1, 2]], dtype=np.float32),
-        "d": np.empty(shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
-        }
+        "a": np.array(
+            [  # outer dimension is time.
+                [[-1, 0, 1]],  # inside are 1x3 matrices
+                [[2, 3, 4]],
+                [[5, 6, 7]],
+                [[8, 9, 10]]
+            ],
+            dtype=np.int64),
+        "b": np.array(
+            [  # outer dimension is time, inside are 2x2 matrices
+                [[b"r00", b"r01"], [b"r10", b"r11"]]
+            ],
+            dtype=bytes),
+        "c": np.array(
+            [  # outer dimension is time, inside are 2-vectors
+                [3, 4], [-1, 2]
+            ],
+            dtype=np.float32),
+        "d": np.empty(
+            shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
+    }
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {
-            "a": tf.FixedLenSequenceFeature((1, 3), tf.int64),
-            "b": tf.FixedLenSequenceFeature((2, 2), tf.string),
-            "c": tf.FixedLenSequenceFeature((2,), tf.float32),
-            "d": tf.FixedLenSequenceFeature((5,), tf.float32, allow_missing=True),
-        }
-    }, expected_feat_list_values=expected_feature_list_output)
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {
+                "a": tf.FixedLenSequenceFeature((1, 3), tf.int64),
+                "b": tf.FixedLenSequenceFeature((2, 2), tf.string),
+                "c": tf.FixedLenSequenceFeature((2,), tf.float32),
+                "d": tf.FixedLenSequenceFeature(
+                    (5,), tf.float32, allow_missing=True),
+            }
+        },
+        expected_feat_list_values=expected_feature_list_output)
 
   def testSequenceExampleWithoutDebugName(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            int64_feature([3, 4]),
-            int64_feature([1, 0])]),
+            int64_feature([3, 4]), int64_feature([1, 0])
+        ]),
         "st_a": feature_list([
-            float_feature([3.0, 4.0]),
-            float_feature([5.0]),
-            float_feature([])]),
+            float_feature([3.0, 4.0]), float_feature([5.0]), float_feature([])
+        ]),
         "st_b": feature_list([
-            bytes_feature([b"a"]),
-            bytes_feature([]),
-            bytes_feature([]),
-            bytes_feature([b"b", b"c"])])}))
+            bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
+            bytes_feature([b"b", b"c"])
+        ])
+    }))
 
     serialized = original.SerializeToString()
 
     expected_st_a = (
-        np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
-        np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
-        np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
+        np.array(
+            [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
+        np.array(
+            [3.0, 4.0, 5.0], dtype=np.float32),  # values
+        np.array(
+            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
 
     expected_st_b = (
-        np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
-        np.array(["a", "b", "c"], dtype="|S"),  # values
-        np.array([4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
+        np.array(
+            [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
+        np.array(
+            ["a", "b", "c"], dtype="|S"),  # values
+        np.array(
+            [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
 
     expected_st_c = (
-        np.empty((0, 2), dtype=np.int64),  # indices
-        np.empty((0,), dtype=np.int64),  # values
-        np.array([0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
+        np.empty(
+            (0, 2), dtype=np.int64),  # indices
+        np.empty(
+            (0,), dtype=np.int64),  # values
+        np.array(
+            [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
 
     expected_feature_list_output = {
-        "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
+        "a": np.array(
+            [[3, 4], [1, 0]], dtype=np.int64),
         "st_a": expected_st_a,
         "st_b": expected_st_b,
         "st_c": expected_st_c,
     }
 
-    self._test({
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {
-            "st_a": tf.VarLenFeature(tf.float32),
-            "st_b": tf.VarLenFeature(tf.string),
-            "st_c": tf.VarLenFeature(tf.int64),
-            "a": tf.FixedLenSequenceFeature((2,), tf.int64),
-        }
-    }, expected_feat_list_values=expected_feature_list_output)
+    self._test(
+        {
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {
+                "st_a": tf.VarLenFeature(tf.float32),
+                "st_b": tf.VarLenFeature(tf.string),
+                "st_c": tf.VarLenFeature(tf.int64),
+                "a": tf.FixedLenSequenceFeature((2,), tf.int64),
+            }
+        },
+        expected_feat_list_values=expected_feature_list_output)
 
   def testSequenceExampleWithSparseAndDenseFeatureLists(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            int64_feature([3, 4]),
-            int64_feature([1, 0])]),
+            int64_feature([3, 4]), int64_feature([1, 0])
+        ]),
         "st_a": feature_list([
-            float_feature([3.0, 4.0]),
-            float_feature([5.0]),
-            float_feature([])]),
+            float_feature([3.0, 4.0]), float_feature([5.0]), float_feature([])
+        ]),
         "st_b": feature_list([
-            bytes_feature([b"a"]),
-            bytes_feature([]),
-            bytes_feature([]),
-            bytes_feature([b"b", b"c"])])}))
+            bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
+            bytes_feature([b"b", b"c"])
+        ])
+    }))
 
     serialized = original.SerializeToString()
 
     expected_st_a = (
-        np.array([[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
-        np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
-        np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
+        np.array(
+            [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
+        np.array(
+            [3.0, 4.0, 5.0], dtype=np.float32),  # values
+        np.array(
+            [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
 
     expected_st_b = (
-        np.array([[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
-        np.array(["a", "b", "c"], dtype="|S"),  # values
-        np.array([4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
+        np.array(
+            [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
+        np.array(
+            ["a", "b", "c"], dtype="|S"),  # values
+        np.array(
+            [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
 
     expected_st_c = (
-        np.empty((0, 2), dtype=np.int64),  # indices
-        np.empty((0,), dtype=np.int64),  # values
-        np.array([0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
+        np.empty(
+            (0, 2), dtype=np.int64),  # indices
+        np.empty(
+            (0,), dtype=np.int64),  # values
+        np.array(
+            [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
 
     expected_feature_list_output = {
-        "a": np.array([[3, 4], [1, 0]], dtype=np.int64),
+        "a": np.array(
+            [[3, 4], [1, 0]], dtype=np.int64),
         "st_a": expected_st_a,
         "st_b": expected_st_b,
         "st_c": expected_st_c,
     }
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {
-            "st_a": tf.VarLenFeature(tf.float32),
-            "st_b": tf.VarLenFeature(tf.string),
-            "st_c": tf.VarLenFeature(tf.int64),
-            "a": tf.FixedLenSequenceFeature((2,), tf.int64),
-        }
-    }, expected_feat_list_values=expected_feature_list_output)
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {
+                "st_a": tf.VarLenFeature(tf.float32),
+                "st_b": tf.VarLenFeature(tf.string),
+                "st_c": tf.VarLenFeature(tf.int64),
+                "a": tf.FixedLenSequenceFeature((2,), tf.int64),
+            }
+        },
+        expected_feat_list_values=expected_feature_list_output)
 
   def testSequenceExampleListWithInconsistentDataFails(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            int64_feature([-1, 0]),
-            float_feature([2, 3])])
-        }))
+            int64_feature([-1, 0]), float_feature([2, 3])
+        ])
+    }))
 
     serialized = original.SerializeToString()
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {"a": tf.FixedLenSequenceFeature((2,), tf.int64)}
-    }, expected_err=(
-        tf.OpError,
-        "Feature list: a, Index: 1."
-        "  Data types don't match. Expected type: int64"))
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {"a": tf.FixedLenSequenceFeature(
+                (2,), tf.int64)}
+        },
+        expected_err=(tf.OpError, "Feature list: a, Index: 1."
+                      "  Data types don't match. Expected type: int64"))
 
   def testSequenceExampleListWithWrongDataTypeFails(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            float_feature([2, 3])])
-        }))
+            float_feature([2, 3])
+        ])
+    }))
 
     serialized = original.SerializeToString()
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {"a": tf.FixedLenSequenceFeature((2,), tf.int64)}
-    }, expected_err=(
-        tf.OpError,
-        "Feature list: a, Index: 0.  Data types don't match."
-        " Expected type: int64"))
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {"a": tf.FixedLenSequenceFeature(
+                (2,), tf.int64)}
+        },
+        expected_err=(tf.OpError,
+                      "Feature list: a, Index: 0.  Data types don't match."
+                      " Expected type: int64"))
 
   def testSequenceExampleListWithWrongSparseDataTypeFails(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            int64_feature([3, 4]),
-            int64_feature([1, 2]),
-            float_feature([2.0, 3.0])])
-        }))
+            int64_feature([3, 4]), int64_feature([1, 2]),
+            float_feature([2.0, 3.0])
+        ])
+    }))
 
     serialized = original.SerializeToString()
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {"a": tf.FixedLenSequenceFeature((2,), tf.int64)}
-    }, expected_err=(
-        tf.OpError,
-        "Name: in1, Feature list: a, Index: 2."
-        "  Data types don't match. Expected type: int64"
-        "  Feature is: float_list"))
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {"a": tf.FixedLenSequenceFeature(
+                (2,), tf.int64)}
+        },
+        expected_err=(tf.OpError, "Name: in1, Feature list: a, Index: 2."
+                      "  Data types don't match. Expected type: int64"
+                      "  Feature is: float_list"))
 
   def testSequenceExampleListWithWrongShapeFails(self):
     original = sequence_example(feature_lists=feature_lists({
         "a": feature_list([
-            int64_feature([2, 3]),
-            int64_feature([2, 3, 4])]),
-        }))
+            int64_feature([2, 3]), int64_feature([2, 3, 4])
+        ]),
+    }))
 
     serialized = original.SerializeToString()
 
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(serialized),
-        "sequence_features": {"a": tf.FixedLenSequenceFeature((2,), tf.int64)}
-    }, expected_err=(
-        tf.OpError,
-        r"Name: in1, Key: a, Index: 1."
-        r"  Number of int64 values != expected."
-        r"  values size: 3 but output shape: \[2\]"))
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(serialized),
+            "sequence_features": {"a": tf.FixedLenSequenceFeature(
+                (2,), tf.int64)}
+        },
+        expected_err=(tf.OpError, r"Name: in1, Key: a, Index: 1."
+                      r"  Number of int64 values != expected."
+                      r"  values size: 3 but output shape: \[2\]"))
 
   def testSequenceExampleWithMissingFeatureListFails(self):
     original = sequence_example(feature_lists=feature_lists({}))
 
     # Test fails because we didn't add:
     #  feature_list_dense_defaults = {"a": None}
-    self._test({
-        "example_name": "in1",
-        "serialized": tf.convert_to_tensor(original.SerializeToString()),
-        "sequence_features": {"a": tf.FixedLenSequenceFeature((2,), tf.int64)}
-    }, expected_err=(
-        tf.OpError,
-        "Name: in1, Feature list 'a' is required but could not be found."
-        "  Did you mean to include it in"
-        " feature_list_dense_missing_assumed_empty or"
-        " feature_list_dense_defaults?"))
+    self._test(
+        {
+            "example_name": "in1",
+            "serialized": tf.convert_to_tensor(original.SerializeToString()),
+            "sequence_features": {"a": tf.FixedLenSequenceFeature(
+                (2,), tf.int64)}
+        },
+        expected_err=(
+            tf.OpError,
+            "Name: in1, Feature list 'a' is required but could not be found."
+            "  Did you mean to include it in"
+            " feature_list_dense_missing_assumed_empty or"
+            " feature_list_dense_defaults?"))
 
 
 class DecodeJSONExampleTest(tf.test.TestCase):
@@ -740,14 +852,15 @@ class DecodeJSONExampleTest(tf.test.TestCase):
 
       json_tensor = tf.constant(
           [json_format.MessageToJson(m) for m in examples.flatten()],
-          shape=examples.shape, dtype=tf.string)
+          shape=examples.shape,
+          dtype=tf.string)
       binary_tensor = tf.decode_json_example(json_tensor)
       binary_val = sess.run(binary_tensor)
 
       if examples.shape:
         self.assertShapeEqual(binary_val, json_tensor)
-        for input_example, output_binary in zip(np.array(examples).flatten(),
-                                                binary_val.flatten()):
+        for input_example, output_binary in zip(
+            np.array(examples).flatten(), binary_val.flatten()):
           output_example = tf.train.Example()
           output_example.ParseFromString(output_binary)
           self.assertProtoEquals(input_example, output_example)
author	Eugene Brevdo <ebrevdo@google.com>	2016-08-04 16:32:31 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-08-04 17:46:33 -0700
commit	d2589654ddc7bffc2f51eb5bebba2eb78f48a9a2 (patch)
tree	de82106f989305995b2d851a983a87bb7050e794 /tensorflow/python/kernel_tests/parsing_ops_test.py
parent	6982c512b6491ff1424e67f16bc1ae68432c11ce (diff)