diff options
Diffstat (limited to 'tensorflow/python/kernel_tests/parsing_ops_test.py')
-rw-r--r-- | tensorflow/python/kernel_tests/parsing_ops_test.py | 414 |
1 files changed, 414 insertions, 0 deletions
"""Tests for tensorflow.ops.parsing_ops."""

import itertools

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

# Helpers for creating Example objects
example = tf.train.Example
feature = tf.train.Feature
features = lambda d: tf.train.Features(feature=d)
bytes_feature = lambda v: feature(bytes_list=tf.train.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=tf.train.Int64List(value=v))
float_feature = lambda v: feature(float_list=tf.train.FloatList(value=v))


def flatten(list_of_lists):
  """Flatten one level of nesting."""
  return itertools.chain.from_iterable(list_of_lists)


def flatten_values_tensors_or_sparse(tensors_list):
  """Flatten each SparseTensor object into 3 Tensors for session.run()."""
  return list(flatten([[v.indices, v.values, v.shape]
                       if isinstance(v, tf.SparseTensor) else [v]
                       for v in tensors_list]))


def _compare_output_to_expected(
    tester, dict_tensors, expected_tensors, flat_output):
  """Compare flattened session.run() output against expected numpy values.

  Args:
    tester: the tf.test.TestCase instance performing the assertions.
    dict_tensors: dict of key to Tensor or SparseTensor, as returned by
      parse_example / parse_single_example.
    expected_tensors: dict of key to expected value; for a SparseTensor the
      value is a 3-tuple (indices, values, shape).
    flat_output: flat list of numpy arrays from session.run(), in the same
      order produced by flatten_values_tensors_or_sparse(dict_tensors.values()).
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  # .items() (not the Python-2-only .iteritems()) keeps this file runnable
  # under Python 3; within an unmodified dict, .items() iterates in the same
  # order as the .values() call used to build flat_output.
  for k, v in dict_tensors.items():
    expected_v = expected_tensors[k]
    tf.logging.info("Comparing key: %s", k)
    if isinstance(v, tf.SparseTensor):
      # Three outputs for SparseTensor : indices, values, shape.
      tester.assertEqual([k, 3], [k, len(expected_v)])
      tester.assertAllEqual(flat_output[i], expected_v[0])
      tester.assertAllEqual(flat_output[i + 1], expected_v[1])
      tester.assertAllEqual(flat_output[i + 2], expected_v[2])
      i += 3
    else:
      # One output for standard Tensor.
      tester.assertAllEqual(flat_output[i], expected_v)
      i += 1


class ParseExampleTest(tf.test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_example(**kwargs) and check shapes and values.

    If expected_err_re is set, instead asserts that running the op raises an
    OpError matching that regex.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      serialized = kwargs["serialized"]
      dense_keys = kwargs["dense_keys"] if "dense_keys" in kwargs else []
      sparse_keys = kwargs["sparse_keys"] if "sparse_keys" in kwargs else []
      dense_shapes = kwargs["dense_shapes"] if "dense_shapes" in kwargs else []

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_example(**kwargs)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      batch_size = (
          serialized.eval().size if isinstance(serialized, tf.Tensor)
          else np.asarray(serialized).size)
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        self.assertEqual(tuple(out[k].get_shape().as_list()), (batch_size,) + s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 2))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (2,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testEmptySerializedWithAllDefaults(self):
    dense_keys = ["a", "b", "c"]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_defaults = {
        "a": [0, 42, 0],
        # np.str was just an alias for builtin str (removed in NumPy 1.20),
        # so .astype(str) is equivalent.
        "b": np.random.rand(3, 3).astype(str),
        "c": np.random.rand(2).astype(np.float32),
    }

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array(2 * [dense_defaults["c"]]),
    }

    self._test(
        {
            "names": np.empty((0,), dtype=str),
            # empty serialized input Examples
            "serialized": tf.convert_to_tensor(["", ""]),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": dense_keys,
            "dense_types": dense_types,
            "dense_shapes": dense_shapes
        }, expected_output)

  def testEmptySerializedWithoutDefaultsShouldFail(self):
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [0, 42, 0],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" is missing; since there are gaps it will cause failure.
    }
    self._test(
        {
            "serialized": ["", ""],  # empty serialized input Examples
            "names": ["in1", "in2"],
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        },
        expected_err_re="Name: in1, Feature: c is required")

  def testDenseNotMatchingShapeShouldFail(self):
    dense_shapes = [(1, 3)]
    dense_defaults = {
        # no default!
    }

    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
        }))
    ]

    names = ["passing", "failing"]
    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "names": names,
            "dense_defaults": dense_defaults,
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
            "dense_shapes": dense_shapes,
        },
        expected_err_re="Name: failing, Key: a. Number of float values")

  def testSerializedContainingSparse(self):
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature(["hi"])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array([[3, 0]], dtype=np.int64),
        np.array(["hi"], dtype=str),
        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "sparse_keys": ["st_c", "st_d"],
            "sparse_types": [tf.float32, tf.string],
        }, expected_output)

  def testSerializedContainingDense(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
            "b": bytes_feature(["b0_str"]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]

    expected_output = {
        "a": np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["b0_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_keys": ["a", "b"],
            "dense_types": [tf.float32, tf.string],
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingDenseScalar(self):
    original = [
        example(features=features({
            "a": float_feature([1]),
        })),
        example(features=features({}))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a": np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": {"a": -1},
            "dense_shapes": [(1,)],
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
        }, expected_output)

  def testSerializedContainingDenseWithDefaults(self):
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]
    dense_types = [tf.float32, tf.string]
    dense_defaults = {
        "a": [3.0, -3.0],
        "b": "tmp_str",
    }

    expected_output = {
        "a": np.array([[1, 1], [3, -3]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["tmp_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_keys": ["a", "b"],
            "dense_types": dense_types,
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingSparseAndDenseWithNoDefault(self):
    dense_defaults = {
        "a": [1, 2, 3],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }
    dense_shapes = [(1, 3), (3, 3), (2,)]

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    original = [
        example(features=features({
            "c": float_feature([3, 4])
        })),
        example(features=features({
            "c": float_feature([1, 2])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        {
            "names": names,
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        }, expected_output)


class ParseSingleExampleTest(tf.test.TestCase):

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_single_example(**kwargs) and check shapes and values.

    If expected_err_re is set, instead asserts that running the op raises an
    OpError matching that regex.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      dense_keys = kwargs["dense_keys"] if "dense_keys" in kwargs else []
      sparse_keys = kwargs["sparse_keys"] if "sparse_keys" in kwargs else []
      dense_shapes = kwargs["dense_shapes"] if "dense_shapes" in kwargs else []

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_single_example(**kwargs)

      # Check shapes
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        self.assertEqual(tuple(out[k].get_shape()), s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 1))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (1,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testSingleExampleWithSparseAndDense(self):
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [1, 2, 3],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }

    original = example(features=features(
        {"c": float_feature([3, 4]),
         "st_a": float_feature([3.0, 4.0])}))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: max_values = 2

    expected_output = {
        "st_a": expected_st_a,
        "a": [dense_defaults["a"]],
        "b": dense_defaults["b"],
        "c": np.array([3, 4], dtype=np.float32),
    }

    self._test(
        {
            "names": "in1",
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_types": dense_types,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.float32],
            "dense_keys": ["a", "b", "c"],
            "dense_shapes": dense_shapes
        }, expected_output)


if __name__ == "__main__":
  tf.test.main()