1 files changed, 303 insertions, 0 deletions
diff --git a/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test_base.py b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test_base.py
new file mode 100644
index 0000000000..e3570e38a3
--- /dev/null
+++ b/tensorflow/contrib/proto/python/kernel_tests/decode_proto_op_test_base.py
@@ -0,0 +1,303 @@
+# =============================================================================
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Tests for decode_proto op."""
+
+# Python3 preparedness imports.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+
+from google.protobuf import text_format
+
+from tensorflow.contrib.proto.python.kernel_tests import proto_op_test_base as test_base
+from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+
+
+class DecodeProtoOpTestBase(test_base.ProtoOpTestBase, parameterized.TestCase):
+  """Base class for testing proto decoding ops."""
+
+  def __init__(self, decode_module, methodName='runTest'):  # pylint: disable=invalid-name
+    """DecodeProtoOpTestBase initializer.
+
+    Args:
+      decode_module: a module containing the `decode_proto_op` method
+      methodName: the name of the test method (same as for test.TestCase)
+    """
+
+    super(DecodeProtoOpTestBase, self).__init__(methodName)
+    self._decode_module = decode_module
+
+  def _compareValues(self, fd, vs, evs):
+    """Compare lists/arrays of field values."""
+
+    if len(vs) != len(evs):
+      self.fail('Field %s decoded %d outputs, expected %d' %
+                (fd.name, len(vs), len(evs)))
+    for i, ev in enumerate(evs):
+      # Special case fuzzy match for float32. TensorFlow seems to mess with
+      # MAX_FLT slightly and the test doesn't work otherwise.
+      # TODO(nix): ask on TF list about why MAX_FLT doesn't pass through.
+      if fd.cpp_type == fd.CPPTYPE_FLOAT:
+        # Numpy isclose() is better than assertIsClose() which uses an absolute
+        # value comparison.
+        self.assertTrue(
+            np.isclose(vs[i], ev), 'expected %r, actual %r' % (ev, vs[i]))
+      elif fd.cpp_type == fd.CPPTYPE_STRING:
+        # In Python3 string tensor values will be represented as bytes, so we
+        # reencode the proto values to match that.
+        self.assertEqual(vs[i], ev.encode('ascii'))
+      else:
+        # Doubles and other types pass through unscathed.
+        self.assertEqual(vs[i], ev)
+
+  def _compareProtos(self, batch_shape, sizes, fields, field_dict):
+    """Compare protos of type TestValue.
+
+    Args:
+      batch_shape: the shape of the input tensor of serialized messages.
+      sizes: int matrix of repeat counts returned by decode_proto
+      fields: list of test_example_pb2.FieldSpec (types and expected values)
+      field_dict: map from field names to decoded numpy tensors of values
+    """
+
+    # Check that expected values match.
+    for field in fields:
+      values = field_dict[field.name]
+      self.assertEqual(dtypes.as_dtype(values.dtype), field.dtype)
+
+      fd = field.value.DESCRIPTOR.fields_by_name[field.name]
+
+      # Values has the same shape as the input plus an extra
+      # dimension for repeats.
+      self.assertEqual(list(values.shape)[:-1], batch_shape)
+
+      # Nested messages are represented as TF strings, requiring
+      # some special handling.
+      if field.name == 'message_value':
+        vs = []
+        for buf in values.flat:
+          msg = test_example_pb2.PrimitiveValue()
+          msg.ParseFromString(buf)
+          vs.append(msg)
+        evs = getattr(field.value, field.name)
+        if len(vs) != len(evs):
+          self.fail('Field %s decoded %d outputs, expected %d' %
+                    (fd.name, len(vs), len(evs)))
+        for v, ev in zip(vs, evs):
+          self.assertEqual(v, ev)
+        continue
+
+      tf_type_to_primitive_value_field = {
+          dtypes.bool:
+              'bool_value',
+          dtypes.float32:
+              'float_value',
+          dtypes.float64:
+              'double_value',
+          dtypes.int8:
+              'int8_value',
+          dtypes.int32:
+              'int32_value',
+          dtypes.int64:
+              'int64_value',
+          dtypes.string:
+              'string_value',
+          dtypes.uint8:
+              'uint8_value',
+          dtypes.uint32:
+              'uint32_value',
+          dtypes.uint64:
+              'uint64_value',
+      }
+      tf_field_name = tf_type_to_primitive_value_field.get(field.dtype)
+      if tf_field_name is None:
+        self.fail('Unhandled tensorflow type %d' % field.dtype)
+
+      self._compareValues(fd, values.flat,
+                          getattr(field.value, tf_field_name))
+
+  def _runDecodeProtoTests(self, fields, case_sizes, batch_shape, batch,
+                           message_type, message_format, sanitize,
+                           force_disordered=False):
+    """Run decode tests on a batch of messages.
+
+    Args:
+      fields: list of test_example_pb2.FieldSpec (types and expected values)
+      case_sizes: expected sizes array
+      batch_shape: the shape of the input tensor of serialized messages
+      batch: list of serialized messages
+      message_type: descriptor name for messages
+      message_format: format of messages, 'text' or 'binary'
+      sanitize: whether to sanitize binary protobuf inputs
+      force_disordered: whether to force fields encoded out of order.
+    """
+
+    if force_disordered:
+      # Exercise code path that handles out-of-order fields by prepending extra
+      # fields with tag numbers higher than any real field. Note that this won't
+      # work with sanitization because that forces reserialization using a
+      # trusted decoder and encoder.
+      assert not sanitize
+      extra_fields = test_example_pb2.ExtraFields()
+      extra_fields.string_value = 'IGNORE ME'
+      extra_fields.bool_value = False
+      extra_msg = extra_fields.SerializeToString()
+      batch = [extra_msg + msg for msg in batch]
+
+    # Numpy silently truncates the strings if you don't specify dtype=object.
+    batch = np.array(batch, dtype=object)
+    batch = np.reshape(batch, batch_shape)
+
+    field_names = [f.name for f in fields]
+    output_types = [f.dtype for f in fields]
+
+    with self.test_session() as sess:
+      sizes, vtensor = self._decode_module.decode_proto(
+          batch,
+          message_type=message_type,
+          field_names=field_names,
+          output_types=output_types,
+          message_format=message_format,
+          sanitize=sanitize)
+
+      vlist = sess.run([sizes] + vtensor)
+      sizes = vlist[0]
+      # Values is a list of tensors, one for each field.
+      value_tensors = vlist[1:]
+
+      # Check that the repeat sizes are correct.
+      self.assertTrue(
+          np.all(np.array(sizes.shape) == batch_shape + [len(field_names)]))
+
+      # Check that the decoded sizes match the expected sizes.
+      self.assertEqual(len(sizes.flat), len(case_sizes))
+      self.assertTrue(
+          np.all(sizes.flat == np.array(
+              case_sizes, dtype=np.int32)))
+
+      field_dict = dict(zip(field_names, value_tensors))
+
+      self._compareProtos(batch_shape, sizes, fields, field_dict)
+
+  @parameterized.named_parameters(*test_base.ProtoOpTestBase.named_parameters())
+  def testBinary(self, case):
+    batch = [value.SerializeToString() for value in case.values]
+    self._runDecodeProtoTests(
+        case.fields,
+        case.sizes,
+        list(case.shapes),
+        batch,
+        'tensorflow.contrib.proto.TestValue',
+        'binary',
+        sanitize=False)
+
+  @parameterized.named_parameters(*test_base.ProtoOpTestBase.named_parameters())
+  def testBinaryDisordered(self, case):
+    batch = [value.SerializeToString() for value in case.values]
+    self._runDecodeProtoTests(
+        case.fields,
+        case.sizes,
+        list(case.shapes),
+        batch,
+        'tensorflow.contrib.proto.TestValue',
+        'binary',
+        sanitize=False,
+        force_disordered=True)
+
+  @parameterized.named_parameters(*test_base.ProtoOpTestBase.named_parameters())
+  def testPacked(self, case):
+    # Now try with the packed serialization.
+    #
+    # We test the packed representations by loading the same test case using
+    # PackedTestValue instead of TestValue. To do this we rely on the text
+    # format being the same for packed and unpacked fields, and reparse the
+    # test message using the packed version of the proto.
+    packed_batch = [
+        # Note: float_format='.17g' is necessary to ensure preservation of
+        # doubles and floats in text format.
+        text_format.Parse(
+            text_format.MessageToString(
+                value, float_format='.17g'),
+            test_example_pb2.PackedTestValue()).SerializeToString()
+        for value in case.values
+    ]
+
+    self._runDecodeProtoTests(
+        case.fields,
+        case.sizes,
+        list(case.shapes),
+        packed_batch,
+        'tensorflow.contrib.proto.PackedTestValue',
+        'binary',
+        sanitize=False)
+
+  @parameterized.named_parameters(*test_base.ProtoOpTestBase.named_parameters())
+  def testText(self, case):
+    # Note: float_format='.17g' is necessary to ensure preservation of
+    # doubles and floats in text format.
+    text_batch = [
+        text_format.MessageToString(
+            value, float_format='.17g') for value in case.values
+    ]
+
+    self._runDecodeProtoTests(
+        case.fields,
+        case.sizes,
+        list(case.shapes),
+        text_batch,
+        'tensorflow.contrib.proto.TestValue',
+        'text',
+        sanitize=False)
+
+  @parameterized.named_parameters(*test_base.ProtoOpTestBase.named_parameters())
+  def testSanitizerGood(self, case):
+    batch = [value.SerializeToString() for value in case.values]
+    self._runDecodeProtoTests(
+        case.fields,
+        case.sizes,
+        list(case.shapes),
+        batch,
+        'tensorflow.contrib.proto.TestValue',
+        'binary',
+        sanitize=True)
+
+  @parameterized.parameters((False), (True))
+  def testCorruptProtobuf(self, sanitize):
+    corrupt_proto = 'This is not a binary protobuf'
+
+    # Numpy silently truncates the strings if you don't specify dtype=object.
+    batch = np.array(corrupt_proto, dtype=object)
+    msg_type = 'tensorflow.contrib.proto.TestCase'
+    field_names = ['sizes']
+    field_types = [dtypes.int32]
+
+    with self.test_session() as sess:
+      ctensor, vtensor = self._decode_module.decode_proto(
+          batch,
+          message_type=msg_type,
+          field_names=field_names,
+          output_types=field_types,
+          sanitize=sanitize)
+      with self.assertRaisesRegexp(errors.DataLossError,
+                                   'Unable to parse binary protobuf'
+                                   '|Failed to consume entire buffer'):
+        _ = sess.run([ctensor] + vtensor)