"""Tests for tensorflow.ops.parsing_ops."""

import itertools

import tensorflow.python.platform

import numpy as np
import tensorflow as tf

# Shorthand constructors used by the tests below to build Example protos.
example = tf.train.Example
feature = tf.train.Feature


def features(d):
  """Return a tf.train.Features message whose `feature` field is `d`."""
  return tf.train.Features(feature=d)


def bytes_feature(v):
  """Return a Feature whose bytes_list is built from the values `v`."""
  return feature(bytes_list=tf.train.BytesList(value=v))


def int64_feature(v):
  """Return a Feature whose int64_list is built from the values `v`."""
  return feature(int64_list=tf.train.Int64List(value=v))


def float_feature(v):
  """Return a Feature whose float_list is built from the values `v`."""
  return feature(float_list=tf.train.FloatList(value=v))


def flatten(list_of_lists):
  """Lazily iterate over the items of every inner iterable, in order.

  Only one level of nesting is removed; the result is a one-shot iterator.
  """
  return (item for inner in list_of_lists for item in inner)


def flatten_values_tensors_or_sparse(tensors_list):
  """Expand SparseTensors into their component Tensors for session.run().

  Each SparseTensor in `tensors_list` contributes three consecutive entries
  (indices, values, shape); every other element is passed through unchanged.
  Returns a new flat list in the original order.
  """
  flat = []
  for tensor in tensors_list:
    if isinstance(tensor, tf.SparseTensor):
      flat.extend([tensor.indices, tensor.values, tensor.shape])
    else:
      flat.append(tensor)
  return flat


def _compare_output_to_expected(
    tester, dict_tensors, expected_tensors, flat_output):
  """Compare flattened session.run() results against expected values.

  Args:
    tester: Test case object providing assertEqual and assertAllEqual.
    dict_tensors: Dict mapping key -> Tensor or SparseTensor. Its iteration
      order must be the order in which `flat_output` was produced.
    expected_tensors: Dict with the same key set. For a SparseTensor key the
      value is a 3-sequence (indices, values, shape); for a Tensor key it is
      the single expected value.
    flat_output: Flat sequence of evaluated results, holding three consecutive
      entries per SparseTensor and one entry per Tensor.
  """
  tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))

  i = 0  # Index into the flattened output of session.run()
  # items() instead of iteritems(): the latter does not exist on Python 3,
  # while items() works on both and iterates in the same order as values().
  for k, v in dict_tensors.items():
    expected_v = expected_tensors[k]
    tf.logging.info("Comparing key: %s", k)
    if isinstance(v, tf.SparseTensor):
      # Three outputs for SparseTensor : indices, values, shape.
      # The key is included in both lists so a failure message names it.
      tester.assertEqual([k, 3], [k, len(expected_v)])
      tester.assertAllEqual(flat_output[i], expected_v[0])
      tester.assertAllEqual(flat_output[i + 1], expected_v[1])
      tester.assertAllEqual(flat_output[i + 2], expected_v[2])
      i += 3
    else:
      # One output for standard Tensor.
      tester.assertAllEqual(flat_output[i], expected_v)
      i += 1


class ParseExampleTest(tf.test.TestCase):
  """Tests for tf.parse_example on batches of serialized Example protos."""

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_example(**kwargs) and check static shapes and values.

    Args:
      kwargs: Dict of keyword arguments forwarded to tf.parse_example. Must
        contain "serialized"; "dense_keys", "sparse_keys" and "dense_shapes"
        are also read here (defaulting to empty) for the shape checks.
      expected_values: Dict mapping each output key to its expected value
        (an (indices, values, shape) 3-tuple for sparse keys). Only used when
        `expected_err_re` is None.
      expected_err_re: If not None, a regex that the op error raised while
        running the outputs must match.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      serialized = kwargs["serialized"]
      dense_keys = kwargs.get("dense_keys", [])
      sparse_keys = kwargs.get("sparse_keys", [])
      dense_shapes = kwargs.get("dense_shapes", [])

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_example(**kwargs)

      # Check shapes; if serialized is a Tensor we need its size to
      # properly check.
      batch_size = (
          serialized.eval().size if isinstance(serialized, tf.Tensor)
          else np.asarray(serialized).size)
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        # Dense outputs get the batch dimension prepended to the given shape.
        self.assertEqual(tuple(out[k].get_shape().as_list()), (batch_size,) + s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 2))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (2,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testEmptySerializedWithAllDefaults(self):
    # Both serialized inputs are empty, so every dense output is expected to
    # equal its default and the sparse output is expected to be empty.
    dense_keys = ["a", "b", "c"]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_defaults = {
        "a": [0, 42, 0],
        # Builtin str is what the removed np.str alias referred to.
        "b": np.random.rand(3, 3).astype(str),
        "c": np.random.rand(2).astype(np.float32),
    }

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array(2 * [dense_defaults["c"]]),
    }

    self._test(
        {
            "names": np.empty((0,), dtype=str),
            # empty serialized input Examples
            "serialized": tf.convert_to_tensor(["", ""]),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": dense_keys,
            "dense_types": dense_types,
            "dense_shapes": dense_shapes
        }, expected_output)

  def testEmptySerializedWithoutDefaultsShouldFail(self):
    # Dense key "c" has no default and the inputs are empty, so running the
    # parse is expected to fail naming the first input and feature "c".
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [0, 42, 0],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" is deliberately given no default.
    }
    self._test(
        {
            "serialized": ["", ""],  # empty serialized input Examples
            "names": ["in1", "in2"],
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        },
        expected_err_re="Name: in1, Feature: c is required")

  def testDenseNotMatchingShapeShouldFail(self):
    # The second Example supplies two floats for "a" while the declared dense
    # shape is (1, 3); the error is expected to name that Example.
    dense_shapes = [(1, 3)]
    dense_defaults = {
        # no default!
    }

    original = [
        example(features=features({
            "a": float_feature([1, 1, 3]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
        }))
    ]

    names = ["passing", "failing"]
    serialized = [m.SerializeToString() for m in original]

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "names": names,
            "dense_defaults": dense_defaults,
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
            "dense_shapes": dense_shapes,
        },
        expected_err_re="Name: failing, Key: a.  Number of float values")

  def testSerializedContainingSparse(self):
    # Four Examples with a mix of present, empty and absent sparse features.
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature(["hi"])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array([[3, 0]], dtype=np.int64),
        np.array(["hi"], dtype=str),
        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "sparse_keys": ["st_c", "st_d"],
            "sparse_types": [tf.float32, tf.string],
        }, expected_output)

  def testSerializedContainingDense(self):
    # Every Example supplies both dense features, so no defaults are passed.
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
            "b": bytes_feature(["b0_str"]),
        })),
        example(features=features({
            "a": float_feature([-1, -1]),
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]

    expected_output = {
        "a": np.array([[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["b0_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    # No defaults, values required
    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_keys": ["a", "b"],
            "dense_types": [tf.float32, tf.string],
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingDenseScalar(self):
    # The second Example has no "a", so the scalar default -1 is expected.
    original = [
        example(features=features({
            "a": float_feature([1]),
        })),
        example(features=features({}))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "a": np.array([[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": {"a": -1},
            "dense_shapes": [(1,)],
            "dense_keys": ["a"],
            "dense_types": [tf.float32],
        }, expected_output)

  def testSerializedContainingDenseWithDefaults(self):
    # Each Example omits one of the two dense features; the omitted one is
    # expected to be filled from dense_defaults.
    original = [
        example(features=features({
            "a": float_feature([1, 1]),
        })),
        example(features=features({
            "b": bytes_feature(["b1"]),
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    dense_shapes = [(1, 2, 1), (1, 1, 1, 1)]
    dense_types = [tf.float32, tf.string]
    dense_defaults = {
        "a": [3.0, -3.0],
        "b": "tmp_str",
    }

    expected_output = {
        "a": np.array([[1, 1], [3, -3]], dtype=np.float32).reshape(2, 1, 2, 1),
        "b": np.array(["tmp_str", "b1"], dtype=str).reshape(2, 1, 1, 1, 1),
    }

    self._test(
        {
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_keys": ["a", "b"],
            "dense_types": dense_types,
            "dense_shapes": dense_shapes,
        }, expected_output)

  def testSerializedContainingSparseAndDenseWithNoDefault(self):
    # "a" and "b" come from defaults, "c" is supplied by every Example, and
    # the sparse key "st_a" never appears so its output is expected empty.
    dense_defaults = {
        "a": [1, 2, 3],
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }
    dense_shapes = [(1, 3), (3, 3), (2,)]

    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # sp_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

    original = [
        example(features=features({
            "c": float_feature([3, 4])
        })),
        example(features=features({
            "c": float_feature([1, 2])
        }))
    ]

    names = ["in1", "in2"]
    serialized = [m.SerializeToString() for m in original]

    expected_output = {
        "st_a": expected_st_a,
        "a": np.array(2 * [[dense_defaults["a"]]]),
        "b": np.array(2 * [dense_defaults["b"]]),
        "c": np.array([[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        {
            "names": names,
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.int64],
            "dense_keys": ["a", "b", "c"],
            "dense_types": [tf.int64, tf.string, tf.float32],
            "dense_shapes": dense_shapes
        }, expected_output)


class ParseSingleExampleTest(tf.test.TestCase):
  """Tests for tf.parse_single_example on one serialized Example proto."""

  def _test(self, kwargs, expected_values=None, expected_err_re=None):
    """Run tf.parse_single_example(**kwargs) and check shapes and values.

    Args:
      kwargs: Dict of keyword arguments forwarded to tf.parse_single_example.
        "dense_keys", "sparse_keys" and "dense_shapes" are also read here
        (defaulting to empty) for the shape checks.
      expected_values: Dict mapping each output key to its expected value
        (an (indices, values, shape) 3-tuple for sparse keys). Only used when
        `expected_err_re` is None.
      expected_err_re: If not None, a regex that the op error raised while
        running the outputs must match.
    """
    with self.test_session() as sess:
      # Pull out some keys to check shape inference
      dense_keys = kwargs.get("dense_keys", [])
      sparse_keys = kwargs.get("sparse_keys", [])
      dense_shapes = kwargs.get("dense_shapes", [])

      # Returns dict w/ Tensors and SparseTensors
      out = tf.parse_single_example(**kwargs)

      # Check shapes
      self.assertEqual(len(dense_keys), len(dense_shapes))
      for (k, s) in zip(dense_keys, dense_shapes):
        # as_list() yields plain ints, matching the other shape checks in
        # this file; there is no batch dimension for a single Example.
        self.assertEqual(tuple(out[k].get_shape().as_list()), s)
      for k in sparse_keys:
        self.assertEqual(tuple(out[k].indices.get_shape().as_list()), (None, 1))
        self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
        self.assertEqual(tuple(out[k].shape.get_shape().as_list()), (1,))

      # Check values
      result = flatten_values_tensors_or_sparse(out.values())  # flatten values
      if expected_err_re is None:
        tf_result = sess.run(result)
        _compare_output_to_expected(self, out, expected_values, tf_result)
      else:
        with self.assertRaisesOpError(expected_err_re):
          sess.run(result)

  def testSingleExampleWithSparseAndDense(self):
    # "a" and "b" are expected to come from defaults; "c" and the sparse
    # feature "st_a" are supplied by the Example itself.
    dense_types = [tf.int64, tf.string, tf.float32]
    dense_shapes = [(1, 3), (3, 3), (2,)]
    dense_defaults = {
        "a": [1, 2, 3],
        # Builtin str is what the removed np.str alias referred to.
        "b": np.random.rand(3, 3).astype(str),
        # Feature "c" must be provided
    }

    original = example(features=features(
        {"c": float_feature([3, 4]),
         "st_a": float_feature([3.0, 4.0])}))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0], [1]], dtype=np.int64),  # indices
        np.array([3.0, 4.0], dtype=np.float32),  # values
        np.array([2], dtype=np.int64))  # shape: max_values = 2

    expected_output = {
        "st_a": expected_st_a,
        "a": [dense_defaults["a"]],
        "b": dense_defaults["b"],
        "c": np.array([3, 4], dtype=np.float32),
    }

    self._test(
        {
            "names": "in1",
            "serialized": tf.convert_to_tensor(serialized),
            "dense_defaults": dense_defaults,
            "dense_types": dense_types,
            "sparse_keys": ["st_a"],
            "sparse_types": [tf.float32],
            "dense_keys": ["a", "b", "c"],
            "dense_shapes": dense_shapes
        }, expected_output)


# When executed directly as a script, hand control to tf.test.main().
if __name__ == "__main__":
  tf.test.main()