"""Gradients for operators defined in array_ops.py."""

from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import gen_array_ops


@ops.RegisterGradient("Pack")
def _PackGrad(op, grad):
  """Gradient for pack op."""
  return array_ops.unpack(grad, num=op.get_attr('N'))


@ops.RegisterGradient("Unpack")
def _UnpackGrad(_, *grads):
  """Gradient for unpack op."""
  return array_ops.pack(grads)


@ops.RegisterGradient("Concat")
def _ConcatGrad(op, grad):
  """Gradient for concat op."""
  assert isinstance(grad, ops.Tensor)
  # Degenerate concatenation, just return grad.
  if len(op.inputs) == 2:
    return [None, grad]
  # Get the inputs' tensor shapes
  sizes = [array_ops.shape(x) for x in op.inputs[1:]]
  concat_dim = op.inputs[0]
  # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
  shape_of_shape = array_ops.shape(sizes[0])
  # Make a vector of length equal to the input's dimensions,
  # with 0's everywhere and 1 in the concat dim position.
  # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
  mask = array_ops.concat(0,
                          [array_ops.fill(
                              array_ops.expand_dims(concat_dim, 0), 0), [1],
                           array_ops.fill(shape_of_shape - concat_dim - 1, 0)])
  out_grads = []
  begin = array_ops.fill(shape_of_shape, 0)
  for i in range(len(sizes)):
    out_grads.append(array_ops.slice(grad, begin, sizes[i]))
    # Lint complains begin = begin + ...
    begin = math_ops.add(begin, sizes[i] * mask)
  return [None] + out_grads


@ops.RegisterGradient("Slice")
def _SliceGrad(op, grad):
  """Gradient for Slice op."""
  # Create an Nx2 padding where the first column represents how many
  # zeros are to be prepended for each dimension, and the second
  # column indicates how many zeros are appended.
  #
  # The number of zeros to append is the shape of the input
  # elementwise-subtracted by both the begin vector and sizes vector.
  #
  # Some more reshaping is needed to assemble this tensor with the
  # right dimensions.
  input_vec = op.inputs[0]
  begin_vec = op.inputs[1]
  input_rank = array_ops.rank(input_vec)
  slice_size = array_ops.shape(op.outputs[0])

  shape = array_ops.pack([input_rank, 1])
  before_pad = array_ops.reshape(begin_vec, shape)
  after_pad = array_ops.reshape(
      array_ops.shape(input_vec) - slice_size - begin_vec, shape)
  paddings = array_ops.concat(1, [before_pad, after_pad])
  return array_ops.pad(grad, paddings), None, None


@ops.RegisterGradient("Split")
def _SplitGrad(op, *grads):
  return None, array_ops.concat(op.inputs[0], list(grads))


ops.NoGradient("Const")

# TODO(liqzhang): The gradient for Diag operator would be
# the diagonal of the backprop. Implement if there is a need.
ops.NoGradient("Diag")

# Edit Distance has no gradient (but can be used to eval seq2seq or CTC).
ops.NoGradient("EditDistance")

ops.NoGradient("Fill")


@ops.RegisterGradient("Gather")
def _GatherGrad(op, grad):
  return [
      ops.IndexedSlices(grad, op.inputs[1], array_ops.shape(op.inputs[0])), None
  ]


@ops.RegisterGradient("Identity")
def _IdGrad(_, grad):
  return grad


@ops.RegisterGradient("RefIdentity")
def _RefIdGrad(_, grad):
  return grad


ops.NoGradient("StopGradient")


@ops.RegisterGradient("Reshape")
def _ReshapeGrad(op, grad):
  return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]


ops.NoGradient("InvertPermutation")


def _ReshapeToInput(op, grad):
  """Reshapes the gradient to the shape of the original input."""
  return array_ops.reshape(grad, array_ops.shape(op.inputs[0]))


@ops.RegisterGradient("ExpandDims")
def _ExpandDimsGrad(op, grad):
  return [_ReshapeToInput(op, grad), None]


@ops.RegisterGradient("Squeeze")
def _SqueezeGrad(op, grad):
  return _ReshapeToInput(op, grad)


@ops.RegisterGradient("Transpose")
def _TransposeGrad(op, grad):
  """Returns unshuffle(grad)."""
  p = op.inputs[1]
  return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]


ops.NoGradient("Shape")


ops.NoGradient("Rank")


ops.NoGradient("Size")


@ops.RegisterGradient("Tile")
def _TileGrad(op, grad):
  """Sum reduces grad along the tiled dimensions."""
  assert isinstance(grad, ops.Tensor)
  return [gen_array_ops._tile_grad(grad, op.inputs[1]), None]


ops.NoGradient("TileGrad")


ops.NoGradient("BroadcastGradientArgs")


@ops.RegisterGradient("Pad")
def _PadGrad(op, grad):
  """Gradient for Pad."""
  # Pad introduces values around the original tensor, so the gradient function
  # slices the original shape out of the gradient."""
  x = op.inputs[0]
  a = op.inputs[1]  # [Rank(x), 2]
  # Takes a slice of a. The 1st column. [Rank(x), 1].
  pad_before = array_ops.slice(a, [0, 0],
                               array_ops.pack([array_ops.rank(x), 1]))
  # Make it a 1-D tensor.
  begin = array_ops.reshape(pad_before, [-1])
  sizes = array_ops.shape(x)
  return array_ops.slice(grad, begin, sizes), None


# ReverseSequence is just a permutation.  The gradient permutes back.
@ops.RegisterGradient("ReverseSequence")
def _ReverseSequenceGrad(op, grad):
  seq_lengths = op.inputs[1]
  return [array_ops.reverse_sequence(grad,
                                    seq_dim=op.get_attr("seq_dim"),
                                    seq_lengths=seq_lengths),
          None]