Diffstat (limited to 'tensorflow/python/ops/array_grad.py')
-rw-r--r-- | tensorflow/python/ops/array_grad.py | 187
1 files changed, 187 insertions, 0 deletions
diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py
new file mode 100644
index 0000000000..2a463940d6
--- /dev/null
+++ b/tensorflow/python/ops/array_grad.py
@@ -0,0 +1,187 @@
+"""Gradients for operators defined in array_ops.py."""
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import gen_array_ops
+
+
+@ops.RegisterGradient("Pack")
+def _PackGrad(op, grad):
+  """Gradient for pack op."""
+  return array_ops.unpack(grad, num=op.get_attr('N'))
+
+
+@ops.RegisterGradient("Unpack")
+def _UnpackGrad(_, *grads):
+  """Gradient for unpack op."""
+  return array_ops.pack(grads)
+
+
+@ops.RegisterGradient("Concat")
+def _ConcatGrad(op, grad):
+  """Gradient for concat op."""
+  assert isinstance(grad, ops.Tensor)
+  # Degenerate concatenation, just return grad.
+  if len(op.inputs) == 2:
+    return [None, grad]
+  # Get the inputs' tensor shapes
+  sizes = [array_ops.shape(x) for x in op.inputs[1:]]
+  concat_dim = op.inputs[0]
+  # Since shape is 1-D, shape_of_shape = [rank-of-inputs]
+  shape_of_shape = array_ops.shape(sizes[0])
+  # Make a vector of length equal to the input's dimensions,
+  # with 0's everywhere and 1 in the concat dim position.
+  # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
+  mask = array_ops.concat(0,
+                          [array_ops.fill(
+                              array_ops.expand_dims(concat_dim, 0), 0), [1],
+                           array_ops.fill(shape_of_shape - concat_dim - 1, 0)])
+  out_grads = []
+  begin = array_ops.fill(shape_of_shape, 0)
+  for i in range(len(sizes)):
+    out_grads.append(array_ops.slice(grad, begin, sizes[i]))
+    # Lint complains begin = begin + ...
+    begin = math_ops.add(begin, sizes[i] * mask)
+  return [None] + out_grads
+
+
+@ops.RegisterGradient("Slice")
+def _SliceGrad(op, grad):
+  """Gradient for Slice op."""
+  # Create an Nx2 padding where the first column represents how many
+  # zeros are to be prepended for each dimension, and the second
+  # column indicates how many zeros are appended.
+  #
+  # The number of zeros to append is the shape of the input
+  # elementwise-subtracted by both the begin vector and sizes vector.
+  #
+  # Some more reshaping is needed to assemble this tensor with the
+  # right dimensions.
+  input_vec = op.inputs[0]
+  begin_vec = op.inputs[1]
+  input_rank = array_ops.rank(input_vec)
+  slice_size = array_ops.shape(op.outputs[0])
+
+  shape = array_ops.pack([input_rank, 1])
+  before_pad = array_ops.reshape(begin_vec, shape)
+  after_pad = array_ops.reshape(
+      array_ops.shape(input_vec) - slice_size - begin_vec, shape)
+  paddings = array_ops.concat(1, [before_pad, after_pad])
+  return array_ops.pad(grad, paddings), None, None
+
+
+@ops.RegisterGradient("Split")
+def _SplitGrad(op, *grads):
+  return None, array_ops.concat(op.inputs[0], list(grads))
+
+
+ops.NoGradient("Const")
+
+# TODO(liqzhang): The gradient for Diag operator would be
+# the diagonal of the backprop. Implement if there is a need.
+ops.NoGradient("Diag")
+
+# Edit Distance has no gradient (but can be used to eval seq2seq or CTC).
+ops.NoGradient("EditDistance")
+
+ops.NoGradient("Fill")
+
+
+@ops.RegisterGradient("Gather")
+def _GatherGrad(op, grad):
+  return [
+      ops.IndexedSlices(grad, op.inputs[1], array_ops.shape(op.inputs[0])), None
+  ]
+
+
+@ops.RegisterGradient("Identity")
+def _IdGrad(_, grad):
+  return grad
+
+
+@ops.RegisterGradient("RefIdentity")
+def _RefIdGrad(_, grad):
+  return grad
+
+
+ops.NoGradient("StopGradient")
+
+
+@ops.RegisterGradient("Reshape")
+def _ReshapeGrad(op, grad):
+  return [array_ops.reshape(grad, array_ops.shape(op.inputs[0])), None]
+
+
+ops.NoGradient("InvertPermutation")
+
+
+def _ReshapeToInput(op, grad):
+  """Reshapes the gradient to the shape of the original input."""
+  return array_ops.reshape(grad, array_ops.shape(op.inputs[0]))
+
+
+@ops.RegisterGradient("ExpandDims")
+def _ExpandDimsGrad(op, grad):
+  return [_ReshapeToInput(op, grad), None]
+
+
+@ops.RegisterGradient("Squeeze")
+def _SqueezeGrad(op, grad):
+  return _ReshapeToInput(op, grad)
+
+
+@ops.RegisterGradient("Transpose")
+def _TransposeGrad(op, grad):
+  """Returns unshuffle(grad)."""
+  p = op.inputs[1]
+  return [array_ops.transpose(grad, array_ops.invert_permutation(p)), None]
+
+
+ops.NoGradient("Shape")
+
+
+ops.NoGradient("Rank")
+
+
+ops.NoGradient("Size")
+
+
+@ops.RegisterGradient("Tile")
+def _TileGrad(op, grad):
+  """Sum reduces grad along the tiled dimensions."""
+  assert isinstance(grad, ops.Tensor)
+  return [gen_array_ops._tile_grad(grad, op.inputs[1]), None]
+
+
+ops.NoGradient("TileGrad")
+
+
+ops.NoGradient("BroadcastGradientArgs")
+
+
+@ops.RegisterGradient("Pad")
+def _PadGrad(op, grad):
+  """Gradient for Pad."""
+  # Pad introduces values around the original tensor, so the gradient function
+  # slices the original shape out of the gradient."""
+  x = op.inputs[0]
+  a = op.inputs[1]  # [Rank(x), 2]
+  # Takes a slice of a. The 1st column. [Rank(x), 1].
+  pad_before = array_ops.slice(a, [0, 0],
+                               array_ops.pack([array_ops.rank(x), 1]))
+  # Make it a 1-D tensor.
+  begin = array_ops.reshape(pad_before, [-1])
+  sizes = array_ops.shape(x)
+  return array_ops.slice(grad, begin, sizes), None
+
+
+# ReverseSequence is just a permutation. The gradient permutes back.
+@ops.RegisterGradient("ReverseSequence")
+def _ReverseSequenceGrad(op, grad):
+  seq_lengths = op.inputs[1]
+  return [array_ops.reverse_sequence(grad,
                                     seq_dim=op.get_attr("seq_dim"),
                                     seq_lengths=seq_lengths),
          None]