# Provenance (recovered from the patch header this file was extracted from):
#   commit f41959ccb2d9d4c722fe8fc3351401d53bcf4900
#   From: Manjunath Kudlur, Fri, 6 Nov 2015 16:27:58 -0800
#   Subject: TensorFlow: Initial commit of TensorFlow library.
#   TensorFlow is an open source software library for numerical computation
#   using data flow graphs. Base CL: 107276108
#   File: tensorflow/python/ops/clip_ops.py (new file, 234 insertions)
"""Operations for clipping (gradient, weight) tensors to min/max values."""

import collections

from tensorflow.python.framework import ops
from tensorflow.python.framework import types
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import math_ops


def _as_checked_list(t_list):
  """Returns `t_list` copied into a list, validating that it is a sequence.

  Args:
    t_list: The candidate tuple or list.

  Returns:
    A new `list` holding the entries of `t_list`.

  Raises:
    TypeError: If `t_list` is not a sequence, or is a string.
  """
  # Strings are sequences too, so they have to be rejected explicitly.
  if (not isinstance(t_list, collections.Sequence)
      or isinstance(t_list, basestring)):
    raise TypeError("t_list should be a sequence")
  return list(t_list)


def _dense_values(t_list):
  """Converts every non-None entry of `t_list` to a `Tensor`.

  For an `IndexedSlices` entry its `values` are converted; None entries are
  passed through unchanged so that positions line up with `t_list`.

  Args:
    t_list: A list of mixed `Tensors`, `IndexedSlices`, or None.

  Returns:
    A list of the same length as `t_list` holding `Tensors` or None.
  """
  return [
      ops.convert_to_tensor(
          t.values if isinstance(t, ops.IndexedSlices) else t,
          name="t_%d" % i)
      if t is not None else t
      for i, t in enumerate(t_list)]


def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clips tensor values to a specified min and max.

  Given a tensor `t`, this operation returns a tensor of the same type and
  shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`.
  Any values less than `clip_value_min` are set to `clip_value_min`. Any values
  greater than `clip_value_max` are set to `clip_value_max`.

  Args:
    t: A `Tensor`.
    clip_value_min: A 0-D (scalar) `Tensor`. The minimum value to clip by.
    clip_value_max: A 0-D (scalar) `Tensor`. The maximum value to clip by.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_value_min, clip_value_max], name,
                    "clip_by_value") as name:
    t = ops.convert_to_tensor(t, name="t")

    # The shape is needed for both bounds; build it once.
    t_shape = array_ops.shape(t)

    # First cap every element from above, then from below. The bounds are
    # expanded to the shape of `t` so the comparison is element-wise.
    capped_above = math_ops.minimum(
        t, array_ops.fill(t_shape, clip_value_max))
    clipped = math_ops.maximum(
        capped_above, array_ops.fill(t_shape, clip_value_min),
        name=name)

  return clipped


def clip_by_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its L2-norm is less than or equal to `clip_norm`.
  Specifically, if the L2-norm is already less than or equal to `clip_norm`,
  then `t` is not modified. If the L2-norm is greater than `clip_norm`, then
  this operation returns a tensor of the same type and shape as `t` with its
  values set to:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_norm], name, "clip_by_norm") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Inverse L2-norm of `t`, reducing over every dimension.
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t))))
    # convert_to_tensor (rather than constant) so that `clip_norm` may be a
    # `Tensor`, as documented, and not only a Python number.
    clip_norm_inv = ops.convert_to_tensor(1.0 / clip_norm)
    # Scale by clip_norm * min(1 / l2norm, 1 / clip_norm): the factor is 1
    # when the norm is within bounds and clip_norm / l2norm otherwise.
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(l2norm_inv, clip_norm_inv),
        name=name)

  return tclip


def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  t_list = _as_checked_list(t_list)
  with ops.op_scope(t_list, name, "global_norm") as name:
    values = _dense_values(t_list)
    # Skip only the None placeholders. Testing `if v` would ask for the truth
    # value of a Tensor, which is not what "ignore None entries" means.
    squared_norms = array_ops.pack(
        [math_ops.reduce_sum(v * v) for v in values if v is not None])

    norm = math_ops.sqrt(
        math_ops.reduce_sum(squared_norms), name="global_norm")

  return norm


def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values t_list[i] are set to:

  `t_list[i] * clip_norm / max(global_norm, clip_norm)`

  where:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  Any of the entries of `t_list` that are of type None are ignored.

  This is the correct way to perform gradient clipping (for example, see
  R. Pascanu, T. Mikolov, and Y. Bengio, "On the difficulty of training
  Recurrent Neural Networks".  http://arxiv.org/abs/1211.5063)

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence.
  """
  t_list = _as_checked_list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.op_scope(t_list + [clip_norm], name, "clip_by_global_norm") as name:
    # scale = clip_norm * min(1 / global_norm, 1 / clip_norm), i.e.
    # clip_norm / max(global_norm, clip_norm). convert_to_tensor (rather than
    # constant) lets `clip_norm` be a `Tensor`, as documented.
    scale = clip_norm * math_ops.minimum(
        1.0 / use_norm, ops.convert_to_tensor(1.0 / clip_norm))

    values = _dense_values(t_list)

    # None entries stay None so the output lines up with `t_list`.
    values_clipped = [
        array_ops.identity(v * scale, name="%s_%d" % (name, i))
        if v is not None else None
        for i, v in enumerate(values)]

    # Re-wrap clipped values whose source entry was an IndexedSlices, reusing
    # the original indices.
    list_clipped = [
        ops.IndexedSlices(c_v, t.indices)
        if isinstance(t, ops.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm


def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  In this case, the average L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` > 0. A maximum clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_norm], name, "clip_by_average_norm") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Calculate L2-norm per element, clip elements by ratio of clip_norm to
    # L2-norm per element. The average norm is l2norm / n_element, so its
    # inverse is l2norm_inv * n_element.
    n_element = math_ops.cast(array_ops.size(t), types.float32)
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t))))
    # convert_to_tensor (rather than constant) so that `clip_norm` may be a
    # `Tensor`, as documented, and not only a Python number.
    clip_norm_inv = ops.convert_to_tensor(1.0 / clip_norm)
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, clip_norm_inv),
        name=name)

  return tclip