aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/ops/clip_ops.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/python/ops/clip_ops.py')
-rw-r--r--tensorflow/python/ops/clip_ops.py234
1 files changed, 234 insertions, 0 deletions
diff --git a/tensorflow/python/ops/clip_ops.py b/tensorflow/python/ops/clip_ops.py
new file mode 100644
index 0000000000..08781932f9
--- /dev/null
+++ b/tensorflow/python/ops/clip_ops.py
@@ -0,0 +1,234 @@
+"""Operations for clipping (gradient, weight) tensors to min/max values."""
+
+import collections
+
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import types
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import math_ops
+
+
def clip_by_value(t, clip_value_min, clip_value_max,
                  name=None):
  """Clamps every element of a tensor into `[clip_value_min, clip_value_max]`.

  The result has the same type and shape as `t`. Elements smaller than
  `clip_value_min` are replaced by `clip_value_min`, and elements larger than
  `clip_value_max` are replaced by `clip_value_max`.

  The upper bound is applied first and the lower bound second, so if
  `clip_value_min` is greater than `clip_value_max` every element of the
  result equals `clip_value_min`.

  Args:
    t: A `Tensor`.
    clip_value_min: A 0-D (scalar) `Tensor`. The minimum value to clip by.
    clip_value_max: A 0-D (scalar) `Tensor`. The maximum value to clip by.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_value_min, clip_value_max], name,
                    "clip_by_value") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Expand the scalar upper bound to t's shape and cap t from above.
    upper_bound = array_ops.fill(array_ops.shape(t), clip_value_max)
    capped = math_ops.minimum(t, upper_bound)

    # Expand the scalar lower bound the same way and raise anything below it.
    # The final op carries the scope name so it is the op callers look up.
    lower_bound = array_ops.fill(array_ops.shape(t), clip_value_min)
    clipped = math_ops.maximum(capped, lower_bound, name=name)

  return clipped
+
+
def clip_by_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its L2-norm is less than or equal to `clip_norm`.
  Specifically, if the L2-norm is already less than or equal to `clip_norm`,
  then `t` is not modified. If the L2-norm is greater than `clip_norm`, then
  this operation returns a tensor of the same type and shape as `t` with its
  values set to:

  `t * clip_norm / l2norm(t)`

  In this case, the L2-norm of the output tensor is `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` or Python number > 0. A maximum
      clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_norm], name, "clip_by_norm") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Summing t * t over every axis (0 .. rank-1) yields the squared L2-norm
    # as a scalar; rsqrt turns it into 1 / l2norm(t).
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t))))

    # The applied factor is
    #   clip_norm * min(1 / l2norm, 1 / clip_norm) == min(clip_norm / l2norm, 1)
    # so t is only ever shrunk, never enlarged.
    #
    # `1.0 / clip_norm` is handed to `minimum` directly rather than being
    # wrapped in `constant_op.constant(...)`: when `clip_norm` is a `Tensor`
    # the quotient is itself a `Tensor`, which cannot be used as the value of
    # a constant.
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(l2norm_inv, 1.0 / clip_norm),
        name=name)

  return tclip
+
def global_norm(t_list, name=None):
  """Computes the global norm of multiple tensors.

  Given a tuple or list of tensors `t_list`, this operation returns the
  global norm of the elements in all tensors in `t_list`. The global norm is
  computed as:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  Any entries in `t_list` that are of type None are ignored. For
  `IndexedSlices` entries only the `values` component contributes.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    name: A name for the operation (optional).

  Returns:
    A 0-D (scalar) `Tensor` of type `float`.

  Raises:
    TypeError: If `t_list` is not a sequence (strings are rejected too).
  """
  # Resolve the Sequence ABC and the string types in a way that works on both
  # Python 2 (`collections.Sequence`, `basestring`) and Python 3
  # (`collections.abc.Sequence`, `str`/`bytes`).
  try:
    from collections import abc as collections_abc
  except ImportError:
    collections_abc = collections
  try:
    string_types = (basestring,)
  except NameError:
    string_types = (str, bytes)

  if (not isinstance(t_list, collections_abc.Sequence)
      or isinstance(t_list, string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  with ops.op_scope(t_list, name, "global_norm") as name:
    # Convert every non-None entry to a dense Tensor, keeping None entries in
    # place so the "t_%d" names still reflect positions in `t_list`.
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    # Skip missing entries with an explicit None check; relying on the
    # truthiness of a Tensor object is not a reliable way to detect None.
    squared_norms = array_ops.pack(
        [math_ops.reduce_sum(v * v) for v in values if v is not None])

    norm = math_ops.sqrt(
        math_ops.reduce_sum(squared_norms), name="global_norm")

  return norm
+
def clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
  """Clips values of multiple tensors by the ratio of the sum of their norms.

  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. Optionally,
  if you've already computed the global norm for `t_list`, you can specify
  the global norm with `use_norm`.

  To perform the clipping, the values t_list[i] are set to:

  `t_list[i] * clip_norm / max(global_norm, clip_norm)`

  where:

  `global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))`

  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.

  Any of the entries of `t_list` that are of type None are ignored.

  This is the correct way to perform gradient clipping (for example, see
  R. Pascanu, T. Mikolov, and Y. Bengio, "On the difficulty of training
  Recurrent Neural Networks".  http://arxiv.org/abs/1211.5063)

  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.

  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` or Python number > 0. The clipping
      ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).

  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`. Entries
      that were None in `t_list` are None here, and `IndexedSlices` entries
      are returned as `IndexedSlices` with the same indices.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.

  Raises:
    TypeError: If `t_list` is not a sequence (strings are rejected too).
  """
  # Resolve the Sequence ABC and the string types in a way that works on both
  # Python 2 (`collections.Sequence`, `basestring`) and Python 3
  # (`collections.abc.Sequence`, `str`/`bytes`).
  try:
    from collections import abc as collections_abc
  except ImportError:
    collections_abc = collections
  try:
    string_types = (basestring,)
  except NameError:
    string_types = (str, bytes)

  if (not isinstance(t_list, collections_abc.Sequence)
      or isinstance(t_list, string_types)):
    raise TypeError("t_list should be a sequence")
  t_list = list(t_list)
  if use_norm is None:
    use_norm = global_norm(t_list, name)

  with ops.op_scope(t_list + [clip_norm], name, "clip_by_global_norm") as name:
    # scale == clip_norm * min(1 / norm, 1 / clip_norm)
    #       == clip_norm / max(norm, clip_norm)
    # `1.0 / clip_norm` is passed to `minimum` directly instead of through
    # `constant_op.constant(...)`: when `clip_norm` is a `Tensor` the quotient
    # is a `Tensor` too, which cannot be used as the value of a constant.
    scale = clip_norm * math_ops.minimum(1.0 / use_norm, 1.0 / clip_norm)

    # Dense view of each entry; None entries are kept so positions line up
    # with `t_list`.
    values = [
        ops.convert_to_tensor(
            t.values if isinstance(t, ops.IndexedSlices) else t,
            name="t_%d" % i)
        if t is not None else t
        for i, t in enumerate(t_list)]

    values_clipped = [
        array_ops.identity(v * scale, name="%s_%d" % (name, i))
        if v is not None else None
        for i, v in enumerate(values)]

    # Re-wrap sparse inputs. The dense shape is forwarded along with the
    # indices so the clipped slices describe the same tensor as the input.
    list_clipped = [
        ops.IndexedSlices(c_v, t.indices, t.dense_shape)
        if isinstance(t, ops.IndexedSlices)
        else c_v
        for (c_v, t) in zip(values_clipped, t_list)]

  return list_clipped, use_norm
+
+
def clip_by_average_norm(t, clip_norm, name=None):
  """Clips tensor values to a maximum average L2-norm.

  Given a tensor `t`, and a maximum clip value `clip_norm`, this operation
  normalizes `t` so that its average L2-norm is less than or equal to
  `clip_norm`. Specifically, if the average L2-norm is already less than or
  equal to `clip_norm`, then `t` is not modified. If the average L2-norm is
  greater than `clip_norm`, then this operation returns a tensor of the same
  type and shape as `t` with its values set to:

  `t * clip_norm / l2norm_avg(t)`

  where `l2norm_avg(t)` is the L2-norm of `t` divided by its number of
  elements. In this case, the average L2-norm of the output tensor is
  `clip_norm`.

  This operation is typically used to clip gradients before applying them with
  an optimizer.

  Args:
    t: A `Tensor`.
    clip_norm: A 0-D (scalar) `Tensor` or Python number > 0. A maximum
      clipping value.
    name: A name for the operation (optional).

  Returns:
    A clipped `Tensor`.
  """
  with ops.op_scope([t, clip_norm], name, "clip_by_average_norm") as name:
    t = ops.convert_to_tensor(t, name="t")

    # Element count as float32 so it can multiply the inverse norm below.
    n_element = math_ops.cast(array_ops.size(t), types.float32)

    # Summing t * t over every axis (0 .. rank-1) yields the squared L2-norm
    # as a scalar; rsqrt turns it into 1 / l2norm(t).
    l2norm_inv = math_ops.rsqrt(
        math_ops.reduce_sum(t * t, math_ops.range(0, array_ops.rank(t))))

    # l2norm_inv * n_element == 1 / l2norm_avg(t), so the applied factor is
    #   clip_norm * min(1 / l2norm_avg, 1 / clip_norm)
    #     == min(clip_norm / l2norm_avg, 1).
    #
    # `1.0 / clip_norm` is handed to `minimum` directly rather than being
    # wrapped in `constant_op.constant(...)`: when `clip_norm` is a `Tensor`
    # the quotient is itself a `Tensor`, which cannot be used as the value of
    # a constant.
    tclip = array_ops.identity(
        t * clip_norm * math_ops.minimum(
            l2norm_inv * n_element, 1.0 / clip_norm),
        name=name)

  return tclip