diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-11-09 16:11:25 -0800 |
---|---|---|
committer | Andrew Selle <aselle@andyselle.com> | 2017-11-10 16:14:40 -0800 |
commit | cbcd08fcc56f2a871b4dcb3b15ec111dc3e7ebf3 (patch) | |
tree | f19069f81682a344a9758eca73a85d535e3d6709 | |
parent | 33135e87f8beb2e1b20f0a4c14e8b8bdc76c7faa (diff) |
Added some additional documentation to the swish() function
PiperOrigin-RevId: 175228315
-rw-r--r-- | tensorflow/python/ops/nn_impl.py | 10 |
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 2c83e4e29f..431ea1186a 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -275,9 +275,6 @@ def _swish_shape(op): return [op.inputs[0].shape] -# Set noinline=True so that sigmoid(features) is re-computed during -# backprop, and we can free the sigmoid(features) expression immediately -# after use during the forward pass. @function.Defun(shape_func=_swish_shape, func_name="swish_grad", noinline=True) def _swish_grad(features, grad): """Gradient of Swish function defined below.""" @@ -287,6 +284,11 @@ def _swish_grad(features, grad): return grad * activation_grad +# Naively, x * tf.nn.sigmoid(x) requires keeping both x and sigmoid(x) around +# for backprop, effectively doubling the tensor's memory consumption. We use a +# @Defun decorator with noinline=True so that sigmoid(features) is re-computed +# during backprop, and we can free the sigmoid(features) expression immediately +# after use during the forward pass. @function.Defun( grad_func=_swish_grad, shape_func=_swish_shape, @@ -296,7 +298,7 @@ def swish(features): # pylint: disable=g-doc-args """Computes the Swish activation function: `x * sigmoid(x)`. - Source: "Swish: a Self-Gated Activation Function" (Ramachandran et al. 2017) + Source: "Searching for Activation Functions" (Ramachandran et al. 2017) https://arxiv.org/abs/1710.05941 Args: |