author    A. Unique TensorFlower <gardener@tensorflow.org>  2017-10-09 09:45:16 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2017-10-09 09:49:01 -0700
commit    7e2b50d8490f573b470ca97bd06a4677830db738 (patch)
tree      d3dafdd491392581c5ffa3211aa07f2ae1bb10e4
parent    4878a28ac3e5b63cd820c9aa13cb0c4f0025ec23 (diff)
Update docs of MomentumOptimizer about use_nesterov and of RMSProp about
momentum.

PiperOrigin-RevId: 171546603
-rw-r--r--  tensorflow/python/training/momentum.py  5
-rw-r--r--  tensorflow/python/training/rmsprop.py   2
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py
index f34ff22f07..7c00e219fd 100644
--- a/tensorflow/python/training/momentum.py
+++ b/tensorflow/python/training/momentum.py
@@ -53,7 +53,10 @@ class MomentumOptimizer(optimizer.Optimizer):
gradients. Defaults to "Momentum".
use_nesterov: If `True` use Nesterov Momentum.
See [Sutskever et al., 2013](
- http://jmlr.org/proceedings/papers/v28/sutskever13.pdf)
+ http://jmlr.org/proceedings/papers/v28/sutskever13.pdf).
+ This implementation always computes gradients at the value of the
+ variable(s) passed to the optimizer. Using Nesterov Momentum makes the
+ variable(s) track the values called `theta_t + mu*v_t` in the paper.
"""
super(MomentumOptimizer, self).__init__(use_locking, name)
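To make the new wording concrete, here is a minimal one-step sketch of the update being documented. This is an illustration, not TensorFlow's fused kernel; the function name and scalar hyperparameters (lr, mu) are hypothetical. The point the doc change makes is that the gradient is always evaluated at the stored variable, so with use_nesterov=True the stored variable ends up tracking `theta_t + mu*v_t` from Sutskever et al., 2013.

# Hypothetical sketch of the documented update (not TensorFlow's kernel).
# accum plays the role of the momentum buffer v; mu is the momentum rate.
def momentum_update(var, accum, grad, lr, mu, use_nesterov=False):
    accum = mu * accum + grad
    if use_nesterov:
        # grad was computed at `var` itself, so after this step the stored
        # `var` tracks `theta_t + mu*v_t` in the paper's notation.
        var = var - lr * (grad + mu * accum)
    else:
        # Plain (heavy-ball) momentum.
        var = var - lr * accum
    return var, accum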
diff --git a/tensorflow/python/training/rmsprop.py b/tensorflow/python/training/rmsprop.py
index d046456c85..ebec725b7b 100644
--- a/tensorflow/python/training/rmsprop.py
+++ b/tensorflow/python/training/rmsprop.py
@@ -26,6 +26,8 @@ mean_square = decay * mean_square{t-1} + (1-decay) * gradient ** 2
mom = momentum * mom{t-1} + learning_rate * g_t / sqrt(mean_square + epsilon)
delta = - mom
+This implementation of RMSProp uses plain momentum, not Nesterov momentum.
+
The centered version additionally maintains a moving (discounted) average of the
gradients, and uses that average to estimate the variance:
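Spelled out, the quoted (non-centered) equations amount to the following one-step sketch. The helper name and default hyperparameter values are assumptions for illustration; as the new doc line emphasizes, this is plain momentum, not Nesterov momentum.

import math

# Hypothetical one-step sketch of the quoted RMSProp equations (scalar case).
def rmsprop_step(var, mean_square, mom, g_t,
                 learning_rate, decay=0.9, momentum=0.0, epsilon=1e-10):
    mean_square = decay * mean_square + (1 - decay) * g_t ** 2
    mom = momentum * mom + learning_rate * g_t / math.sqrt(mean_square + epsilon)
    delta = -mom
    return var + delta, mean_square, mom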