From 76ab96c8a5b2d77dfc191c94ff54fd5e52c561f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 8 Oct 2018 13:31:52 -0700
Subject: Changed Adam algorithm variant formula from sqrt(max(v, epsilon**2))
 to sqrt(v + epsilon**2) and changed flag name accordingly.

PiperOrigin-RevId: 216240045
---
 tensorflow/contrib/tpu/proto/optimization_parameters.proto | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
index 8529b48c15..c2e3be03db 100644
--- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto
+++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto
@@ -62,9 +62,9 @@ message FtrlParameters {
 // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If
 // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in
 // order to get correct results; a warning will be printed otherwise (which may
-// change to an error in the future). If use_max_with_epsilon is set, the Adam
+// change to an error in the future). If use_sum_inside_sqrt is set, the Adam
 // variable update formula will be changed from m / (sqrt(v) + epsilon) to
-// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU
+// m / sqrt(v + epsilon**2); this option improves the performance of TPU
 // training and is not expected to harm model quality.
 message AdamParameters {
   float beta1 = 3;
@@ -73,7 +73,7 @@ message AdamParameters {
   float initial_m = 6;
   float initial_v = 7;
   bool use_non_lazy_adam = 8;
-  bool use_max_with_epsilon = 9;
+  bool use_sum_inside_sqrt = 10;
 }
 
 // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer
-- 
cgit v1.2.3
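
Note (not part of the patch): a minimal NumPy sketch of the three Adam
denominator variants named in the proto comment above. The function name
adam_denominator and the variant labels are hypothetical, chosen here only
to mirror the flags; the formulas themselves come straight from the patch.

import numpy as np

def adam_denominator(v, epsilon, variant):
    # "standard":         m / (sqrt(v) + epsilon)         -- default Adam
    # "max_with_epsilon": m / max(sqrt(v), abs(epsilon))  -- old flag's variant
    # "sum_inside_sqrt":  m / sqrt(v + epsilon**2)        -- new flag's variant
    if variant == "standard":
        return np.sqrt(v) + epsilon
    if variant == "max_with_epsilon":
        return np.maximum(np.sqrt(v), np.abs(epsilon))
    if variant == "sum_inside_sqrt":
        return np.sqrt(v + epsilon ** 2)
    raise ValueError(f"unknown variant: {variant}")

# When v is far below epsilon**2, every variant keeps the denominator at
# roughly epsilon rather than letting it approach zero; for v >> epsilon**2
# all three agree to first order, which is why model quality is not expected
# to change.
v, eps = 1e-20, 1e-8
for name in ("standard", "max_with_epsilon", "sum_inside_sqrt"):
    print(name, adam_denominator(v, eps, name))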