author    A. Unique TensorFlower <gardener@tensorflow.org>    2018-09-05 17:13:24 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>    2018-09-05 17:17:21 -0700
commit    017599d0a1fa7a7227a43649db67e96311033a4e (patch)
tree      dc4b00270acc53b2e098022fb03e4b54cb40ab9e /tensorflow/python/training/learning_rate_decay.py
parent    e7b37766f53d5d9d976f2ba3046d3df3333c8ebb (diff)
This CL changes the graph-mode API of the learning_rate_decay functions in TF 2.0 to return a no-arg callable to output a learning rate, instead of directly outputting a learning rate tensor.
This brings the graph-mode API in line with the eager execution API, where this change was made to allow changing the learning rate value across different invocations of optimizer functions.

PiperOrigin-RevId: 211726295
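Every hunk in this diff follows the same shape: the v1 symbol becomes a thin wrapper that delegates to the new learning_rate_decay_v2 module, which returns a no-arg callable, and the wrapper invokes that callable once in graph mode so the old tensor-returning contract is preserved. The sketch below paraphrases the added lines for exponential_decay; the keyword defaults shown come from the public tf.train API rather than this hunk, which truncates the signature.

    from tensorflow.python.eager import context
    from tensorflow.python.training import learning_rate_decay_v2
    from tensorflow.python.util.tf_export import tf_export


    @tf_export(v1=["train.exponential_decay"])
    def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
                          staircase=False, name=None):
      # The v2 implementation returns a no-arg callable that recomputes the
      # decayed rate each time it is invoked.
      decayed_lr = learning_rate_decay_v2.exponential_decay(
          learning_rate, global_step, decay_steps, decay_rate,
          staircase=staircase, name=name)
      if not context.executing_eagerly():
        # Graph mode keeps the previous v1 behavior: call the callable once
        # and hand back a learning-rate tensor.
        decayed_lr = decayed_lr()
      return decayed_lr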
Diffstat (limited to 'tensorflow/python/training/learning_rate_decay.py')
-rw-r--r--  tensorflow/python/training/learning_rate_decay.py  432
1 file changed, 114 insertions, 318 deletions
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index fd195a7965..29b5465321 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -17,19 +17,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import math
-
from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.training import learning_rate_decay_v2
from tensorflow.python.util.tf_export import tf_export
-@tf_export("train.exponential_decay")
+@tf_export(v1=["train.exponential_decay"])
def exponential_decay(learning_rate,
global_step,
decay_steps,
@@ -95,32 +88,19 @@ def exponential_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for exponential_decay.")
- with ops.name_scope(
- name, "ExponentialDecay",
- [learning_rate, global_step, decay_steps, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- return math_ops.multiply(
- learning_rate, math_ops.pow(decay_rate, p), name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.piecewise_constant")
+ decayed_lr = learning_rate_decay_v2.exponential_decay(learning_rate,
+ global_step,
+ decay_steps,
+ decay_rate,
+ staircase=staircase,
+ name=name)
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.piecewise_constant"])
def piecewise_constant(x, boundaries, values, name=None):
"""Piecewise constant from boundaries and interval values.
@@ -163,58 +143,15 @@ def piecewise_constant(x, boundaries, values, name=None):
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if len(boundaries) != len(values) - 1:
- raise ValueError(
- "The length of boundaries should be 1 less than the length of values")
- with ops.name_scope(name, "PiecewiseConstant",
- [x, boundaries, values, name]) as name:
- boundaries = ops.convert_n_to_tensor(boundaries)
- values = ops.convert_n_to_tensor(values)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- x_recomp = ops.convert_to_tensor(x)
- # Avoid explicit conversion to x's dtype. This could result in faulty
- # comparisons, for example if floats are converted to integers.
- for i, b in enumerate(boundaries):
- if b.dtype.base_dtype != x_recomp.dtype.base_dtype:
- # We can promote int32 boundaries to int64 without loss of precision.
- # This covers the most common case where the user passes in boundaries
- # as an array of Python integers.
- if (b.dtype.base_dtype == dtypes.int32 and
- x_recomp.dtype.base_dtype == dtypes.int64):
- b = math_ops.cast(b, x_recomp.dtype.base_dtype)
- boundaries[i] = b
- else:
- raise ValueError(
- "Boundaries (%s) must have the same dtype as x (%s)." %
- (b.dtype.base_dtype, x_recomp.dtype.base_dtype))
- # TODO(rdipietro): Ensure that boundaries' elements strictly increases.
- for v in values[1:]:
- if v.dtype.base_dtype != values[0].dtype.base_dtype:
- raise ValueError(
- "Values must have elements all with the same dtype (%s vs %s)." %
- (values[0].dtype.base_dtype, v.dtype.base_dtype))
- pred_fn_pairs = []
- pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0]))
- pred_fn_pairs.append((x_recomp > boundaries[-1], lambda: values[-1]))
- for low, high, v in zip(boundaries[:-1], boundaries[1:], values[1:-1]):
- # Need to bind v here; can do this with lambda v=v: ...
- pred = (x_recomp > low) & (x_recomp <= high)
- pred_fn_pairs.append((pred, lambda v=v: v))
-
- # The default isn't needed here because our conditions are mutually
- # exclusive and exhaustive, but tf.case requires it.
- default = lambda: values[0]
- return control_flow_ops.case(pred_fn_pairs, default, exclusive=True)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.polynomial_decay")
+ decayed_lr = learning_rate_decay_v2.piecewise_constant(x, boundaries, values,
+ name=name)
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.polynomial_decay"])
def polynomial_decay(learning_rate,
global_step,
decay_steps,
@@ -299,46 +236,22 @@ def polynomial_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for polynomial_decay.")
- with ops.name_scope(
- name, "PolynomialDecay",
- [learning_rate, global_step, decay_steps, end_learning_rate, power
- ]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- end_learning_rate = math_ops.cast(end_learning_rate, dtype)
- power = math_ops.cast(power, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- decay_steps_recomp = math_ops.cast(decay_steps, dtype)
- if cycle:
- # Find the first multiple of decay_steps that is bigger than
- # global_step. If global_step is zero set the multiplier to 1
- multiplier = control_flow_ops.cond(
- math_ops.equal(global_step_recomp, 0), lambda: 1.0,
- lambda: math_ops.ceil(global_step_recomp / decay_steps))
- decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
- else:
- # Make sure that the global_step used is not bigger than decay_steps.
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-
- p = math_ops.div(global_step_recomp, decay_steps_recomp)
- return math_ops.add(
- math_ops.multiply(learning_rate - end_learning_rate,
- math_ops.pow(1 - p, power)),
- end_learning_rate,
- name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.natural_exp_decay")
+ decayed_lr = learning_rate_decay_v2.polynomial_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ end_learning_rate=end_learning_rate,
+ power=power,
+ cycle=cycle,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.natural_exp_decay"])
def natural_exp_decay(learning_rate,
global_step,
decay_steps,
@@ -410,32 +323,17 @@ def natural_exp_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for natural_exp_decay.")
- with ops.name_scope(name, "NaturalExpDecay",
- [learning_rate, global_step, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- exponent = math_ops.exp(
- math_ops.multiply(math_ops.negative(decay_rate), p))
- return math_ops.multiply(learning_rate, exponent, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.inverse_time_decay")
+ decayed_lr = learning_rate_decay_v2.natural_exp_decay(
+ learning_rate, global_step, decay_steps, decay_rate, staircase=staircase,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.inverse_time_decay"])
def inverse_time_decay(learning_rate,
global_step,
decay_steps,
@@ -507,32 +405,21 @@ def inverse_time_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for inverse_time_decay.")
- with ops.name_scope(name, "InverseTimeDecay",
- [learning_rate, global_step, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- const = math_ops.cast(constant_op.constant(1), dtype)
- denom = math_ops.add(const, math_ops.multiply(decay_rate, p))
- return math_ops.div(learning_rate, denom, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.cosine_decay")
+ decayed_lr = learning_rate_decay_v2.inverse_time_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ decay_rate,
+ staircase=staircase,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.cosine_decay"])
def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
"""Applies cosine decay to the learning rate.
@@ -581,32 +468,16 @@ def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("cosine decay requires global_step")
- with ops.name_scope(name, "CosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- completed_fraction = global_step_recomp / decay_steps
- cosine_decayed = 0.5 * (1.0 + math_ops.cos(
- constant_op.constant(math.pi) * completed_fraction))
-
- decayed = (1 - alpha) * cosine_decayed + alpha
- return math_ops.multiply(learning_rate, decayed)
+ decayed_lr = learning_rate_decay_v2.cosine_decay(
+ learning_rate, global_step, decay_steps, alpha=alpha, name=name)
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
- return decayed_lr
+ return decayed_lr
-@tf_export("train.cosine_decay_restarts")
+@tf_export(v1=["train.cosine_decay_restarts"])
def cosine_decay_restarts(learning_rate,
global_step,
first_decay_steps,
@@ -664,57 +535,22 @@ def cosine_decay_restarts(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("cosine decay restarts requires global_step")
- with ops.name_scope(name, "SGDRDecay", [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(
- learning_rate, name="initial_learning_rate")
- dtype = learning_rate.dtype
- first_decay_steps = math_ops.cast(first_decay_steps, dtype)
- alpha = math_ops.cast(alpha, dtype)
- t_mul = math_ops.cast(t_mul, dtype)
- m_mul = math_ops.cast(m_mul, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- completed_fraction = global_step_recomp / first_decay_steps
-
- def compute_step(completed_fraction, geometric=False):
- """Helper for `cond` operation."""
- if geometric:
- i_restart = math_ops.floor(
- math_ops.log(1.0 - completed_fraction * (1.0 - t_mul)) /
- math_ops.log(t_mul))
-
- sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
- completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart
-
- else:
- i_restart = math_ops.floor(completed_fraction)
- completed_fraction -= i_restart
+ decayed_lr = learning_rate_decay_v2.cosine_decay_restarts(
+ learning_rate,
+ global_step,
+ first_decay_steps,
+ t_mul=t_mul,
+ m_mul=m_mul,
+ alpha=alpha,
+ name=name)
- return i_restart, completed_fraction
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
- i_restart, completed_fraction = control_flow_ops.cond(
- math_ops.equal(t_mul, 1.0),
- lambda: compute_step(completed_fraction, geometric=False),
- lambda: compute_step(completed_fraction, geometric=True))
+ return decayed_lr
- m_fac = m_mul**i_restart
- cosine_decayed = 0.5 * m_fac * (1.0 + math_ops.cos(
- constant_op.constant(math.pi) * completed_fraction))
- decayed = (1 - alpha) * cosine_decayed + alpha
- return math_ops.multiply(learning_rate, decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.linear_cosine_decay")
+@tf_export(v1=["train.linear_cosine_decay"])
def linear_cosine_decay(learning_rate,
global_step,
decay_steps,
@@ -781,37 +617,22 @@ def linear_cosine_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("linear cosine decay requires global_step")
- with ops.name_scope(name, "LinearCosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- num_periods = math_ops.cast(num_periods, dtype)
- alpha = math_ops.cast(alpha, dtype)
- beta = math_ops.cast(beta, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- linear_decayed = (decay_steps - global_step_recomp) / decay_steps
- completed_fraction = global_step_recomp / decay_steps
- fraction = 2.0 * num_periods * completed_fraction
- cosine_decayed = 0.5 * (
- 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
-
- linear_cosine_decayed = (alpha + linear_decayed) * cosine_decayed + beta
- return math_ops.multiply(learning_rate, linear_cosine_decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.noisy_linear_cosine_decay")
+ decayed_lr = learning_rate_decay_v2.linear_cosine_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ num_periods=num_periods,
+ alpha=alpha,
+ beta=beta,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
+@tf_export(v1=["train.noisy_linear_cosine_decay"])
def noisy_linear_cosine_decay(learning_rate,
global_step,
decay_steps,
@@ -886,42 +707,17 @@ def noisy_linear_cosine_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("noisy linear cosine decay requires global_step")
- with ops.name_scope(name, "NoisyLinearCosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- initial_variance = math_ops.cast(initial_variance, dtype)
- variance_decay = math_ops.cast(variance_decay, dtype)
- num_periods = math_ops.cast(num_periods, dtype)
- alpha = math_ops.cast(alpha, dtype)
- beta = math_ops.cast(beta, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- linear_decayed = (decay_steps - global_step_recomp) / decay_steps
- variance = initial_variance / (
- math_ops.pow(1.0 + global_step_recomp, variance_decay))
- std = math_ops.sqrt(variance)
- noisy_linear_decayed = (
- linear_decayed + random_ops.random_normal(
- linear_decayed.shape, stddev=std))
-
- completed_fraction = global_step_recomp / decay_steps
- fraction = 2.0 * num_periods * completed_fraction
- cosine_decayed = 0.5 * (
- 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
- noisy_linear_cosine_decayed = (
- (alpha + noisy_linear_decayed) * cosine_decayed + beta)
-
- return math_ops.multiply(
- learning_rate, noisy_linear_cosine_decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
+ decayed_lr = learning_rate_decay_v2.noisy_linear_cosine_decay(
+ learning_rate, global_step,
+ decay_steps,
+ initial_variance=initial_variance,
+ variance_decay=variance_decay,
+ num_periods=num_periods,
+ alpha=alpha,
+ beta=beta,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
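As the eager-compatibility notes quoted in the docstring fragments above indicate, the point of returning a callable is that the learning rate can then change between optimizer invocations under eager execution, where the v1 wrappers pass the callable through unchanged. A hedged usage sketch follows; it assumes TF 1.x-style eager execution and that the tf.train optimizers of this era accept a no-arg callable for learning_rate in eager mode.

    import tensorflow as tf

    tf.enable_eager_execution()

    global_step = tf.train.get_or_create_global_step()

    # Under eager execution the decay function returns a no-arg callable, so
    # the decayed rate is recomputed every time it is read.
    lr_fn = tf.train.exponential_decay(
        0.1, global_step, decay_steps=1000, decay_rate=0.96, staircase=True)

    # Passing the callable to the optimizer lets the rate track global_step
    # across training steps instead of being frozen at construction time.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr_fn)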