Diffstat (limited to 'tensorflow/python/training/learning_rate_decay.py')
-rw-r--r--  tensorflow/python/training/learning_rate_decay.py  432
1 file changed, 114 insertions(+), 318 deletions(-)
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index fd195a7965..29b5465321 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -17,19 +17,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-import math
-
from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.training import learning_rate_decay_v2
from tensorflow.python.util.tf_export import tf_export
-@tf_export("train.exponential_decay")
+@tf_export(v1=["train.exponential_decay"])
def exponential_decay(learning_rate,
global_step,
decay_steps,
@@ -95,32 +88,19 @@ def exponential_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for exponential_decay.")
- with ops.name_scope(
- name, "ExponentialDecay",
- [learning_rate, global_step, decay_steps, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- return math_ops.multiply(
- learning_rate, math_ops.pow(decay_rate, p), name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.piecewise_constant")
+ decayed_lr = learning_rate_decay_v2.exponential_decay(learning_rate,
+ global_step,
+ decay_steps,
+ decay_rate,
+ staircase=staircase,
+ name=name)
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
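Note: the v1 wrapper above now only forwards to learning_rate_decay_v2.exponential_decay; the schedule itself is unchanged. A minimal pure-Python sketch of that schedule, assuming scalar inputs (exponential_decay_value is a hypothetical helper, not the TF graph code):

import math

def exponential_decay_value(learning_rate, global_step, decay_steps,
                            decay_rate, staircase=False):
  # decayed = learning_rate * decay_rate ** (global_step / decay_steps)
  p = global_step / decay_steps
  if staircase:
    p = math.floor(p)  # decay in discrete intervals
  return learning_rate * decay_rate ** p

# e.g. exponential_decay_value(0.1, 100000, 100000, 0.96) == 0.096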
+@tf_export(v1=["train.piecewise_constant"])
def piecewise_constant(x, boundaries, values, name=None):
"""Piecewise constant from boundaries and interval values.
@@ -163,58 +143,15 @@ def piecewise_constant(x, boundaries, values, name=None):
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if len(boundaries) != len(values) - 1:
- raise ValueError(
- "The length of boundaries should be 1 less than the length of values")
- with ops.name_scope(name, "PiecewiseConstant",
- [x, boundaries, values, name]) as name:
- boundaries = ops.convert_n_to_tensor(boundaries)
- values = ops.convert_n_to_tensor(values)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- x_recomp = ops.convert_to_tensor(x)
- # Avoid explicit conversion to x's dtype. This could result in faulty
- # comparisons, for example if floats are converted to integers.
- for i, b in enumerate(boundaries):
- if b.dtype.base_dtype != x_recomp.dtype.base_dtype:
- # We can promote int32 boundaries to int64 without loss of precision.
- # This covers the most common case where the user passes in boundaries
- # as an array of Python integers.
- if (b.dtype.base_dtype == dtypes.int32 and
- x_recomp.dtype.base_dtype == dtypes.int64):
- b = math_ops.cast(b, x_recomp.dtype.base_dtype)
- boundaries[i] = b
- else:
- raise ValueError(
- "Boundaries (%s) must have the same dtype as x (%s)." %
- (b.dtype.base_dtype, x_recomp.dtype.base_dtype))
- # TODO(rdipietro): Ensure that boundaries' elements strictly increases.
- for v in values[1:]:
- if v.dtype.base_dtype != values[0].dtype.base_dtype:
- raise ValueError(
- "Values must have elements all with the same dtype (%s vs %s)." %
- (values[0].dtype.base_dtype, v.dtype.base_dtype))
- pred_fn_pairs = []
- pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0]))
- pred_fn_pairs.append((x_recomp > boundaries[-1], lambda: values[-1]))
- for low, high, v in zip(boundaries[:-1], boundaries[1:], values[1:-1]):
- # Need to bind v here; can do this with lambda v=v: ...
- pred = (x_recomp > low) & (x_recomp <= high)
- pred_fn_pairs.append((pred, lambda v=v: v))
-
- # The default isn't needed here because our conditions are mutually
- # exclusive and exhaustive, but tf.case requires it.
- default = lambda: values[0]
- return control_flow_ops.case(pred_fn_pairs, default, exclusive=True)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.polynomial_decay")
+ decayed_lr = learning_rate_decay_v2.piecewise_constant(x, boundaries, values,
+ name=name)
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
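Note: the removed v1 body picked one of values by comparing x against the sorted boundaries. A pure-Python sketch of that lookup, assuming a scalar x and len(values) == len(boundaries) + 1 (hypothetical helper, not the TF implementation):

def piecewise_constant_value(x, boundaries, values):
  # values[0] for x <= boundaries[0], values[-1] for x > boundaries[-1],
  # values[i + 1] for boundaries[i] < x <= boundaries[i + 1].
  for b, v in zip(boundaries, values):
    if x <= b:
      return v
  return values[-1]

# e.g. piecewise_constant_value(100001, [100000, 110000], [1.0, 0.5, 0.1]) == 0.5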
+@tf_export(v1=["train.polynomial_decay"])
def polynomial_decay(learning_rate,
global_step,
decay_steps,
@@ -299,46 +236,22 @@ def polynomial_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for polynomial_decay.")
- with ops.name_scope(
- name, "PolynomialDecay",
- [learning_rate, global_step, decay_steps, end_learning_rate, power
- ]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- end_learning_rate = math_ops.cast(end_learning_rate, dtype)
- power = math_ops.cast(power, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- decay_steps_recomp = math_ops.cast(decay_steps, dtype)
- if cycle:
- # Find the first multiple of decay_steps that is bigger than
- # global_step. If global_step is zero set the multiplier to 1
- multiplier = control_flow_ops.cond(
- math_ops.equal(global_step_recomp, 0), lambda: 1.0,
- lambda: math_ops.ceil(global_step_recomp / decay_steps))
- decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
- else:
- # Make sure that the global_step used is not bigger than decay_steps.
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-
- p = math_ops.div(global_step_recomp, decay_steps_recomp)
- return math_ops.add(
- math_ops.multiply(learning_rate - end_learning_rate,
- math_ops.pow(1 - p, power)),
- end_learning_rate,
- name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.natural_exp_decay")
+ decayed_lr = learning_rate_decay_v2.polynomial_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ end_learning_rate=end_learning_rate,
+ power=power,
+ cycle=cycle,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
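Note: the delegated schedule anneals from learning_rate to end_learning_rate along a polynomial in global_step / decay_steps, optionally restarting when cycle=True. A scalar pure-Python sketch of the removed computation (hypothetical helper; defaults are meant to mirror the v1 signature):

import math

def polynomial_decay_value(learning_rate, global_step, decay_steps,
                           end_learning_rate=0.0001, power=1.0, cycle=False):
  if cycle:
    # Stretch decay_steps to the first multiple that is >= global_step.
    multiplier = 1.0 if global_step == 0 else math.ceil(global_step / decay_steps)
    decay_steps = decay_steps * multiplier
  else:
    global_step = min(global_step, decay_steps)
  p = global_step / decay_steps
  return (learning_rate - end_learning_rate) * (1 - p) ** power + end_learning_rate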
+@tf_export(v1=["train.natural_exp_decay"])
def natural_exp_decay(learning_rate,
global_step,
decay_steps,
@@ -410,32 +323,17 @@ def natural_exp_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for natural_exp_decay.")
- with ops.name_scope(name, "NaturalExpDecay",
- [learning_rate, global_step, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- exponent = math_ops.exp(
- math_ops.multiply(math_ops.negative(decay_rate), p))
- return math_ops.multiply(learning_rate, exponent, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.inverse_time_decay")
+ decayed_lr = learning_rate_decay_v2.natural_exp_decay(
+ learning_rate, global_step, decay_steps, decay_rate, staircase=staircase,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
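Note: the schedule here multiplies learning_rate by exp(-decay_rate * global_step / decay_steps). A scalar sketch of the removed computation (hypothetical helper, not the TF op):

import math

def natural_exp_decay_value(learning_rate, global_step, decay_steps,
                            decay_rate, staircase=False):
  p = global_step / decay_steps
  if staircase:
    p = math.floor(p)  # decay in discrete intervals
  return learning_rate * math.exp(-decay_rate * p)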
+@tf_export(v1=["train.inverse_time_decay"])
def inverse_time_decay(learning_rate,
global_step,
decay_steps,
@@ -507,32 +405,21 @@ def inverse_time_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("global_step is required for inverse_time_decay.")
- with ops.name_scope(name, "InverseTimeDecay",
- [learning_rate, global_step, decay_rate]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- decay_rate = math_ops.cast(decay_rate, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- p = global_step_recomp / decay_steps
- if staircase:
- p = math_ops.floor(p)
- const = math_ops.cast(constant_op.constant(1), dtype)
- denom = math_ops.add(const, math_ops.multiply(decay_rate, p))
- return math_ops.div(learning_rate, denom, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.cosine_decay")
+ decayed_lr = learning_rate_decay_v2.inverse_time_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ decay_rate,
+ staircase=staircase,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
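Note: the delegated schedule divides learning_rate by 1 + decay_rate * global_step / decay_steps. A scalar sketch of the removed computation (hypothetical helper, not the TF op):

import math

def inverse_time_decay_value(learning_rate, global_step, decay_steps,
                             decay_rate, staircase=False):
  p = global_step / decay_steps
  if staircase:
    p = math.floor(p)  # decay in discrete intervals
  return learning_rate / (1 + decay_rate * p)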
+@tf_export(v1=["train.cosine_decay"])
def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
"""Applies cosine decay to the learning rate.
@@ -581,32 +468,16 @@ def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("cosine decay requires global_step")
- with ops.name_scope(name, "CosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- completed_fraction = global_step_recomp / decay_steps
- cosine_decayed = 0.5 * (1.0 + math_ops.cos(
- constant_op.constant(math.pi) * completed_fraction))
-
- decayed = (1 - alpha) * cosine_decayed + alpha
- return math_ops.multiply(learning_rate, decayed)
+ decayed_lr = learning_rate_decay_v2.cosine_decay(
+ learning_rate, global_step, decay_steps, alpha=alpha, name=name)
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
- return decayed_lr
+ return decayed_lr
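Note: the schedule follows half a cosine wave from learning_rate down to alpha * learning_rate over decay_steps. A scalar sketch of the removed computation (hypothetical helper, not the TF op):

import math

def cosine_decay_value(learning_rate, global_step, decay_steps, alpha=0.0):
  step = min(global_step, decay_steps)
  completed_fraction = step / decay_steps
  cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * completed_fraction))
  # alpha is the floor of the schedule, as a fraction of learning_rate.
  return learning_rate * ((1 - alpha) * cosine_decayed + alpha)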
-@tf_export("train.cosine_decay_restarts")
+@tf_export(v1=["train.cosine_decay_restarts"])
def cosine_decay_restarts(learning_rate,
global_step,
first_decay_steps,
@@ -664,57 +535,22 @@ def cosine_decay_restarts(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("cosine decay restarts requires global_step")
- with ops.name_scope(name, "SGDRDecay", [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(
- learning_rate, name="initial_learning_rate")
- dtype = learning_rate.dtype
- first_decay_steps = math_ops.cast(first_decay_steps, dtype)
- alpha = math_ops.cast(alpha, dtype)
- t_mul = math_ops.cast(t_mul, dtype)
- m_mul = math_ops.cast(m_mul, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- completed_fraction = global_step_recomp / first_decay_steps
-
- def compute_step(completed_fraction, geometric=False):
- """Helper for `cond` operation."""
- if geometric:
- i_restart = math_ops.floor(
- math_ops.log(1.0 - completed_fraction * (1.0 - t_mul)) /
- math_ops.log(t_mul))
-
- sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
- completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart
-
- else:
- i_restart = math_ops.floor(completed_fraction)
- completed_fraction -= i_restart
+ decayed_lr = learning_rate_decay_v2.cosine_decay_restarts(
+ learning_rate,
+ global_step,
+ first_decay_steps,
+ t_mul=t_mul,
+ m_mul=m_mul,
+ alpha=alpha,
+ name=name)
- return i_restart, completed_fraction
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
- i_restart, completed_fraction = control_flow_ops.cond(
- math_ops.equal(t_mul, 1.0),
- lambda: compute_step(completed_fraction, geometric=False),
- lambda: compute_step(completed_fraction, geometric=True))
+ return decayed_lr
- m_fac = m_mul**i_restart
- cosine_decayed = 0.5 * m_fac * (1.0 + math_ops.cos(
- constant_op.constant(math.pi) * completed_fraction))
- decayed = (1 - alpha) * cosine_decayed + alpha
- return math_ops.multiply(learning_rate, decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
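Note: the SGDR schedule restarts the cosine decay after first_decay_steps, stretching each subsequent period by t_mul and damping its peak by m_mul. A scalar sketch of the removed computation (hypothetical helper; defaults are meant to mirror the v1 signature):

import math

def cosine_decay_restarts_value(learning_rate, global_step, first_decay_steps,
                                t_mul=2.0, m_mul=1.0, alpha=0.0):
  completed_fraction = global_step / first_decay_steps
  if t_mul == 1.0:
    i_restart = math.floor(completed_fraction)
    completed_fraction -= i_restart
  else:
    # Periods grow geometrically by t_mul after each restart.
    i_restart = math.floor(
        math.log(1.0 - completed_fraction * (1.0 - t_mul)) / math.log(t_mul))
    sum_r = (1.0 - t_mul ** i_restart) / (1.0 - t_mul)
    completed_fraction = (completed_fraction - sum_r) / t_mul ** i_restart
  m_fac = m_mul ** i_restart
  cosine_decayed = 0.5 * m_fac * (1.0 + math.cos(math.pi * completed_fraction))
  return learning_rate * ((1 - alpha) * cosine_decayed + alpha)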
-@tf_export("train.linear_cosine_decay")
+@tf_export(v1=["train.linear_cosine_decay"])
def linear_cosine_decay(learning_rate,
global_step,
decay_steps,
@@ -781,37 +617,22 @@ def linear_cosine_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("linear cosine decay requires global_step")
- with ops.name_scope(name, "LinearCosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- num_periods = math_ops.cast(num_periods, dtype)
- alpha = math_ops.cast(alpha, dtype)
- beta = math_ops.cast(beta, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- linear_decayed = (decay_steps - global_step_recomp) / decay_steps
- completed_fraction = global_step_recomp / decay_steps
- fraction = 2.0 * num_periods * completed_fraction
- cosine_decayed = 0.5 * (
- 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
-
- linear_cosine_decayed = (alpha + linear_decayed) * cosine_decayed + beta
- return math_ops.multiply(learning_rate, linear_cosine_decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
-
-
-@tf_export("train.noisy_linear_cosine_decay")
+ decayed_lr = learning_rate_decay_v2.linear_cosine_decay(
+ learning_rate,
+ global_step,
+ decay_steps,
+ num_periods=num_periods,
+ alpha=alpha,
+ beta=beta,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
+
+
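Note: the delegated schedule multiplies a linear ramp-down by a cosine oscillation with num_periods periods, plus a small floor beta. A scalar sketch of the removed computation (hypothetical helper; defaults are meant to mirror the v1 signature):

import math

def linear_cosine_decay_value(learning_rate, global_step, decay_steps,
                              num_periods=0.5, alpha=0.0, beta=0.001):
  step = min(global_step, decay_steps)
  linear_decayed = (decay_steps - step) / decay_steps
  fraction = 2.0 * num_periods * step / decay_steps
  cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
  return learning_rate * ((alpha + linear_decayed) * cosine_decayed + beta)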
+@tf_export(v1=["train.noisy_linear_cosine_decay"])
def noisy_linear_cosine_decay(learning_rate,
global_step,
decay_steps,
@@ -886,42 +707,17 @@ def noisy_linear_cosine_decay(learning_rate,
the learning rate value across different invocations of optimizer functions.
@end_compatibility
"""
- if global_step is None:
- raise ValueError("noisy linear cosine decay requires global_step")
- with ops.name_scope(name, "NoisyLinearCosineDecay",
- [learning_rate, global_step]) as name:
- learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
- dtype = learning_rate.dtype
- decay_steps = math_ops.cast(decay_steps, dtype)
- initial_variance = math_ops.cast(initial_variance, dtype)
- variance_decay = math_ops.cast(variance_decay, dtype)
- num_periods = math_ops.cast(num_periods, dtype)
- alpha = math_ops.cast(alpha, dtype)
- beta = math_ops.cast(beta, dtype)
-
- def decayed_lr():
- """Helper to recompute learning rate; most helpful in eager-mode."""
- global_step_recomp = math_ops.cast(global_step, dtype)
- global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
- linear_decayed = (decay_steps - global_step_recomp) / decay_steps
- variance = initial_variance / (
- math_ops.pow(1.0 + global_step_recomp, variance_decay))
- std = math_ops.sqrt(variance)
- noisy_linear_decayed = (
- linear_decayed + random_ops.random_normal(
- linear_decayed.shape, stddev=std))
-
- completed_fraction = global_step_recomp / decay_steps
- fraction = 2.0 * num_periods * completed_fraction
- cosine_decayed = 0.5 * (
- 1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
- noisy_linear_cosine_decayed = (
- (alpha + noisy_linear_decayed) * cosine_decayed + beta)
-
- return math_ops.multiply(
- learning_rate, noisy_linear_cosine_decayed, name=name)
-
- if not context.executing_eagerly():
- decayed_lr = decayed_lr()
-
- return decayed_lr
+ decayed_lr = learning_rate_decay_v2.noisy_linear_cosine_decay(
+ learning_rate, global_step,
+ decay_steps,
+ initial_variance=initial_variance,
+ variance_decay=variance_decay,
+ num_periods=num_periods,
+ alpha=alpha,
+ beta=beta,
+ name=name)
+
+ if not context.executing_eagerly():
+ decayed_lr = decayed_lr()
+
+ return decayed_lr
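Note: this variant adds Gaussian noise with decaying variance to the linear term before the cosine modulation. A scalar sketch of the removed computation (hypothetical helper; random.gauss stands in for random_ops.random_normal, and defaults are meant to mirror the v1 signature):

import math
import random

def noisy_linear_cosine_decay_value(learning_rate, global_step, decay_steps,
                                    initial_variance=1.0, variance_decay=0.55,
                                    num_periods=0.5, alpha=0.0, beta=0.001):
  step = min(global_step, decay_steps)
  linear_decayed = (decay_steps - step) / decay_steps
  # Gaussian noise whose variance shrinks as training progresses.
  variance = initial_variance / (1.0 + step) ** variance_decay
  noisy_linear_decayed = linear_decayed + random.gauss(0.0, math.sqrt(variance))
  fraction = 2.0 * num_periods * step / decay_steps
  cosine_decayed = 0.5 * (1.0 + math.cos(math.pi * fraction))
  return learning_rate * ((alpha + noisy_linear_decayed) * cosine_decayed + beta)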