Diffstat (limited to 'tensorflow/python/training/learning_rate_decay.py')
-rw-r--r-- | tensorflow/python/training/learning_rate_decay.py | 432
1 file changed, 114 insertions, 318 deletions
diff --git a/tensorflow/python/training/learning_rate_decay.py b/tensorflow/python/training/learning_rate_decay.py
index fd195a7965..29b5465321 100644
--- a/tensorflow/python/training/learning_rate_decay.py
+++ b/tensorflow/python/training/learning_rate_decay.py
@@ -17,19 +17,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import math
-
 from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
+from tensorflow.python.training import learning_rate_decay_v2
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("train.exponential_decay")
+@tf_export(v1=["train.exponential_decay"])
 def exponential_decay(learning_rate,
                       global_step,
                       decay_steps,
@@ -95,32 +88,19 @@ def exponential_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("global_step is required for exponential_decay.")
-  with ops.name_scope(
-      name, "ExponentialDecay",
-      [learning_rate, global_step, decay_steps, decay_rate]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-    decay_rate = math_ops.cast(decay_rate, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      p = global_step_recomp / decay_steps
-      if staircase:
-        p = math_ops.floor(p)
-      return math_ops.multiply(
-          learning_rate, math_ops.pow(decay_rate, p), name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.piecewise_constant")
+  decayed_lr = learning_rate_decay_v2.exponential_decay(learning_rate,
+                                                        global_step,
+                                                        decay_steps,
+                                                        decay_rate,
+                                                        staircase=staircase,
+                                                        name=name)
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.piecewise_constant"])
 def piecewise_constant(x, boundaries, values, name=None):
   """Piecewise constant from boundaries and interval values.
 
@@ -163,58 +143,15 @@ def piecewise_constant(x, boundaries, values, name=None):
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if len(boundaries) != len(values) - 1:
-    raise ValueError(
-        "The length of boundaries should be 1 less than the length of values")
-  with ops.name_scope(name, "PiecewiseConstant",
-                      [x, boundaries, values, name]) as name:
-    boundaries = ops.convert_n_to_tensor(boundaries)
-    values = ops.convert_n_to_tensor(values)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      x_recomp = ops.convert_to_tensor(x)
-      # Avoid explicit conversion to x's dtype. This could result in faulty
-      # comparisons, for example if floats are converted to integers.
-      for i, b in enumerate(boundaries):
-        if b.dtype.base_dtype != x_recomp.dtype.base_dtype:
-          # We can promote int32 boundaries to int64 without loss of precision.
-          # This covers the most common case where the user passes in boundaries
-          # as an array of Python integers.
-          if (b.dtype.base_dtype == dtypes.int32 and
-              x_recomp.dtype.base_dtype == dtypes.int64):
-            b = math_ops.cast(b, x_recomp.dtype.base_dtype)
-            boundaries[i] = b
-          else:
-            raise ValueError(
-                "Boundaries (%s) must have the same dtype as x (%s)." %
-                (b.dtype.base_dtype, x_recomp.dtype.base_dtype))
-      # TODO(rdipietro): Ensure that boundaries' elements strictly increases.
-      for v in values[1:]:
-        if v.dtype.base_dtype != values[0].dtype.base_dtype:
-          raise ValueError(
-              "Values must have elements all with the same dtype (%s vs %s)." %
-              (values[0].dtype.base_dtype, v.dtype.base_dtype))
-      pred_fn_pairs = []
-      pred_fn_pairs.append((x_recomp <= boundaries[0], lambda: values[0]))
-      pred_fn_pairs.append((x_recomp > boundaries[-1], lambda: values[-1]))
-      for low, high, v in zip(boundaries[:-1], boundaries[1:], values[1:-1]):
-        # Need to bind v here; can do this with lambda v=v: ...
-        pred = (x_recomp > low) & (x_recomp <= high)
-        pred_fn_pairs.append((pred, lambda v=v: v))
-
-      # The default isn't needed here because our conditions are mutually
-      # exclusive and exhaustive, but tf.case requires it.
-      default = lambda: values[0]
-      return control_flow_ops.case(pred_fn_pairs, default, exclusive=True)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.polynomial_decay")
+  decayed_lr = learning_rate_decay_v2.piecewise_constant(x, boundaries, values,
+                                                         name=name)
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.polynomial_decay"])
 def polynomial_decay(learning_rate,
                      global_step,
                      decay_steps,
@@ -299,46 +236,22 @@ def polynomial_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("global_step is required for polynomial_decay.")
-  with ops.name_scope(
-      name, "PolynomialDecay",
-      [learning_rate, global_step, decay_steps, end_learning_rate, power
-      ]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    end_learning_rate = math_ops.cast(end_learning_rate, dtype)
-    power = math_ops.cast(power, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      decay_steps_recomp = math_ops.cast(decay_steps, dtype)
-      if cycle:
-        # Find the first multiple of decay_steps that is bigger than
-        # global_step. If global_step is zero set the multiplier to 1
-        multiplier = control_flow_ops.cond(
-            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
-            lambda: math_ops.ceil(global_step_recomp / decay_steps))
-        decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
-      else:
-        # Make sure that the global_step used is not bigger than decay_steps.
-        global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-
-      p = math_ops.div(global_step_recomp, decay_steps_recomp)
-      return math_ops.add(
-          math_ops.multiply(learning_rate - end_learning_rate,
-                            math_ops.pow(1 - p, power)),
-          end_learning_rate,
-          name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.natural_exp_decay")
+  decayed_lr = learning_rate_decay_v2.polynomial_decay(
+      learning_rate,
+      global_step,
+      decay_steps,
+      end_learning_rate=end_learning_rate,
+      power=power,
+      cycle=cycle,
+      name=name)
+
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.natural_exp_decay"])
 def natural_exp_decay(learning_rate,
                       global_step,
                       decay_steps,
@@ -410,32 +323,17 @@ def natural_exp_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("global_step is required for natural_exp_decay.")
-  with ops.name_scope(name, "NaturalExpDecay",
-                      [learning_rate, global_step, decay_rate]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-    decay_rate = math_ops.cast(decay_rate, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      p = global_step_recomp / decay_steps
-      if staircase:
-        p = math_ops.floor(p)
-      exponent = math_ops.exp(
-          math_ops.multiply(math_ops.negative(decay_rate), p))
-      return math_ops.multiply(learning_rate, exponent, name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.inverse_time_decay")
+  decayed_lr = learning_rate_decay_v2.natural_exp_decay(
+      learning_rate, global_step, decay_steps, decay_rate, staircase=staircase,
+      name=name)
+
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.inverse_time_decay"])
 def inverse_time_decay(learning_rate,
                        global_step,
                        decay_steps,
@@ -507,32 +405,21 @@ def inverse_time_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("global_step is required for inverse_time_decay.")
-  with ops.name_scope(name, "InverseTimeDecay",
-                      [learning_rate, global_step, decay_rate]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-    decay_rate = math_ops.cast(decay_rate, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      p = global_step_recomp / decay_steps
-      if staircase:
-        p = math_ops.floor(p)
-      const = math_ops.cast(constant_op.constant(1), dtype)
-      denom = math_ops.add(const, math_ops.multiply(decay_rate, p))
-      return math_ops.div(learning_rate, denom, name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.cosine_decay")
+  decayed_lr = learning_rate_decay_v2.inverse_time_decay(
+      learning_rate,
+      global_step,
+      decay_steps,
+      decay_rate,
+      staircase=staircase,
+      name=name)
+
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.cosine_decay"])
 def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
   """Applies cosine decay to the learning rate.
 
@@ -581,32 +468,16 @@ def cosine_decay(learning_rate, global_step, decay_steps, alpha=0.0, name=None):
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("cosine decay requires global_step")
-  with ops.name_scope(name, "CosineDecay",
-                      [learning_rate, global_step]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-      completed_fraction = global_step_recomp / decay_steps
-      cosine_decayed = 0.5 * (1.0 + math_ops.cos(
-          constant_op.constant(math.pi) * completed_fraction))
-
-      decayed = (1 - alpha) * cosine_decayed + alpha
-      return math_ops.multiply(learning_rate, decayed)
+  decayed_lr = learning_rate_decay_v2.cosine_decay(
+      learning_rate, global_step, decay_steps, alpha=alpha, name=name)
 
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
 
-    return decayed_lr
+  return decayed_lr
 
 
-@tf_export("train.cosine_decay_restarts")
+@tf_export(v1=["train.cosine_decay_restarts"])
 def cosine_decay_restarts(learning_rate,
                           global_step,
                           first_decay_steps,
@@ -664,57 +535,22 @@ def cosine_decay_restarts(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("cosine decay restarts requires global_step")
-  with ops.name_scope(name, "SGDRDecay", [learning_rate, global_step]) as name:
-    learning_rate = ops.convert_to_tensor(
-        learning_rate, name="initial_learning_rate")
-    dtype = learning_rate.dtype
-    first_decay_steps = math_ops.cast(first_decay_steps, dtype)
-    alpha = math_ops.cast(alpha, dtype)
-    t_mul = math_ops.cast(t_mul, dtype)
-    m_mul = math_ops.cast(m_mul, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      completed_fraction = global_step_recomp / first_decay_steps
-
-      def compute_step(completed_fraction, geometric=False):
-        """Helper for `cond` operation."""
-        if geometric:
-          i_restart = math_ops.floor(
-              math_ops.log(1.0 - completed_fraction * (1.0 - t_mul)) /
-              math_ops.log(t_mul))
-
-          sum_r = (1.0 - t_mul**i_restart) / (1.0 - t_mul)
-          completed_fraction = (completed_fraction - sum_r) / t_mul**i_restart
-
-        else:
-          i_restart = math_ops.floor(completed_fraction)
-          completed_fraction -= i_restart
+  decayed_lr = learning_rate_decay_v2.cosine_decay_restarts(
+      learning_rate,
+      global_step,
+      first_decay_steps,
+      t_mul=t_mul,
+      m_mul=m_mul,
+      alpha=alpha,
+      name=name)
 
-        return i_restart, completed_fraction
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
 
-      i_restart, completed_fraction = control_flow_ops.cond(
-          math_ops.equal(t_mul, 1.0),
-          lambda: compute_step(completed_fraction, geometric=False),
-          lambda: compute_step(completed_fraction, geometric=True))
+  return decayed_lr
 
-      m_fac = m_mul**i_restart
-      cosine_decayed = 0.5 * m_fac * (1.0 + math_ops.cos(
-          constant_op.constant(math.pi) * completed_fraction))
-      decayed = (1 - alpha) * cosine_decayed + alpha
-      return math_ops.multiply(learning_rate, decayed, name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.linear_cosine_decay")
+@tf_export(v1=["train.linear_cosine_decay"])
 def linear_cosine_decay(learning_rate,
                         global_step,
                         decay_steps,
@@ -781,37 +617,22 @@ def linear_cosine_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("linear cosine decay requires global_step")
-  with ops.name_scope(name, "LinearCosineDecay",
-                      [learning_rate, global_step]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-    num_periods = math_ops.cast(num_periods, dtype)
-    alpha = math_ops.cast(alpha, dtype)
-    beta = math_ops.cast(beta, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-      linear_decayed = (decay_steps - global_step_recomp) / decay_steps
-      completed_fraction = global_step_recomp / decay_steps
-      fraction = 2.0 * num_periods * completed_fraction
-      cosine_decayed = 0.5 * (
-          1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
-
-      linear_cosine_decayed = (alpha + linear_decayed) * cosine_decayed + beta
-      return math_ops.multiply(learning_rate, linear_cosine_decayed, name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
-
-
-@tf_export("train.noisy_linear_cosine_decay")
+  decayed_lr = learning_rate_decay_v2.linear_cosine_decay(
+      learning_rate,
+      global_step,
+      decay_steps,
+      num_periods=num_periods,
+      alpha=alpha,
+      beta=beta,
+      name=name)
+
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
+
+
+@tf_export(v1=["train.noisy_linear_cosine_decay"])
 def noisy_linear_cosine_decay(learning_rate,
                               global_step,
                               decay_steps,
@@ -886,42 +707,17 @@ def noisy_linear_cosine_decay(learning_rate,
     the learning rate value across different invocations of optimizer functions.
   @end_compatibility
   """
-  if global_step is None:
-    raise ValueError("noisy linear cosine decay requires global_step")
-  with ops.name_scope(name, "NoisyLinearCosineDecay",
-                      [learning_rate, global_step]) as name:
-    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
-    dtype = learning_rate.dtype
-    decay_steps = math_ops.cast(decay_steps, dtype)
-    initial_variance = math_ops.cast(initial_variance, dtype)
-    variance_decay = math_ops.cast(variance_decay, dtype)
-    num_periods = math_ops.cast(num_periods, dtype)
-    alpha = math_ops.cast(alpha, dtype)
-    beta = math_ops.cast(beta, dtype)
-
-    def decayed_lr():
-      """Helper to recompute learning rate; most helpful in eager-mode."""
-      global_step_recomp = math_ops.cast(global_step, dtype)
-      global_step_recomp = math_ops.minimum(global_step_recomp, decay_steps)
-      linear_decayed = (decay_steps - global_step_recomp) / decay_steps
-      variance = initial_variance / (
-          math_ops.pow(1.0 + global_step_recomp, variance_decay))
-      std = math_ops.sqrt(variance)
-      noisy_linear_decayed = (
-          linear_decayed + random_ops.random_normal(
-              linear_decayed.shape, stddev=std))
-
-      completed_fraction = global_step_recomp / decay_steps
-      fraction = 2.0 * num_periods * completed_fraction
-      cosine_decayed = 0.5 * (
-          1.0 + math_ops.cos(constant_op.constant(math.pi) * fraction))
-      noisy_linear_cosine_decayed = (
-          (alpha + noisy_linear_decayed) * cosine_decayed + beta)
-
-      return math_ops.multiply(
-          learning_rate, noisy_linear_cosine_decayed, name=name)
-
-    if not context.executing_eagerly():
-      decayed_lr = decayed_lr()
-
-    return decayed_lr
+  decayed_lr = learning_rate_decay_v2.noisy_linear_cosine_decay(
+      learning_rate, global_step,
+      decay_steps,
+      initial_variance=initial_variance,
+      variance_decay=variance_decay,
+      num_periods=num_periods,
+      alpha=alpha,
+      beta=beta,
+      name=name)
+
+  if not context.executing_eagerly():
+    decayed_lr = decayed_lr()
+
+  return decayed_lr
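
Every hunk in this diff makes the same mechanical change: the v1 symbol keeps its exported name (now registered under tf_export(v1=[...])), forwards all of its arguments to the matching function in learning_rate_decay_v2, and, because the v2 functions return a zero-argument callable that recomputes the decayed rate, invokes that callable once when not executing eagerly so graph-mode callers still receive a tensor. Below is a minimal plain-Python sketch of that delegation pattern; executing_eagerly and v2_exponential_decay are illustrative stand-ins for context.executing_eagerly and learning_rate_decay_v2.exponential_decay, not TensorFlow internals.

# Minimal sketch of the v1-wraps-v2 delegation pattern used throughout this
# diff. `executing_eagerly` and `v2_exponential_decay` are hypothetical
# stand-ins; no TensorFlow ops are involved.

EAGER = False  # flip to True to get the eager-mode behavior


def executing_eagerly():
  return EAGER


def v2_exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
                         staircase=False, name=None):
  """v2 style: returns a zero-argument callable that recomputes the rate."""
  def decayed_lr():
    p = global_step() / decay_steps
    if staircase:
      p = float(int(p))  # stands in for math_ops.floor
    return learning_rate * decay_rate**p
  return decayed_lr


def exponential_decay(learning_rate, global_step, decay_steps, decay_rate,
                      staircase=False, name=None):
  """v1 wrapper: the exact shape every function in this diff now has."""
  decayed_lr = v2_exponential_decay(learning_rate, global_step, decay_steps,
                                    decay_rate, staircase=staircase, name=name)
  if not executing_eagerly():
    # Graph mode: evaluate once so callers still get a value (a Tensor in
    # real TensorFlow) rather than a callable.
    decayed_lr = decayed_lr()
  return decayed_lr


step = 0
lr = exponential_decay(0.1, lambda: step, decay_steps=1000, decay_rate=0.96)
print(lr)  # graph-mode path: prints 0.1, the rate computed at step 0

In eager mode the wrapper returns the callable unchanged, so an optimizer can invoke it on every step and pick up the current global step, which is exactly the behavior the @compatibility(eager) notes in these docstrings describe.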