Fix FTRL L2-shrinkage behavior: the gradient from the L2 shrinkage term should not end up in the accumulator.

PiperOrigin-RevId: 210648271
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-08-28 19:01:26 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-08-28 19:08:59 -0700
commit: 20d5683b826be03776978af3b8108fc3b5dc9cb8 (patch)
tree: 2742a2f95489ec391db1583a36d193bfc30cc338 /tensorflow/python/training
parent: 069f808e5c0462819bcd6c73c75491b00cdd42c2 (diff)
1 files changed, 85 insertions, 16 deletions
diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py
index 775bdb3f60..76ca5b45c9 100644
--- a/tensorflow/python/training/ftrl_test.py
+++ b/tensorflow/python/training/ftrl_test.py
@@ -117,8 +117,7 @@ class FtrlOptimizerTest(test.TestCase):
         # Run 1 step of sgd
         sgd_op.run()
         # Validate updated params
-        self.assertAllCloseAccordingToType(
-            [[0, 1]], var0.eval(), atol=0.01)
+        self.assertAllCloseAccordingToType([[0, 1]], var0.eval(), atol=0.01)
 
   def testFtrlWithL1(self):
     for dtype in [dtypes.half, dtypes.float32]:
@@ -212,24 +211,96 @@ class FtrlOptimizerTest(test.TestCase):
 
         v0_val, v1_val = sess.run([var0, var1])
         self.assertAllCloseAccordingToType(
-            np.array([-0.22078767, -0.41378114]), v0_val)
+            np.array([-0.22578995, -0.44345796]), v0_val)
         self.assertAllCloseAccordingToType(
-            np.array([-0.02919818, -0.07343706]), v1_val)
+            np.array([-0.14378493, -0.13229476]), v1_val)
+
+  def testFtrlWithL1_L2_L2ShrinkageSparse(self):
+    """Tests the new FTRL op with support for l2 shrinkage on sparse grads."""
+    for dtype in [dtypes.half, dtypes.float32]:
+      with self.test_session() as sess:
+        var0 = variables.Variable([[1.0], [2.0]], dtype=dtype)
+        var1 = variables.Variable([[4.0], [3.0]], dtype=dtype)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
+            constant_op.constant([0]), constant_op.constant([2, 1]))
+        grads1 = ops.IndexedSlices(
+            constant_op.constant([0.02], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]), constant_op.constant([2, 1]))
+
+        opt = ftrl.FtrlOptimizer(
+            3.0,
+            initial_accumulator_value=0.1,
+            l1_regularization_strength=0.001,
+            l2_regularization_strength=2.0,
+            l2_shrinkage_regularization_strength=0.1)
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+        variables.global_variables_initializer().run()
+
+        v0_val, v1_val = sess.run([var0, var1])
+        self.assertAllCloseAccordingToType([[1.0], [2.0]], v0_val)
+        self.assertAllCloseAccordingToType([[4.0], [3.0]], v1_val)
+
+        # Run 10 steps FTRL
+        for _ in range(10):
+          update.run()
+
+        v0_val, v1_val = sess.run([var0, var1])
+        self.assertAllCloseAccordingToType([[-0.22578995], [2.]], v0_val)
+        self.assertAllCloseAccordingToType([[4.], [-0.13229476]], v1_val)
+
+  def testFtrlWithL2ShrinkageDoesNotChangeLrSchedule(self):
+    """Verifies that l2 shrinkage in FTRL does not change lr schedule."""
+    for dtype in [dtypes.half, dtypes.float32]:
+      with self.test_session() as sess:
+        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+        var1 = variables.Variable([1.0, 2.0], dtype=dtype)
+        grads0 = constant_op.constant([0.1, 0.2], dtype=dtype)
+        grads1 = constant_op.constant([0.1, 0.2], dtype=dtype)
+
+        opt0 = ftrl.FtrlOptimizer(
+            3.0,
+            initial_accumulator_value=0.1,
+            l1_regularization_strength=0.001,
+            l2_regularization_strength=2.0,
+            l2_shrinkage_regularization_strength=0.1)
+        opt1 = ftrl.FtrlOptimizer(
+            3.0,
+            initial_accumulator_value=0.1,
+            l1_regularization_strength=0.001,
+            l2_regularization_strength=2.0)
+        update0 = opt0.apply_gradients([(grads0, var0)])
+        update1 = opt1.apply_gradients([(grads1, var1)])
+        variables.global_variables_initializer().run()
+
+        v0_val, v1_val = sess.run([var0, var1])
+        self.assertAllCloseAccordingToType([1.0, 2.0], v0_val)
+        self.assertAllCloseAccordingToType([1.0, 2.0], v1_val)
+
+        # Run 10 steps FTRL
+        for _ in range(10):
+          update0.run()
+          update1.run()
+
+        v0_val, v1_val = sess.run([var0, var1])
+        # var0 is experiencing L2 shrinkage so it should be smaller than var1
+        # in magnitude.
+        self.assertTrue((v0_val**2 < v1_val**2).all())
+        accum0 = list(sess.run(opt0._slots)["accum"].values())[0]
+        accum1 = list(sess.run(opt1._slots)["accum"].values())[0]
+        # L2 shrinkage should not change how we update grad accumulator.
+        self.assertAllCloseAccordingToType(accum0, accum1)
 
   def applyOptimizer(self, opt, dtype, steps=5, is_sparse=False):
     if is_sparse:
       var0 = variables.Variable([[0.0], [0.0]], dtype=dtype)
       var1 = variables.Variable([[0.0], [0.0]], dtype=dtype)
       grads0 = ops.IndexedSlices(
-          constant_op.constant(
-              [0.1], shape=[1, 1], dtype=dtype),
-          constant_op.constant([0]),
-          constant_op.constant([2, 1]))
+          constant_op.constant([0.1], shape=[1, 1], dtype=dtype),
+          constant_op.constant([0]), constant_op.constant([2, 1]))
       grads1 = ops.IndexedSlices(
-          constant_op.constant(
-              [0.02], shape=[1, 1], dtype=dtype),
-          constant_op.constant([1]),
-          constant_op.constant([2, 1]))
+          constant_op.constant([0.02], shape=[1, 1], dtype=dtype),
+          constant_op.constant([1]), constant_op.constant([2, 1]))
     else:
       var0 = variables.Variable([0.0, 0.0], dtype=dtype)
       var1 = variables.Variable([0.0, 0.0], dtype=dtype)
@@ -277,8 +348,7 @@ class FtrlOptimizerTest(test.TestCase):
 
       with self.test_session():
         val2, val3 = self.applyOptimizer(
-            adagrad.AdagradOptimizer(
-                3.0, initial_accumulator_value=0.1), dtype)
+            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1), dtype)
 
       self.assertAllCloseAccordingToType(val0, val2)
       self.assertAllCloseAccordingToType(val1, val3)
@@ -299,8 +369,7 @@ class FtrlOptimizerTest(test.TestCase):
 
       with self.test_session():
         val2, val3 = self.applyOptimizer(
-            adagrad.AdagradOptimizer(
-                3.0, initial_accumulator_value=0.1),
+            adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1),
             dtype,
             is_sparse=True)
author	A. Unique TensorFlower <gardener@tensorflow.org>	2018-08-28 19:01:26 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-08-28 19:08:59 -0700
commit	20d5683b826be03776978af3b8108fc3b5dc9cb8 (patch)
tree	2742a2f95489ec391db1583a36d193bfc30cc338 /tensorflow/python/training
parent	069f808e5c0462819bcd6c73c75491b00cdd42c2 (diff)