-rw-r--r--  tensorflow/contrib/optimizer_v2/BUILD                      1
-rw-r--r--  tensorflow/contrib/optimizer_v2/rmsprop_test.py          718
-rw-r--r--  tensorflow/python/framework/test_util.py                  73
-rw-r--r--  tensorflow/python/keras/layers/cudnn_recurrent_test.py     4
-rw-r--r--  tensorflow/python/keras/testing_utils.py                  73
5 files changed, 435 insertions, 434 deletions
diff --git a/tensorflow/contrib/optimizer_v2/BUILD b/tensorflow/contrib/optimizer_v2/BUILD
index 5225ecc14f..3ba3ee29ec 100644
--- a/tensorflow/contrib/optimizer_v2/BUILD
+++ b/tensorflow/contrib/optimizer_v2/BUILD
@@ -193,6 +193,7 @@ cuda_py_test(
srcs = ["rmsprop_test.py"],
additional_deps = [
":training",
+ "@absl_py//absl/testing:parameterized",
"//tensorflow/python:array_ops",
"//tensorflow/python:embedding_ops",
"//tensorflow/python:framework",
diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
index ed68f6afbf..dc23ef241a 100644
--- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py
@@ -19,15 +19,16 @@ from __future__ import division
from __future__ import print_function
import copy
-import itertools
import math
+from absl.testing import parameterized
import numpy as np
from tensorflow.contrib.optimizer_v2 import rmsprop
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
@@ -48,13 +49,8 @@ _TEST_PARAM_VALUES = [
[0.5, 0.95, 0.9, 1e-5, True, False],
]
-_TESTPARAMS = [
- [data_type] + values
- for data_type, values in itertools.product(_DATA_TYPES, _TEST_PARAM_VALUES)
-]
-
-class RMSPropOptimizerTest(test.TestCase):
+class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase):
def _rmsprop_update_numpy(self, var, g, mg, rms, mom, lr, decay, momentum,
epsilon, centered):
@@ -87,362 +83,366 @@ class RMSPropOptimizerTest(test.TestCase):
var_t[gindex] = var[gindex] - mom_t[gindex]
return var_t, mg_t, rms_t, mom_t
- def testDense(self):
- # TODO(yori): Use ParameterizedTest when available
- for (dtype, learning_rate, decay, momentum,
- epsilon, centered, use_resource) in _TESTPARAMS:
- with self.test_session(use_gpu=True):
- # Initialize variables for numpy implementation.
- var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
- grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
- var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
- grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)
-
- if use_resource:
- var0 = resource_variable_ops.ResourceVariable(var0_np)
- var1 = resource_variable_ops.ResourceVariable(var1_np)
- else:
- var0 = variables.Variable(var0_np)
- var1 = variables.Variable(var1_np)
- grads0 = constant_op.constant(grads0_np)
- grads1 = constant_op.constant(grads1_np)
- opt = rmsprop.RMSPropOptimizer(
- learning_rate=learning_rate,
- decay=decay,
- momentum=momentum,
- epsilon=epsilon,
- centered=centered)
-
- update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
- variables.global_variables_initializer().run()
-
- mg0 = opt.get_slot(var0, "mg")
- self.assertEqual(mg0 is not None, centered)
- mg1 = opt.get_slot(var1, "mg")
- self.assertEqual(mg1 is not None, centered)
- rms0 = opt.get_slot(var0, "rms")
- self.assertTrue(rms0 is not None)
- rms1 = opt.get_slot(var1, "rms")
- self.assertTrue(rms1 is not None)
- mom0 = opt.get_slot(var0, "momentum")
- self.assertTrue(mom0 is not None)
- mom1 = opt.get_slot(var1, "momentum")
- self.assertTrue(mom1 is not None)
-
- mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
- rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
- mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
-
- # Fetch params to validate initial values
- self.assertAllClose([1.0, 2.0], var0.eval())
- self.assertAllClose([3.0, 4.0], var1.eval())
-
- # Run 4 steps of RMSProp
- for _ in range(1, 5):
- update.run()
-
- var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
- var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate,
- decay, momentum, epsilon, centered)
- var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
- var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate,
- decay, momentum, epsilon, centered)
-
- # Validate updated params
- if centered:
- self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
- self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
- self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
- self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
- self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
- self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
- self.assertAllCloseAccordingToType(var0_np, var0.eval())
- self.assertAllCloseAccordingToType(var1_np, var1.eval())
-
- def testMinimizeSparseResourceVariable(self):
- for dtype in [dtypes.float32, dtypes.float64]:
- with self.test_session():
- var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
- x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
- pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
- loss = pred * pred
- sgd_op = rmsprop.RMSPropOptimizer(
- learning_rate=1.0,
- decay=0.0,
- momentum=0.0,
- epsilon=0.0,
- centered=False).minimize(loss)
- variables.global_variables_initializer().run()
- # Fetch params to validate initial values
- self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
- # Run 1 step of sgd
- sgd_op.run()
- # Validate updated params
- self.assertAllCloseAccordingToType(
- [[0., 1.]], var0.eval(), atol=0.01)
-
- def testMinimizeSparseResourceVariableCentered(self):
- for dtype in [dtypes.float32, dtypes.float64]:
- with self.test_session():
- var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
- x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
- pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
- loss = pred * pred
- sgd_op = rmsprop.RMSPropOptimizer(
- learning_rate=1.0,
- decay=0.0,
- momentum=0.0,
- epsilon=1.0,
- centered=True).minimize(loss)
- variables.global_variables_initializer().run()
- # Fetch params to validate initial values
- self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
- # Run 1 step of sgd
- sgd_op.run()
- # Validate updated params
- self.assertAllCloseAccordingToType(
- [[-111, -138]], var0.eval(), atol=0.01)
-
- def testSparse(self):
- # TODO(yori): Use ParameterizedTest when available
- for (dtype, learning_rate, decay,
- momentum, epsilon, centered, _) in _TESTPARAMS:
- with self.test_session(use_gpu=True):
- # Initialize variables for numpy implementation.
- var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
- grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
- var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
- grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)
-
+ @parameterized.named_parameters(
+ *test_util.generate_combinations_with_testcase_name(
+ dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+ def testDense(self, dtype, param_value):
+ (learning_rate, decay, momentum, epsilon, centered, use_resource) = tuple(
+ param_value)
+ with self.test_session(use_gpu=True):
+ # Initialize variables for numpy implementation.
+ var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+ grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype)
+ var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+ grads1_np = np.array([0.01, 0.2], dtype=dtype.as_numpy_dtype)
+
+ if use_resource:
+ var0 = resource_variable_ops.ResourceVariable(var0_np)
+ var1 = resource_variable_ops.ResourceVariable(var1_np)
+ else:
var0 = variables.Variable(var0_np)
var1 = variables.Variable(var1_np)
- grads0_np_indices = np.array([0], dtype=np.int32)
- grads0 = ops.IndexedSlices(
- constant_op.constant(grads0_np),
- constant_op.constant(grads0_np_indices), constant_op.constant([1]))
- grads1_np_indices = np.array([1], dtype=np.int32)
- grads1 = ops.IndexedSlices(
- constant_op.constant(grads1_np),
- constant_op.constant(grads1_np_indices), constant_op.constant([1]))
- opt = rmsprop.RMSPropOptimizer(
- learning_rate=learning_rate,
- decay=decay,
- momentum=momentum,
- epsilon=epsilon,
- centered=centered)
- update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
- variables.global_variables_initializer().run()
-
- mg0 = opt.get_slot(var0, "mg")
- self.assertEqual(mg0 is not None, centered)
- mg1 = opt.get_slot(var1, "mg")
- self.assertEqual(mg1 is not None, centered)
- rms0 = opt.get_slot(var0, "rms")
- self.assertTrue(rms0 is not None)
- rms1 = opt.get_slot(var1, "rms")
- self.assertTrue(rms1 is not None)
- mom0 = opt.get_slot(var0, "momentum")
- self.assertTrue(mom0 is not None)
- mom1 = opt.get_slot(var1, "momentum")
- self.assertTrue(mom1 is not None)
-
- mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
- rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
- mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
- mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
-
- # Fetch params to validate initial values
- self.assertAllClose([1.0, 2.0], var0.eval())
- self.assertAllClose([3.0, 4.0], var1.eval())
-
- # Run 4 steps of RMSProp
- for _ in range(1, 5):
- update.run()
-
- var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
- var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np,
- learning_rate, decay, momentum, epsilon, centered)
- var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
- var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np,
- learning_rate, decay, momentum, epsilon, centered)
-
- # Validate updated params
- if centered:
- self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
- self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
- self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
- self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
- self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
- self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
- self.assertAllCloseAccordingToType(var0_np, var0.eval())
- self.assertAllCloseAccordingToType(var1_np, var1.eval())
-
- def testWithoutMomentum(self):
- for dtype in [dtypes.half, dtypes.float32]:
- with self.test_session(use_gpu=True):
- var0 = variables.Variable([1.0, 2.0], dtype=dtype)
- var1 = variables.Variable([3.0, 4.0], dtype=dtype)
- grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
- grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
- opt = rmsprop.RMSPropOptimizer(
- learning_rate=2.0, decay=0.9, momentum=0.0, epsilon=1.0)
- update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
- variables.global_variables_initializer().run()
-
- rms0 = opt.get_slot(var0, "rms")
- self.assertTrue(rms0 is not None)
- rms1 = opt.get_slot(var1, "rms")
- self.assertTrue(rms1 is not None)
- mom0 = opt.get_slot(var0, "momentum")
- self.assertTrue(mom0 is not None)
- mom1 = opt.get_slot(var1, "momentum")
- self.assertTrue(mom1 is not None)
-
- # Fetch params to validate initial values
- self.assertAllClose([1.0, 2.0], var0.eval())
- self.assertAllClose([3.0, 4.0], var1.eval())
- # Step 1: the rms accumulators where 1. So we should see a normal
- # update: v -= grad * learning_rate
- update.run()
- # Check the root mean square accumulators.
- self.assertAllCloseAccordingToType(
- np.array([0.901, 0.901]), rms0.eval())
- self.assertAllCloseAccordingToType(
- np.array([0.90001, 0.90001]), rms1.eval())
- # Check the parameters.
- self.assertAllCloseAccordingToType(
- np.array([
- 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)),
- 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0))
- ]), var0.eval())
- self.assertAllCloseAccordingToType(
- np.array([
- 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)),
- 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0))
- ]), var1.eval())
- # Step 2: the root mean square accumulators contain the previous update.
- update.run()
- # Check the rms accumulators.
- self.assertAllCloseAccordingToType(
- np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
- self.assertAllCloseAccordingToType(
- np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
- # Check the parameters.
- self.assertAllCloseAccordingToType(
- np.array([
- 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) -
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0)),
- 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) -
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0))
- ]), var0.eval())
- self.assertAllCloseAccordingToType(
- np.array([
- 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) -
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0)),
- 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) -
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0))
- ]), var1.eval())
-
- def testWithMomentum(self):
- for dtype in [dtypes.half, dtypes.float32]:
- with self.test_session(use_gpu=True):
- var0 = variables.Variable([1.0, 2.0], dtype=dtype)
- var1 = variables.Variable([3.0, 4.0], dtype=dtype)
- grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
- grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
-
- opt = rmsprop.RMSPropOptimizer(
- learning_rate=2.0, decay=0.9, momentum=0.5, epsilon=1e-5)
- update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
- variables.global_variables_initializer().run()
-
- rms0 = opt.get_slot(var0, "rms")
- self.assertTrue(rms0 is not None)
- rms1 = opt.get_slot(var1, "rms")
- self.assertTrue(rms1 is not None)
- mom0 = opt.get_slot(var0, "momentum")
- self.assertTrue(mom0 is not None)
- mom1 = opt.get_slot(var1, "momentum")
- self.assertTrue(mom1 is not None)
-
- # Fetch params to validate initial values
- self.assertAllClose([1.0, 2.0], var0.eval())
- self.assertAllClose([3.0, 4.0], var1.eval())
- # Step 1: rms = 1, mom = 0. So we should see a normal
- # update: v -= grad * learning_rate
+ grads0 = constant_op.constant(grads0_np)
+ grads1 = constant_op.constant(grads1_np)
+ opt = rmsprop.RMSPropOptimizer(
+ learning_rate=learning_rate,
+ decay=decay,
+ momentum=momentum,
+ epsilon=epsilon,
+ centered=centered)
+
+ update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+ variables.global_variables_initializer().run()
+
+ mg0 = opt.get_slot(var0, "mg")
+ self.assertEqual(mg0 is not None, centered)
+ mg1 = opt.get_slot(var1, "mg")
+ self.assertEqual(mg1 is not None, centered)
+ rms0 = opt.get_slot(var0, "rms")
+ self.assertIsNotNone(rms0)
+ rms1 = opt.get_slot(var1, "rms")
+ self.assertIsNotNone(rms1)
+ mom0 = opt.get_slot(var0, "momentum")
+ self.assertIsNotNone(mom0)
+ mom1 = opt.get_slot(var1, "momentum")
+ self.assertIsNotNone(mom1)
+
+ mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
+ rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
+ mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+ # Fetch params to validate initial values
+ self.assertAllClose([1.0, 2.0], var0.eval())
+ self.assertAllClose([3.0, 4.0], var1.eval())
+
+ # Run 4 steps of RMSProp
+ for _ in range(4):
update.run()
- # Check the root mean square accumulators.
- self.assertAllCloseAccordingToType(
- np.array([0.901, 0.901]), rms0.eval())
- self.assertAllCloseAccordingToType(
- np.array([0.90001, 0.90001]), rms1.eval())
- # Check the momentum accumulators
- self.assertAllCloseAccordingToType(
- np.array([(0.1 * 2.0 / math.sqrt(0.901 + 1e-5)),
- (0.1 * 2.0 / math.sqrt(0.901 + 1e-5))]), mom0.eval())
- self.assertAllCloseAccordingToType(
- np.array([(0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)),
- (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5))]), mom1.eval())
-
- # Check that the parameters.
- self.assertAllCloseAccordingToType(
- np.array([
- 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)),
- 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5))
- ]), var0.eval())
- self.assertAllCloseAccordingToType(
- np.array([
- 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)),
- 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5))
- ]), var1.eval())
-
- # Step 2: the root mean square accumulators contain the previous update.
+
+ var0_np, mg0_np, rms0_np, mom0_np = self._rmsprop_update_numpy(
+ var0_np, grads0_np, mg0_np, rms0_np, mom0_np, learning_rate,
+ decay, momentum, epsilon, centered)
+ var1_np, mg1_np, rms1_np, mom1_np = self._rmsprop_update_numpy(
+ var1_np, grads1_np, mg1_np, rms1_np, mom1_np, learning_rate,
+ decay, momentum, epsilon, centered)
+
+ # Validate updated params
+ if centered:
+ self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+ self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+ self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+ self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+ self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+ self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+ self.assertAllCloseAccordingToType(var0_np, var0.eval())
+ self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+ @parameterized.parameters([dtypes.float32, dtypes.float64])
+ def testMinimizeSparseResourceVariable(self, dtype):
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = rmsprop.RMSPropOptimizer(
+ learning_rate=1.0,
+ decay=0.0,
+ momentum=0.0,
+ epsilon=0.0,
+ centered=False).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[0., 1.]], var0.eval(), atol=0.01)
+
+ @parameterized.parameters([dtypes.float32, dtypes.float64])
+ def testMinimizeSparseResourceVariableCentered(self, dtype):
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = rmsprop.RMSPropOptimizer(
+ learning_rate=1.0,
+ decay=0.0,
+ momentum=0.0,
+ epsilon=1.0,
+ centered=True).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-111, -138]], var0.eval(), atol=0.01)
+
+ @parameterized.named_parameters(
+ *test_util.generate_combinations_with_testcase_name(
+ dtype=_DATA_TYPES, param_value=_TEST_PARAM_VALUES))
+ def testSparse(self, dtype, param_value):
+ (learning_rate, decay, momentum, epsilon, centered, _) = tuple(
+ param_value)
+ with self.test_session(use_gpu=True):
+ # Initialize variables for numpy implementation.
+ var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+ grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype)
+ var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+ grads1_np = np.array([0.01], dtype=dtype.as_numpy_dtype)
+
+ var0 = variables.Variable(var0_np)
+ var1 = variables.Variable(var1_np)
+ grads0_np_indices = np.array([0], dtype=np.int32)
+ grads0 = ops.IndexedSlices(
+ constant_op.constant(grads0_np),
+ constant_op.constant(grads0_np_indices), constant_op.constant([1]))
+ grads1_np_indices = np.array([1], dtype=np.int32)
+ grads1 = ops.IndexedSlices(
+ constant_op.constant(grads1_np),
+ constant_op.constant(grads1_np_indices), constant_op.constant([1]))
+ opt = rmsprop.RMSPropOptimizer(
+ learning_rate=learning_rate,
+ decay=decay,
+ momentum=momentum,
+ epsilon=epsilon,
+ centered=centered)
+ update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+ variables.global_variables_initializer().run()
+
+ mg0 = opt.get_slot(var0, "mg")
+ self.assertEqual(mg0 is not None, centered)
+ mg1 = opt.get_slot(var1, "mg")
+ self.assertEqual(mg1 is not None, centered)
+ rms0 = opt.get_slot(var0, "rms")
+ self.assertIsNotNone(rms0)
+ rms1 = opt.get_slot(var1, "rms")
+ self.assertIsNotNone(rms1)
+ mom0 = opt.get_slot(var0, "momentum")
+ self.assertIsNotNone(mom0)
+ mom1 = opt.get_slot(var1, "momentum")
+ self.assertIsNotNone(mom1)
+
+ mg0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ mg1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ rms0_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
+ rms1_np = np.array([1.0, 1.0], dtype=dtype.as_numpy_dtype)
+ mom0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+ mom1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
+
+ # Fetch params to validate initial values
+ self.assertAllClose([1.0, 2.0], var0.eval())
+ self.assertAllClose([3.0, 4.0], var1.eval())
+
+ # Run 4 steps of RMSProp
+ for _ in range(4):
update.run()
- # Check the rms accumulators.
- self.assertAllCloseAccordingToType(
- np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
- self.assertAllCloseAccordingToType(
- np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
- self.assertAllCloseAccordingToType(
- np.array([
- 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)),
- 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))
- ]), mom0.eval())
- self.assertAllCloseAccordingToType(
- np.array([
- 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)),
- 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))
- ]), mom1.eval())
-
- # Check the parameters.
- self.assertAllCloseAccordingToType(
- np.array([
- 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) -
- (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))),
- 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) -
- (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
- (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)))
- ]), var0.eval())
-
- self.assertAllCloseAccordingToType(
- np.array([
- 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) -
- (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))),
- 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) -
- (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
- (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)))
- ]), var1.eval())
+
+ var0_np, mg0_np, rms0_np, mom0_np = self._sparse_rmsprop_update_numpy(
+ var0_np, grads0_np_indices, grads0_np, mg0_np, rms0_np, mom0_np,
+ learning_rate, decay, momentum, epsilon, centered)
+ var1_np, mg1_np, rms1_np, mom1_np = self._sparse_rmsprop_update_numpy(
+ var1_np, grads1_np_indices, grads1_np, mg1_np, rms1_np, mom1_np,
+ learning_rate, decay, momentum, epsilon, centered)
+
+ # Validate updated params
+ if centered:
+ self.assertAllCloseAccordingToType(mg0_np, mg0.eval())
+ self.assertAllCloseAccordingToType(mg1_np, mg1.eval())
+ self.assertAllCloseAccordingToType(rms0_np, rms0.eval())
+ self.assertAllCloseAccordingToType(rms1_np, rms1.eval())
+ self.assertAllCloseAccordingToType(mom0_np, mom0.eval())
+ self.assertAllCloseAccordingToType(mom1_np, mom1.eval())
+ self.assertAllCloseAccordingToType(var0_np, var0.eval())
+ self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+ @parameterized.parameters(_DATA_TYPES)
+ def testWithoutMomentum(self, dtype):
+ with self.test_session(use_gpu=True):
+ var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+ var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+ grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+ grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+ opt = rmsprop.RMSPropOptimizer(
+ learning_rate=2.0, decay=0.9, momentum=0.0, epsilon=1.0)
+ update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+ variables.global_variables_initializer().run()
+
+ rms0 = opt.get_slot(var0, "rms")
+ self.assertIsNotNone(rms0)
+ rms1 = opt.get_slot(var1, "rms")
+ self.assertIsNotNone(rms1)
+ mom0 = opt.get_slot(var0, "momentum")
+ self.assertIsNotNone(mom0)
+ mom1 = opt.get_slot(var1, "momentum")
+ self.assertIsNotNone(mom1)
+
+ # Fetch params to validate initial values
+ self.assertAllClose([1.0, 2.0], var0.eval())
+ self.assertAllClose([3.0, 4.0], var1.eval())
+ # Step 1: the rms accumulators were 1, so we should see a normal
+ # update: v -= grad * learning_rate
+ update.run()
+ # Check the root mean square accumulators.
+ self.assertAllCloseAccordingToType(
+ np.array([0.901, 0.901]), rms0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([0.90001, 0.90001]), rms1.eval())
+ # Check the parameters.
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)),
+ 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0))
+ ]), var0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)),
+ 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0))
+ ]), var1.eval())
+ # Step 2: the root mean square accumulators contain the previous update.
+ update.run()
+ # Check the rms accumulators.
+ self.assertAllCloseAccordingToType(
+ np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+ # Check the parameters.
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) -
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0)),
+ 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1.0)) -
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1.0))
+ ]), var0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) -
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0)),
+ 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1.0)) -
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 1e-5 + 1.0))
+ ]), var1.eval())
+
+ @parameterized.parameters(_DATA_TYPES)
+ def testWithMomentum(self, dtype):
+ with self.test_session(use_gpu=True):
+ var0 = variables.Variable([1.0, 2.0], dtype=dtype)
+ var1 = variables.Variable([3.0, 4.0], dtype=dtype)
+ grads0 = constant_op.constant([0.1, 0.1], dtype=dtype)
+ grads1 = constant_op.constant([0.01, 0.01], dtype=dtype)
+
+ opt = rmsprop.RMSPropOptimizer(
+ learning_rate=2.0, decay=0.9, momentum=0.5, epsilon=1e-5)
+ update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+ variables.global_variables_initializer().run()
+
+ rms0 = opt.get_slot(var0, "rms")
+ self.assertIsNotNone(rms0)
+ rms1 = opt.get_slot(var1, "rms")
+ self.assertIsNotNone(rms1)
+ mom0 = opt.get_slot(var0, "momentum")
+ self.assertIsNotNone(mom0)
+ mom1 = opt.get_slot(var1, "momentum")
+ self.assertIsNotNone(mom1)
+
+ # Fetch params to validate initial values
+ self.assertAllClose([1.0, 2.0], var0.eval())
+ self.assertAllClose([3.0, 4.0], var1.eval())
+ # Step 1: rms = 1, mom = 0. So we should see a normal
+ # update: v -= grad * learning_rate
+ update.run()
+ # Check the root mean square accumulators.
+ self.assertAllCloseAccordingToType(
+ np.array([0.901, 0.901]), rms0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([0.90001, 0.90001]), rms1.eval())
+ # Check the momentum accumulators
+ self.assertAllCloseAccordingToType(
+ np.array([(0.1 * 2.0 / math.sqrt(0.901 + 1e-5)),
+ (0.1 * 2.0 / math.sqrt(0.901 + 1e-5))]), mom0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([(0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)),
+ (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5))]), mom1.eval())
+
+ # Check the parameters.
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)),
+ 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5))
+ ]), var0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)),
+ 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5))
+ ]), var1.eval())
+
+ # Step 2: the root mean square accumulators contain the previous update.
+ update.run()
+ # Check the rms accumulators.
+ self.assertAllCloseAccordingToType(
+ np.array([0.901 * 0.9 + 0.001, 0.901 * 0.9 + 0.001]), rms0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([0.90001 * 0.9 + 1e-5, 0.90001 * 0.9 + 1e-5]), rms1.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)),
+ 0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))
+ ]), mom0.eval())
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)),
+ 0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))
+ ]), mom1.eval())
+
+ # Check the parameters.
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 1.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) -
+ (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5))),
+ 2.0 - (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) -
+ (0.5 * (0.1 * 2.0 / math.sqrt(0.901 + 1e-5)) +
+ (0.1 * 2.0 / math.sqrt(0.901 * 0.9 + 0.001 + 1e-5)))
+ ]), var0.eval())
+
+ self.assertAllCloseAccordingToType(
+ np.array([
+ 3.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) -
+ (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5))),
+ 4.0 - (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) -
+ (0.5 * (0.01 * 2.0 / math.sqrt(0.90001 + 1e-5)) +
+ (0.01 * 2.0 / math.sqrt(0.90001 * 0.9 + 2e-5)))
+ ]), var1.eval())
if __name__ == "__main__":
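
For readers verifying the hand-computed assertions above: the numpy reference the tests compare against follows the standard (optionally centered) RMSProp recurrences, with epsilon added inside the square root. A minimal sketch of the dense update, assuming the signature shown in the hunk context (an illustration, not the verbatim helper body):

    import numpy as np

    def rmsprop_update_numpy(var, g, mg, rms, mom, lr, decay, momentum,
                             epsilon, centered):
      # Decay the running average of squared gradients.
      rms_t = rms * decay + (1 - decay) * g * g
      if centered:
        # The centered variant also tracks the mean gradient and subtracts
        # its square, estimating the gradient variance rather than the raw
        # second moment.
        mg_t = mg * decay + (1 - decay) * g
        denom_t = rms_t - mg_t * mg_t
      else:
        mg_t = mg
        denom_t = rms_t
      # Epsilon sits under the sqrt, which is why the expectations above
      # use sqrt(0.901 + 1.0), sqrt(0.901 + 1e-5), etc.
      mom_t = momentum * mom + lr * g / np.sqrt(denom_t + epsilon)
      var_t = var - mom_t
      return var_t, mg_t, rms_t, mom_t
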
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index d7e7a2c111..fc47b1cca5 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -20,6 +20,7 @@ from __future__ import division
from __future__ import print_function
import collections
+from collections import OrderedDict
import contextlib
import gc
import itertools
@@ -572,6 +573,78 @@ def assert_no_garbage_created(f):
return decorator
+def _combine_named_parameters(**kwargs):
+ """Generate combinations based on its keyword arguments.
+
+ Two sets of returned combinations can be concatenated using +. Their product
+ can be computed using `times()`.
+
+ Args:
+ **kwargs: keyword arguments of form `option=[possibilities, ...]`
+ or `option=the_only_possibility`.
+
+ Returns:
+ a list of dictionaries for each combination. Keys in the dictionaries are
+ the keyword argument names. Each key has one value - one of the
+ corresponding keyword argument values.
+ """
+ if not kwargs:
+ return [OrderedDict()]
+
+ sort_by_key = lambda k: k[0][0]
+ kwargs = OrderedDict(sorted(kwargs.items(), key=sort_by_key))
+ first = list(kwargs.items())[0]
+
+ rest = dict(list(kwargs.items())[1:])
+ rest_combined = _combine_named_parameters(**rest)
+
+ key = first[0]
+ values = first[1]
+ if not isinstance(values, list):
+ values = [values]
+
+ combinations = [
+ OrderedDict(sorted(list(combined.items()) + [(key, v)], key=sort_by_key))
+ for v in values
+ for combined in rest_combined
+ ]
+ return combinations
+
+
+def generate_combinations_with_testcase_name(**kwargs):
+ """Generate combinations based on its keyword arguments using combine().
+
+ This function calls combine() and appends a testcase name to the list of
+ dictionaries returned. The 'testcase_name' key is a required for named
+ parameterized tests.
+
+ Args:
+ **kwargs: keyword arguments of form `option=[possibilities, ...]`
+ or `option=the_only_possibility`.
+
+ Returns:
+ a list of dictionaries for each combination. Keys in the dictionaries are
+ the keyword argument names. Each key has one value - one of the
+ corresponding keyword argument values.
+ """
+ combinations = _combine_named_parameters(**kwargs)
+ named_combinations = []
+ for combination in combinations:
+ assert isinstance(combination, OrderedDict)
+ name = "".join([
+ "_{}_{}".format(
+ "".join(filter(str.isalnum, key)),
+ "".join(filter(str.isalnum, str(value))))
+ for key, value in combination.items()
+ ])
+ named_combinations.append(
+ OrderedDict(
+ list(combination.items()) + [("testcase_name",
+ "_test{}".format(name))]))
+
+ return named_combinations
+
+
def run_all_in_graph_and_eager_modes(cls):
"""Execute all test methods in the given class with and without eager."""
base_decorator = run_in_graph_and_eager_modes
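
To make the new helper concrete, here is roughly what it produces for the argument lists used in cudnn_recurrent_test.py below (a sketch of the expected output, not captured from a real run):

    from tensorflow.python.framework import test_util

    combos = test_util.generate_combinations_with_testcase_name(
        rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False])
    # Four OrderedDicts; the first one is:
    #   OrderedDict([('rnn_type', 'LSTM'), ('to_cudnn', True),
    #                ('testcase_name', '_test_rnntype_LSTM_tocudnn_True')])
    # The name keeps only the alphanumeric characters of each key and value,
    # so every combination gets a unique, identifier-safe suffix.
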
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
index 8fd970239f..2ed0aa8f26 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
@@ -220,7 +220,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
self.assertNotEqual(out4.max(), out5.max())
@parameterized.named_parameters(
- *testing_utils.generate_combinations_with_testcase_name(
+ *test_util.generate_combinations_with_testcase_name(
rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False],
bidirectional=[True, False], implementation=[1, 2],
model_nest_level=[1, 2], model_type=['seq', 'func']))
@@ -301,7 +301,7 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
os.remove(fname)
@parameterized.named_parameters(
- *testing_utils.generate_combinations_with_testcase_name(
+ *test_util.generate_combinations_with_testcase_name(
rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
def test_load_weights_between_noncudnn_rnn_time_distributed(self, rnn_type,
to_cudnn):
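
When these dictionaries are unpacked into `parameterized.named_parameters`, absl uses the 'testcase_name' entry to derive a unique test method name and passes the remaining keys as keyword arguments. A hedged sketch of the pattern (`CombosExampleTest` and `test_convert` are hypothetical names, not from this change):

    from absl.testing import parameterized
    from tensorflow.python.framework import test_util

    class CombosExampleTest(parameterized.TestCase):

      @parameterized.named_parameters(
          *test_util.generate_combinations_with_testcase_name(
              rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
      def test_convert(self, rnn_type, to_cudnn):
        # absl pops 'testcase_name' and appends it to the method name, so
        # this runs as e.g. test_convert_test_rnntype_LSTM_tocudnn_True;
        # the remaining keys arrive here as keyword arguments.
        self.assertIn(rnn_type, ('LSTM', 'GRU'))
        self.assertIn(to_cudnn, (True, False))
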
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 17aba7d86c..6e8ee06ff5 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
-from collections import OrderedDict
import numpy as np
from tensorflow.python import keras
@@ -185,75 +184,3 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
# for further checks in the caller function
return actual_output
-
-def _combine_named_parameters(**kwargs):
- """Generate combinations based on its keyword arguments.
-
- Two sets of returned combinations can be concatenated using +. Their product
- can be computed using `times()`.
-
- Args:
- **kwargs: keyword arguments of form `option=[possibilities, ...]`
- or `option=the_only_possibility`.
-
- Returns:
- a list of dictionaries for each combination. Keys in the dictionaries are
- the keyword argument names. Each key has one value - one of the
- corresponding keyword argument values.
- """
- if not kwargs:
- return [OrderedDict()]
-
- sort_by_key = lambda k: k[0][0]
- kwargs = OrderedDict(sorted(kwargs.items(), key=sort_by_key))
- first = list(kwargs.items())[0]
-
- rest = dict(list(kwargs.items())[1:])
- rest_combined = _combine_named_parameters(**rest)
-
- key = first[0]
- values = first[1]
- if not isinstance(values, list):
- values = [values]
-
- combinations = [
- OrderedDict(sorted(list(combined.items()) + [(key, v)], key=sort_by_key))
- for v in values
- for combined in rest_combined
- ]
- return combinations
-
-
-def generate_combinations_with_testcase_name(**kwargs):
- """Generate combinations based on its keyword arguments using combine().
-
- This function calls combine() and appends a testcase name to the list of
- dictionaries returned. The 'testcase_name' key is a required for named
- parameterized tests.
-
- Args:
- **kwargs: keyword arguments of form `option=[possibilities, ...]`
- or `option=the_only_possibility`.
-
- Returns:
- a list of dictionaries for each combination. Keys in the dictionaries are
- the keyword argument names. Each key has one value - one of the
- corresponding keyword argument values.
- """
- combinations = _combine_named_parameters(**kwargs)
- named_combinations = []
- for combination in combinations:
- assert isinstance(combination, OrderedDict)
- name = ''.join([
- '_{}_{}'.format(
- ''.join(filter(str.isalnum, key)),
- ''.join(filter(str.isalnum, str(value))))
- for key, value in combination.items()
- ])
- named_combinations.append(
- OrderedDict(
- list(combination.items()) + [('testcase_name',
- '_test{}'.format(name))]))
-
- return named_combinations
-