author:    A. Unique TensorFlower <gardener@tensorflow.org>  2017-01-31 07:33:17 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>   2017-01-31 07:49:09 -0800
commit:    67443722b26c3585d860d44e7069d997300a7187
tree:      9bb8df0e5c02d0d3a446a9a9fcd80ad86085bac6
parent:    8c2b19889e49df88f3c4c0331d6930f887f5adae
Enables sparse optimizers for resource variables.
Change: 146117209
19 files changed, 347 insertions, 7 deletions
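The pattern this change enables is minimizing a loss whose gradient reaches a `ResourceVariable` through a sparse read such as `embedding_lookup` (which the diff special-cases via the `ResourceGather` op type); before this change, these optimizers could not apply sparse updates to resource variables. The sketch below is modeled directly on the `testMinimizeSparseResourceVariable` tests added in the diff; it assumes the TF 1.x graph-mode internals those tests import and is illustrative, not part of the commit.

```python
# Minimal sketch of the usage enabled by this change, mirroring the new
# testMinimizeSparseResourceVariable tests in the diff (TF 1.x graph mode).
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad


class SparseResourceVariableExample(test.TestCase):

  def testMinimizeThroughEmbeddingLookup(self):
    with self.test_session():
      # Reading a resource variable via embedding_lookup means only the
      # looked-up rows need updating, so the optimizer takes the new
      # _resource_apply_sparse path instead of a dense update.
      var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]],
                                                    dtype=dtypes.float32)
      x = constant_op.constant([[4.0], [5.0]], dtype=dtypes.float32)
      pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
      loss = pred * pred
      train_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
      variables.global_variables_initializer().run()
      # One update step through the sparse resource path.
      train_op.run()


if __name__ == "__main__":
  test.main()
```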
diff --git a/tensorflow/python/training/adadelta.py b/tensorflow/python/training/adadelta.py
index ad78c8cfdd..0b021d8ce8 100644
--- a/tensorflow/python/training/adadelta.py
+++ b/tensorflow/python/training/adadelta.py
@@ -105,3 +105,17 @@ class AdadeltaOptimizer(optimizer.Optimizer):
         grad.values,
         grad.indices,
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    accum = self.get_slot(var, "accum")
+    accum_update = self.get_slot(var, "accum_update")
+    return training_ops.resource_sparse_apply_adadelta(
+        var,
+        accum.handle,
+        accum_update.handle,
+        math_ops.cast(self._lr_t, grad.dtype),
+        math_ops.cast(self._rho_t, grad.dtype),
+        math_ops.cast(self._epsilon_t, grad.dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adadelta_test.py b/tensorflow/python/training/adadelta_test.py
index c48bc24188..fe3333bac4 100644
--- a/tensorflow/python/training/adadelta_test.py
+++ b/tensorflow/python/training/adadelta_test.py
@@ -22,6 +22,8 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -136,6 +138,24 @@ class AdadeltaOptimizerTest(test.TestCase):
 
   def testResourceBasic(self):
     self.doTestBasic(use_resource=True)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adadelta.AdadeltaOptimizer(
+            1.0, 1.0, 1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval())
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/training/adagrad.py b/tensorflow/python/training/adagrad.py
index e23b0d4381..2b8728e481 100644
--- a/tensorflow/python/training/adagrad.py
+++ b/tensorflow/python/training/adagrad.py
@@ -96,3 +96,13 @@ class AdagradOptimizer(optimizer.Optimizer):
         grad.values,
         grad.indices,
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    acc = self.get_slot(var, "accumulator")
+    return training_ops.resource_sparse_apply_adagrad(
+        var,
+        acc.handle,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adagrad_da.py b/tensorflow/python/training/adagrad_da.py
index 94a9df9a92..2e0c8f0c19 100644
--- a/tensorflow/python/training/adagrad_da.py
+++ b/tensorflow/python/training/adagrad_da.py
@@ -154,3 +154,22 @@ class AdagradDAOptimizer(optimizer.Optimizer):
         math_ops.cast(self._l2_regularization_strength, var.dtype.base_dtype),
         global_step,
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    g_acc = self.get_slot(var, "gradient_accumulator")
+    gg_acc = self.get_slot(var, "gradient_squared_accumulator")
+    # Performance optimization so that worker creates a copy of the global step
+    # to avoid overloading the parameter server holding the global step.
+    with ops.device(grad[0].device):
+      global_step = array_ops.identity(self._global_step) + 1
+    return training_ops.resource_sparse_apply_adagrad_da(
+        var,
+        g_acc.handle,
+        gg_acc.handle,
+        grad,
+        indices,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        math_ops.cast(self._l1_regularization_strength, grad.dtype),
+        math_ops.cast(self._l2_regularization_strength, grad.dtype),
+        global_step,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adagrad_da_test.py b/tensorflow/python/training/adagrad_da_test.py
index 9ab745f63a..c3a242a75e 100644
--- a/tensorflow/python/training/adagrad_da_test.py
+++ b/tensorflow/python/training/adagrad_da_test.py
@@ -22,6 +22,8 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -77,6 +79,26 @@ class AdagradDAOptimizerTest(test.TestCase):
   def testResourceAdagradDAWithoutRegularizationBasic1(self):
     self.doTestAdagradDAwithoutRegularizationBasic1(use_resource=True)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        global_step = resource_variable_ops.ResourceVariable(
+            0, dtype=dtypes.int64)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adagrad_da.AdagradDAOptimizer(
+            1.0, global_step).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-1, -1]], var0.eval(), rtol=0.01)
+
   def testAdagradDAwithoutRegularizationBasic2(self):
     for dtype in [dtypes.float64, dtypes.float32]:
       with self.test_session() as sess:
diff --git a/tensorflow/python/training/adagrad_test.py b/tensorflow/python/training/adagrad_test.py
index 745879bb01..195239352d 100644
--- a/tensorflow/python/training/adagrad_test.py
+++ b/tensorflow/python/training/adagrad_test.py
@@ -23,6 +23,8 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -68,6 +70,23 @@ class AdagradOptimizerTest(test.TestCase):
   def testBasicLocked(self):
     self.doTestBasic(use_locking=True)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0, 1]], var0.eval(), atol=0.01)
+
   def testTensorLearningRate(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
       with self.test_session():
diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py
index 22d1d7acdb..c8a3014d53 100644
--- a/tensorflow/python/training/ftrl.py
+++ b/tensorflow/python/training/ftrl.py
@@ -145,3 +145,16 @@ class FtrlOptimizer(optimizer.Optimizer):
                       var.dtype.base_dtype),
         math_ops.cast(self._learning_rate_power_tensor, var.dtype.base_dtype),
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    accum = self.get_slot(var, "accum")
+    linear = self.get_slot(var, "linear")
+    return training_ops.resource_sparse_apply_ftrl(
+        var, accum.handle, linear.handle, grad, indices,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        math_ops.cast(self._l1_regularization_strength_tensor,
+                      grad.dtype),
+        math_ops.cast(self._l2_regularization_strength_tensor,
+                      grad.dtype),
+        math_ops.cast(self._learning_rate_power_tensor, grad.dtype),
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py
index 78f3e945b9..f4cf17f6f0 100644
--- a/tensorflow/python/training/ftrl_test.py
+++ b/tensorflow/python/training/ftrl_test.py
@@ -23,6 +23,8 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -101,6 +103,23 @@ class FtrlOptimizerTest(test.TestCase):
         self.assertAllCloseAccordingToType(
             np.array([-0.28232238, -0.56096673]), v1_val)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = ftrl.FtrlOptimizer(1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0, 1]], var0.eval(), atol=0.01)
+
   def testFtrlWithL1(self):
     for dtype in [dtypes.half, dtypes.float32]:
       with self.test_session() as sess:
diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py
index 8b2d17eefb..0c022d572b 100644
--- a/tensorflow/python/training/gradient_descent_test.py
+++ b/tensorflow/python/training/gradient_descent_test.py
@@ -111,7 +111,7 @@ class GradientDescentOptimizerTest(test.TestCase):
         var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        pred = math_ops.matmul(var0, x) + var1
+        pred += var1
         loss = pred * pred
         sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss)
         # TODO(apassos) calling initialize_resources on all resources here
diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py
index 8bf3b19f32..a057ce3850 100644
--- a/tensorflow/python/training/momentum.py
+++ b/tensorflow/python/training/momentum.py
@@ -89,3 +89,13 @@ class MomentumOptimizer(optimizer.Optimizer):
         math_ops.cast(self._momentum_tensor, var.dtype.base_dtype),
         use_locking=self._use_locking,
         use_nesterov=self._use_nesterov).op
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    mom = self.get_slot(var, "momentum")
+    return training_ops.resource_sparse_apply_momentum(
+        var, mom.handle,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        grad, indices,
+        math_ops.cast(self._momentum_tensor, grad.dtype),
+        use_locking=self._use_locking,
+        use_nesterov=self._use_nesterov)
diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
index f7b852896a..9d6221b560 100644
--- a/tensorflow/python/training/momentum_test.py
+++ b/tensorflow/python/training/momentum_test.py
@@ -25,6 +25,8 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -171,6 +173,24 @@ class MomentumOptimizerTest(test.TestCase):
       self.assertAllClose(var0_np, var0.eval())
       self.assertAllClose(var1_np, var1.eval())
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = momentum_lib.MomentumOptimizer(
+            learning_rate=1.0, momentum=0.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval())
+
   def testTensorLearningRateAndMomentum(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
       with self.test_session():
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 9e029389f2..e862cb87b1 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -34,6 +34,8 @@ from tensorflow.python.training import slot_creator
 
 
 def _var_key(var):
+  if var.op.type == "ResourceGather":
+    var = var.op.inputs[0]
   return (var.op.graph, var.op.name)
 
 
@@ -530,6 +532,33 @@ class Optimizer(object):
     raise NotImplementedError()
 
   def _resource_apply_dense(self, grad, handle):
+    """Add ops to apply dense gradients to the variable `handle`.
+
+    Args:
+      grad: a `Tensor` representing the gradient.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_sparse(self, grad, handle, indices):
+    """Add ops to apply sparse gradients to the variable `handle`.
+
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable
+       to be updated.
+      indices: a `Tensor` of integral type representing the indices for
+       which the gradient is nonzero.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+
+    """
     raise NotImplementedError()
 
   def _apply_sparse_duplicate_indices(self, grad, var):
diff --git a/tensorflow/python/training/proximal_adagrad.py b/tensorflow/python/training/proximal_adagrad.py
index fe19899579..68fd544486 100644
--- a/tensorflow/python/training/proximal_adagrad.py
+++ b/tensorflow/python/training/proximal_adagrad.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import training_ops
 
@@ -70,7 +71,8 @@ class ProximalAdagradOptimizer(optimizer.Optimizer):
     for v in var_list:
       with ops.colocate_with(v):
         val = constant_op.constant(self._initial_accumulator_value,
-                                   shape=v.get_shape())
+                                   shape=v.get_shape(),
+                                   dtype=v.dtype.base_dtype)
         self._get_or_make_slot(v, val, "accumulator", self._name)
 
   def _prepare(self):
@@ -107,3 +109,13 @@ class ProximalAdagradOptimizer(optimizer.Optimizer):
         self._l2_regularization_strength_tensor,
         grad.values, grad.indices,
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    acc = self.get_slot(var, "accumulator")
+    return training_ops.resource_sparse_apply_proximal_adagrad(
+        var, acc.handle,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
+        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
+        grad, indices,
+        use_locking=self._use_locking)
diff --git a/tensorflow/python/training/proximal_adagrad_test.py b/tensorflow/python/training/proximal_adagrad_test.py
index d67d138446..28e28687f4 100644
--- a/tensorflow/python/training/proximal_adagrad_test.py
+++ b/tensorflow/python/training/proximal_adagrad_test.py
@@ -21,7 +21,11 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import adagrad
@@ -88,6 +92,23 @@ class ProximalAdagradOptimizerTest(test.TestCase):
       self.assertAllClose(np.array([-1.60261, -2.296985]), v0_val)
       self.assertAllClose(np.array([3.715679, 2.433051]), v1_val)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = proximal_adagrad.ProximalAdagradOptimizer(1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0, 1]], var0.eval(), atol=0.01)
+
   def testProximalAdagradWithL1(self):
     with self.test_session() as sess:
       var0 = variables.Variable([1.0, 2.0])
diff --git a/tensorflow/python/training/proximal_gradient_descent.py b/tensorflow/python/training/proximal_gradient_descent.py
index 0ccfe779d8..dd10c960e1 100644
--- a/tensorflow/python/training/proximal_gradient_descent.py
+++ b/tensorflow/python/training/proximal_gradient_descent.py
@@ -86,6 +86,16 @@ class ProximalGradientDescentOptimizer(optimizer.Optimizer):
         grad.indices,
         use_locking=self._use_locking).op
 
+  def _resource_apply_sparse(self, grad, var, indices):
+    return training_ops.resource_sparse_apply_proximal_gradient_descent(
+        var,
+        math_ops.cast(self._learning_rate_tensor, grad.dtype),
+        math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
+        math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
+        grad,
+        indices,
+        use_locking=self._use_locking)
+
   def _prepare(self):
     self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
                                                        name="learning_rate")
diff --git a/tensorflow/python/training/proximal_gradient_descent_test.py b/tensorflow/python/training/proximal_gradient_descent_test.py
index 78a660dfad..9c5ea67015 100644
--- a/tensorflow/python/training/proximal_gradient_descent_test.py
+++ b/tensorflow/python/training/proximal_gradient_descent_test.py
@@ -21,7 +21,10 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -89,6 +92,24 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
       self.assertAllClose(np.array([0.1, 0.2]), v0_val)
      self.assertAllClose(np.array([3.91, 2.82]), v1_val)
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = proximal_gradient_descent.ProximalGradientDescentOptimizer(
+            1.0).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval(), atol=0.01)
+
   def testProximalGradientDescentWithL1_L2(self):
     with self.test_session() as sess:
       var0 = variables.Variable([1.0, 2.0])
diff --git a/tensorflow/python/training/rmsprop.py b/tensorflow/python/training/rmsprop.py
index e91cc32ad0..46aa27a052 100644
--- a/tensorflow/python/training/rmsprop.py
+++ b/tensorflow/python/training/rmsprop.py
@@ -198,3 +198,33 @@ class RMSPropOptimizer(optimizer.Optimizer):
         grad.values,
         grad.indices,
         use_locking=self._use_locking)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    rms = self.get_slot(var, "rms")
+    mom = self.get_slot(var, "momentum")
+    if self._centered:
+      mg = self.get_slot(var, "mg")
+      return training_ops.resource_sparse_apply_centered_rms_prop(
+          var,
+          mg.handle,
+          rms.handle,
+          mom.handle,
+          math_ops.cast(self._learning_rate_tensor, grad.dtype),
+          math_ops.cast(self._decay_tensor, grad.dtype),
+          math_ops.cast(self._momentum_tensor, grad.dtype),
+          math_ops.cast(self._epsilon_tensor, grad.dtype),
+          grad,
+          indices,
+          use_locking=self._use_locking)
+    else:
+      return training_ops.resource_sparse_apply_rms_prop(
+          var,
+          rms.handle,
+          mom.handle,
+          math_ops.cast(self._learning_rate_tensor, grad.dtype),
+          math_ops.cast(self._decay_tensor, grad.dtype),
+          math_ops.cast(self._momentum_tensor, grad.dtype),
+          math_ops.cast(self._epsilon_tensor, grad.dtype),
+          grad,
+          indices,
+          use_locking=self._use_locking)
diff --git a/tensorflow/python/training/rmsprop_test.py b/tensorflow/python/training/rmsprop_test.py
index e60e666879..ee5385596c 100644
--- a/tensorflow/python/training/rmsprop_test.py
+++ b/tensorflow/python/training/rmsprop_test.py
@@ -27,6 +27,8 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -160,6 +162,50 @@ class RMSPropOptimizerTest(test.TestCase):
       self.assertAllCloseAccordingToType(var0_np, var0.eval())
       self.assertAllCloseAccordingToType(var1_np, var1.eval())
 
+  def testMinimizeSparseResourceVariable(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = rmsprop.RMSPropOptimizer(
+            learning_rate=1.0,
+            decay=0.0,
+            momentum=0.0,
+            epsilon=0.0,
+            centered=False).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[0., 1.]], var0.eval(), atol=0.01)
+
+  def testMinimizeSparseResourceVariableCentered(self):
+    for dtype in [dtypes.float32, dtypes.float64]:
+      with self.test_session():
+        var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+        x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+        loss = pred * pred
+        sgd_op = rmsprop.RMSPropOptimizer(
+            learning_rate=1.0,
+            decay=0.0,
+            momentum=0.0,
+            epsilon=1.0,
+            centered=True).minimize(loss)
+        variables.global_variables_initializer().run()
+        # Fetch params to validate initial values
+        self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+        # Run 1 step of sgd
+        sgd_op.run()
+        # Validate updated params
+        self.assertAllCloseAccordingToType(
+            [[-111, -138]], var0.eval(), atol=0.01)
+
   def testSparse(self):
     # TODO(yori): Use ParameterizedTest when available
     for (dtype, learning_rate, decay,
diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py
index 088c540ec9..cfb4deb49c 100644
--- a/tensorflow/python/training/slot_creator.py
+++ b/tensorflow/python/training/slot_creator.py
@@ -46,6 +46,12 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 
 
+def _is_resource(v):
+  """Returns true if v is something you get from a resource variable."""
+  return (isinstance(v, resource_variable_ops.ResourceVariable) or
+          (isinstance(v, ops.Tensor) and v.op.type == "ResourceGather"))
+
+
 def _create_slot_var(primary, val, scope):
   """Helper function for creating a slot variable."""
 
@@ -55,8 +61,7 @@ def _create_slot_var(primary, val, scope):
     variable_scope.get_variable_scope().set_partitioner(None)
     slot = variable_scope.get_variable(
         scope, initializer=val, trainable=False,
-        use_resource=isinstance(
-            primary, resource_variable_ops.ResourceVariable))
+        use_resource=_is_resource(primary))
     variable_scope.get_variable_scope().set_partitioner(current_partitioner)
 
     # pylint: disable=protected-access
@@ -99,12 +104,12 @@ def create_slot(primary, val, name, colocate_with_primary=True):
   # optimizer can be shared when reuse is True. Meanwhile when reuse is False
   # and the same name has been previously used, the scope name will add '_N'
   # as suffix for unique identifications.
-  with variable_scope.variable_scope(None, primary.op.name + '/' + name):
+  with variable_scope.variable_scope(None, primary.op.name + "/" + name):
     if colocate_with_primary:
       with ops.colocate_with(primary):
-        return _create_slot_var(primary, val, '')
+        return _create_slot_var(primary, val, "")
     else:
-      return _create_slot_var(primary, val, '')
+      return _create_slot_var(primary, val, "")
 
 
 def create_zeros_slot(primary, name, dtype=None, colocate_with_primary=True):