author     A. Unique TensorFlower <gardener@tensorflow.org>  2017-01-31 07:33:17 -0800
committer  TensorFlower Gardener <gardener@tensorflow.org>   2017-01-31 07:49:09 -0800
commit     67443722b26c3585d860d44e7069d997300a7187 (patch)
tree       9bb8df0e5c02d0d3a446a9a9fcd80ad86085bac6
parent     8c2b19889e49df88f3c4c0331d6930f887f5adae (diff)
Enables sparse optimizers for resource variables.
Change: 146117209
-rw-r--r--  tensorflow/python/training/adadelta.py | 14
-rw-r--r--  tensorflow/python/training/adadelta_test.py | 20
-rw-r--r--  tensorflow/python/training/adagrad.py | 10
-rw-r--r--  tensorflow/python/training/adagrad_da.py | 19
-rw-r--r--  tensorflow/python/training/adagrad_da_test.py | 22
-rw-r--r--  tensorflow/python/training/adagrad_test.py | 19
-rw-r--r--  tensorflow/python/training/ftrl.py | 13
-rw-r--r--  tensorflow/python/training/ftrl_test.py | 19
-rw-r--r--  tensorflow/python/training/gradient_descent_test.py | 2
-rw-r--r--  tensorflow/python/training/momentum.py | 10
-rw-r--r--  tensorflow/python/training/momentum_test.py | 20
-rw-r--r--  tensorflow/python/training/optimizer.py | 29
-rw-r--r--  tensorflow/python/training/proximal_adagrad.py | 14
-rw-r--r--  tensorflow/python/training/proximal_adagrad_test.py | 21
-rw-r--r--  tensorflow/python/training/proximal_gradient_descent.py | 10
-rw-r--r--  tensorflow/python/training/proximal_gradient_descent_test.py | 21
-rw-r--r--  tensorflow/python/training/rmsprop.py | 30
-rw-r--r--  tensorflow/python/training/rmsprop_test.py | 46
-rw-r--r--  tensorflow/python/training/slot_creator.py | 15
19 files changed, 347 insertions, 7 deletions
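
Note: this change wires sparse gradient application for resource variables into each optimizer by adding a _resource_apply_sparse hook that calls the corresponding resource_sparse_apply_* training op. The pattern the new tests exercise end to end looks roughly like the following sketch (adapted from the testMinimizeSparseResourceVariable cases added below, using Adagrad as a representative optimizer):

    from tensorflow.python.framework import constant_op
    from tensorflow.python.framework import dtypes
    from tensorflow.python.ops import embedding_ops
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import resource_variable_ops
    from tensorflow.python.ops import variables
    from tensorflow.python.training import adagrad

    # Sparse read of a resource variable: only row 0 receives a gradient.
    var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtypes.float32)
    x = constant_op.constant([[4.0], [5.0]], dtype=dtypes.float32)
    pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
    loss = pred * pred
    # minimize() now routes the sparse gradient through
    # AdagradOptimizer._resource_apply_sparse.
    train_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
    # In a session, variables.global_variables_initializer().run() followed by
    # train_op.run() performs one sparse update of var0.
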
diff --git a/tensorflow/python/training/adadelta.py b/tensorflow/python/training/adadelta.py
index ad78c8cfdd..0b021d8ce8 100644
--- a/tensorflow/python/training/adadelta.py
+++ b/tensorflow/python/training/adadelta.py
@@ -105,3 +105,17 @@ class AdadeltaOptimizer(optimizer.Optimizer):
grad.values,
grad.indices,
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ accum = self.get_slot(var, "accum")
+ accum_update = self.get_slot(var, "accum_update")
+ return training_ops.resource_sparse_apply_adadelta(
+ var,
+ accum.handle,
+ accum_update.handle,
+ math_ops.cast(self._lr_t, grad.dtype),
+ math_ops.cast(self._rho_t, grad.dtype),
+ math_ops.cast(self._epsilon_t, grad.dtype),
+ grad,
+ indices,
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adadelta_test.py b/tensorflow/python/training/adadelta_test.py
index c48bc24188..fe3333bac4 100644
--- a/tensorflow/python/training/adadelta_test.py
+++ b/tensorflow/python/training/adadelta_test.py
@@ -22,6 +22,8 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -136,6 +138,24 @@ class AdadeltaOptimizerTest(test.TestCase):
def testResourceBasic(self):
self.doTestBasic(use_resource=True)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = adadelta.AdadeltaOptimizer(
+ 1.0, 1.0, 1.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-111, -138]], var0.eval())
+
if __name__ == "__main__":
test.main()
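
Note: the expected value [[-111, -138]] used above (and again in the momentum and centered-RMSProp tests later in this change) is just one raw gradient step, since with rho=1 and epsilon=1 the first Adadelta update reduces to the gradient itself. A quick check of the arithmetic:

    # pred = [[1, 2]] . [[4], [5]] = 14, loss = pred^2 = 196
    # d(loss)/d(var0) = 2 * pred * [4, 5] = [112, 140]
    pred = 1.0 * 4.0 + 2.0 * 5.0                  # 14.0
    grad = [2.0 * pred * 4.0, 2.0 * pred * 5.0]   # [112.0, 140.0]
    print([1.0 - grad[0], 2.0 - grad[1]])         # [-111.0, -138.0]
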
diff --git a/tensorflow/python/training/adagrad.py b/tensorflow/python/training/adagrad.py
index e23b0d4381..2b8728e481 100644
--- a/tensorflow/python/training/adagrad.py
+++ b/tensorflow/python/training/adagrad.py
@@ -96,3 +96,13 @@ class AdagradOptimizer(optimizer.Optimizer):
grad.values,
grad.indices,
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ acc = self.get_slot(var, "accumulator")
+ return training_ops.resource_sparse_apply_adagrad(
+ var,
+ acc.handle,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ grad,
+ indices,
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adagrad_da.py b/tensorflow/python/training/adagrad_da.py
index 94a9df9a92..2e0c8f0c19 100644
--- a/tensorflow/python/training/adagrad_da.py
+++ b/tensorflow/python/training/adagrad_da.py
@@ -154,3 +154,22 @@ class AdagradDAOptimizer(optimizer.Optimizer):
math_ops.cast(self._l2_regularization_strength, var.dtype.base_dtype),
global_step,
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ g_acc = self.get_slot(var, "gradient_accumulator")
+ gg_acc = self.get_slot(var, "gradient_squared_accumulator")
+ # Performance optimization so that worker creates a copy of the global step
+ # to avoid overloading the parameter server holding the global step.
+ with ops.device(grad[0].device):
+ global_step = array_ops.identity(self._global_step) + 1
+ return training_ops.resource_sparse_apply_adagrad_da(
+ var,
+ g_acc.handle,
+ gg_acc.handle,
+ grad,
+ indices,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._l1_regularization_strength, grad.dtype),
+ math_ops.cast(self._l2_regularization_strength, grad.dtype),
+ global_step,
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/adagrad_da_test.py b/tensorflow/python/training/adagrad_da_test.py
index 9ab745f63a..c3a242a75e 100644
--- a/tensorflow/python/training/adagrad_da_test.py
+++ b/tensorflow/python/training/adagrad_da_test.py
@@ -22,6 +22,8 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -77,6 +79,26 @@ class AdagradDAOptimizerTest(test.TestCase):
def testResourceAdagradDAWithoutRegularizationBasic1(self):
self.doTestAdagradDAwithoutRegularizationBasic1(use_resource=True)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ global_step = resource_variable_ops.ResourceVariable(
+ 0, dtype=dtypes.int64)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = adagrad_da.AdagradDAOptimizer(
+ 1.0, global_step).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-1, -1]], var0.eval(), rtol=0.01)
+
def testAdagradDAwithoutRegularizationBasic2(self):
for dtype in [dtypes.float64, dtypes.float32]:
with self.test_session() as sess:
diff --git a/tensorflow/python/training/adagrad_test.py b/tensorflow/python/training/adagrad_test.py
index 745879bb01..195239352d 100644
--- a/tensorflow/python/training/adagrad_test.py
+++ b/tensorflow/python/training/adagrad_test.py
@@ -23,6 +23,8 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -68,6 +70,23 @@ class AdagradOptimizerTest(test.TestCase):
def testBasicLocked(self):
self.doTestBasic(use_locking=True)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[0, 1]], var0.eval(), atol=0.01)
+
def testTensorLearningRate(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
with self.test_session():
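
Note: the [[0, 1]] expectation in the test above (and in the analogous FTRL, ProximalAdagrad, and RMSProp tests below) follows from the accumulator-normalized update having roughly unit magnitude: with learning rate 1.0 and Adagrad's default initial accumulator of 0.1, the step per coordinate is g / sqrt(0.1 + g^2) ~= 1, so [[1, 2]] moves to approximately [[0, 1]]. A quick check:

    import math

    grad = [112.0, 140.0]                          # gradient from the worked example above
    accum = [0.1 + g * g for g in grad]            # Adagrad accumulator after one step
    step = [g / math.sqrt(a) for g, a in zip(grad, accum)]   # ~[1.0, 1.0]
    print([1.0 - step[0], 2.0 - step[1]])          # ~[0.0, 1.0], within the test's atol=0.01
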
diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py
index 22d1d7acdb..c8a3014d53 100644
--- a/tensorflow/python/training/ftrl.py
+++ b/tensorflow/python/training/ftrl.py
@@ -145,3 +145,16 @@ class FtrlOptimizer(optimizer.Optimizer):
var.dtype.base_dtype),
math_ops.cast(self._learning_rate_power_tensor, var.dtype.base_dtype),
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ accum = self.get_slot(var, "accum")
+ linear = self.get_slot(var, "linear")
+ return training_ops.resource_sparse_apply_ftrl(
+ var, accum.handle, linear.handle, grad, indices,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._l1_regularization_strength_tensor,
+ grad.dtype),
+ math_ops.cast(self._l2_regularization_strength_tensor,
+ grad.dtype),
+ math_ops.cast(self._learning_rate_power_tensor, grad.dtype),
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/ftrl_test.py b/tensorflow/python/training/ftrl_test.py
index 78f3e945b9..f4cf17f6f0 100644
--- a/tensorflow/python/training/ftrl_test.py
+++ b/tensorflow/python/training/ftrl_test.py
@@ -23,6 +23,8 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -101,6 +103,23 @@ class FtrlOptimizerTest(test.TestCase):
self.assertAllCloseAccordingToType(
np.array([-0.28232238, -0.56096673]), v1_val)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = ftrl.FtrlOptimizer(1.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[0, 1]], var0.eval(), atol=0.01)
+
def testFtrlWithL1(self):
for dtype in [dtypes.half, dtypes.float32]:
with self.test_session() as sess:
diff --git a/tensorflow/python/training/gradient_descent_test.py b/tensorflow/python/training/gradient_descent_test.py
index 8b2d17eefb..0c022d572b 100644
--- a/tensorflow/python/training/gradient_descent_test.py
+++ b/tensorflow/python/training/gradient_descent_test.py
@@ -111,7 +111,7 @@ class GradientDescentOptimizerTest(test.TestCase):
var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
- pred = math_ops.matmul(var0, x) + var1
+ pred += var1
loss = pred * pred
sgd_op = gradient_descent.GradientDescentOptimizer(1.0).minimize(loss)
# TODO(apassos) calling initialize_resources on all resources here
diff --git a/tensorflow/python/training/momentum.py b/tensorflow/python/training/momentum.py
index 8bf3b19f32..a057ce3850 100644
--- a/tensorflow/python/training/momentum.py
+++ b/tensorflow/python/training/momentum.py
@@ -89,3 +89,13 @@ class MomentumOptimizer(optimizer.Optimizer):
math_ops.cast(self._momentum_tensor, var.dtype.base_dtype),
use_locking=self._use_locking,
use_nesterov=self._use_nesterov).op
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ mom = self.get_slot(var, "momentum")
+ return training_ops.resource_sparse_apply_momentum(
+ var, mom.handle,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ grad, indices,
+ math_ops.cast(self._momentum_tensor, grad.dtype),
+ use_locking=self._use_locking,
+ use_nesterov=self._use_nesterov)
diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
index f7b852896a..9d6221b560 100644
--- a/tensorflow/python/training/momentum_test.py
+++ b/tensorflow/python/training/momentum_test.py
@@ -25,6 +25,8 @@ from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -171,6 +173,24 @@ class MomentumOptimizerTest(test.TestCase):
self.assertAllClose(var0_np, var0.eval())
self.assertAllClose(var1_np, var1.eval())
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = momentum_lib.MomentumOptimizer(
+ learning_rate=1.0, momentum=0.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-111, -138]], var0.eval())
+
def testTensorLearningRateAndMomentum(self):
for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
with self.test_session():
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 9e029389f2..e862cb87b1 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -34,6 +34,8 @@ from tensorflow.python.training import slot_creator
def _var_key(var):
+ if var.op.type == "ResourceGather":
+ var = var.op.inputs[0]
return (var.op.graph, var.op.name)
@@ -530,6 +532,33 @@ class Optimizer(object):
raise NotImplementedError()
def _resource_apply_dense(self, grad, handle):
+ """Add ops to apply dense gradients to the variable `handle`.
+
+ Args:
+ grad: a `Tensor` representing the gradient.
+ handle: a `Tensor` of dtype `resource` which points to the variable
+ to be updated.
+
+ Returns:
+ An `Operation` which updates the value of the variable.
+ """
+ raise NotImplementedError()
+
+ def _resource_apply_sparse(self, grad, handle, indices):
+ """Add ops to apply sparse gradients to the variable `handle`.
+
+
+ Args:
+ grad: a `Tensor` representing the gradient for the affected indices.
+ handle: a `Tensor` of dtype `resource` which points to the variable
+ to be updated.
+ indices: a `Tensor` of integral type representing the indices for
+ which the gradient is nonzero.
+
+ Returns:
+ An `Operation` which updates the value of the variable.
+
+ """
raise NotImplementedError()
def _apply_sparse_duplicate_indices(self, grad, var):
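
Note: the two hooks documented above define the contract for optimizers that support resource variables: both receive the variable's resource handle rather than a Variable object, and both return the update Operation. As a purely hypothetical illustration (not part of this change; the class name is made up, and assign_sub_variable_op / resource_scatter_add are assumed to be available in resource_variable_ops at this revision), a bare SGD-style optimizer could implement them as:

    from tensorflow.python.framework import ops
    from tensorflow.python.ops import math_ops
    from tensorflow.python.ops import resource_variable_ops
    from tensorflow.python.training import optimizer


    class SimpleResourceSGD(optimizer.Optimizer):
      """Plain gradient descent written against the resource hooks."""

      def __init__(self, learning_rate, use_locking=False, name="SimpleResourceSGD"):
        super(SimpleResourceSGD, self).__init__(use_locking, name)
        self._learning_rate = learning_rate

      def _prepare(self):
        self._lr_t = ops.convert_to_tensor(self._learning_rate, name="learning_rate")

      def _resource_apply_dense(self, grad, handle):
        # var -= lr * grad, written through the resource handle.
        lr = math_ops.cast(self._lr_t, grad.dtype)
        return resource_variable_ops.assign_sub_variable_op(handle, lr * grad)

      def _resource_apply_sparse(self, grad, handle, indices):
        # Only the rows named by `indices` are touched.
        lr = math_ops.cast(self._lr_t, grad.dtype)
        return resource_variable_ops.resource_scatter_add(handle, indices, -lr * grad)
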
diff --git a/tensorflow/python/training/proximal_adagrad.py b/tensorflow/python/training/proximal_adagrad.py
index fe19899579..68fd544486 100644
--- a/tensorflow/python/training/proximal_adagrad.py
+++ b/tensorflow/python/training/proximal_adagrad.py
@@ -20,6 +20,7 @@ from __future__ import print_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.training import optimizer
from tensorflow.python.training import training_ops
@@ -70,7 +71,8 @@ class ProximalAdagradOptimizer(optimizer.Optimizer):
for v in var_list:
with ops.colocate_with(v):
val = constant_op.constant(self._initial_accumulator_value,
- shape=v.get_shape())
+ shape=v.get_shape(),
+ dtype=v.dtype.base_dtype)
self._get_or_make_slot(v, val, "accumulator", self._name)
def _prepare(self):
@@ -107,3 +109,13 @@ class ProximalAdagradOptimizer(optimizer.Optimizer):
self._l2_regularization_strength_tensor,
grad.values, grad.indices,
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ acc = self.get_slot(var, "accumulator")
+ return training_ops.resource_sparse_apply_proximal_adagrad(
+ var, acc.handle,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
+ math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
+ grad, indices,
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/proximal_adagrad_test.py b/tensorflow/python/training/proximal_adagrad_test.py
index d67d138446..28e28687f4 100644
--- a/tensorflow/python/training/proximal_adagrad_test.py
+++ b/tensorflow/python/training/proximal_adagrad_test.py
@@ -21,7 +21,11 @@ from __future__ import print_function
import numpy as np
from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
from tensorflow.python.training import adagrad
@@ -88,6 +92,23 @@ class ProximalAdagradOptimizerTest(test.TestCase):
self.assertAllClose(np.array([-1.60261, -2.296985]), v0_val)
self.assertAllClose(np.array([3.715679, 2.433051]), v1_val)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = proximal_adagrad.ProximalAdagradOptimizer(1.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[0, 1]], var0.eval(), atol=0.01)
+
def testProximalAdagradWithL1(self):
with self.test_session() as sess:
var0 = variables.Variable([1.0, 2.0])
diff --git a/tensorflow/python/training/proximal_gradient_descent.py b/tensorflow/python/training/proximal_gradient_descent.py
index 0ccfe779d8..dd10c960e1 100644
--- a/tensorflow/python/training/proximal_gradient_descent.py
+++ b/tensorflow/python/training/proximal_gradient_descent.py
@@ -86,6 +86,16 @@ class ProximalGradientDescentOptimizer(optimizer.Optimizer):
grad.indices,
use_locking=self._use_locking).op
+ def _resource_apply_sparse(self, grad, var, indices):
+ return training_ops.resource_sparse_apply_proximal_gradient_descent(
+ var,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._l1_regularization_strength_tensor, grad.dtype),
+ math_ops.cast(self._l2_regularization_strength_tensor, grad.dtype),
+ grad,
+ indices,
+ use_locking=self._use_locking)
+
def _prepare(self):
self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
name="learning_rate")
diff --git a/tensorflow/python/training/proximal_gradient_descent_test.py b/tensorflow/python/training/proximal_gradient_descent_test.py
index 78a660dfad..9c5ea67015 100644
--- a/tensorflow/python/training/proximal_gradient_descent_test.py
+++ b/tensorflow/python/training/proximal_gradient_descent_test.py
@@ -21,7 +21,10 @@ from __future__ import print_function
import numpy as np
from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -89,6 +92,24 @@ class ProximalGradientDescentOptimizerTest(test.TestCase):
self.assertAllClose(np.array([0.1, 0.2]), v0_val)
self.assertAllClose(np.array([3.91, 2.82]), v1_val)
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = proximal_gradient_descent.ProximalGradientDescentOptimizer(
+ 1.0).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-111, -138]], var0.eval(), atol=0.01)
+
def testProximalGradientDescentWithL1_L2(self):
with self.test_session() as sess:
var0 = variables.Variable([1.0, 2.0])
diff --git a/tensorflow/python/training/rmsprop.py b/tensorflow/python/training/rmsprop.py
index e91cc32ad0..46aa27a052 100644
--- a/tensorflow/python/training/rmsprop.py
+++ b/tensorflow/python/training/rmsprop.py
@@ -198,3 +198,33 @@ class RMSPropOptimizer(optimizer.Optimizer):
grad.values,
grad.indices,
use_locking=self._use_locking)
+
+ def _resource_apply_sparse(self, grad, var, indices):
+ rms = self.get_slot(var, "rms")
+ mom = self.get_slot(var, "momentum")
+ if self._centered:
+ mg = self.get_slot(var, "mg")
+ return training_ops.resource_sparse_apply_centered_rms_prop(
+ var,
+ mg.handle,
+ rms.handle,
+ mom.handle,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._decay_tensor, grad.dtype),
+ math_ops.cast(self._momentum_tensor, grad.dtype),
+ math_ops.cast(self._epsilon_tensor, grad.dtype),
+ grad,
+ indices,
+ use_locking=self._use_locking)
+ else:
+ return training_ops.resource_sparse_apply_rms_prop(
+ var,
+ rms.handle,
+ mom.handle,
+ math_ops.cast(self._learning_rate_tensor, grad.dtype),
+ math_ops.cast(self._decay_tensor, grad.dtype),
+ math_ops.cast(self._momentum_tensor, grad.dtype),
+ math_ops.cast(self._epsilon_tensor, grad.dtype),
+ grad,
+ indices,
+ use_locking=self._use_locking)
diff --git a/tensorflow/python/training/rmsprop_test.py b/tensorflow/python/training/rmsprop_test.py
index e60e666879..ee5385596c 100644
--- a/tensorflow/python/training/rmsprop_test.py
+++ b/tensorflow/python/training/rmsprop_test.py
@@ -27,6 +27,8 @@ import numpy as np
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
+from tensorflow.python.ops import embedding_ops
+from tensorflow.python.ops import math_ops
from tensorflow.python.ops import resource_variable_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
@@ -160,6 +162,50 @@ class RMSPropOptimizerTest(test.TestCase):
self.assertAllCloseAccordingToType(var0_np, var0.eval())
self.assertAllCloseAccordingToType(var1_np, var1.eval())
+ def testMinimizeSparseResourceVariable(self):
+ for dtype in [dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = rmsprop.RMSPropOptimizer(
+ learning_rate=1.0,
+ decay=0.0,
+ momentum=0.0,
+ epsilon=0.0,
+ centered=False).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[0., 1.]], var0.eval(), atol=0.01)
+
+ def testMinimizeSparseResourceVariableCentered(self):
+ for dtype in [dtypes.float32, dtypes.float64]:
+ with self.test_session():
+ var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
+ x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
+ pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
+ loss = pred * pred
+ sgd_op = rmsprop.RMSPropOptimizer(
+ learning_rate=1.0,
+ decay=0.0,
+ momentum=0.0,
+ epsilon=1.0,
+ centered=True).minimize(loss)
+ variables.global_variables_initializer().run()
+ # Fetch params to validate initial values
+ self.assertAllCloseAccordingToType([[1.0, 2.0]], var0.eval())
+ # Run 1 step of sgd
+ sgd_op.run()
+ # Validate updated params
+ self.assertAllCloseAccordingToType(
+ [[-111, -138]], var0.eval(), atol=0.01)
+
def testSparse(self):
# TODO(yori): Use ParameterizedTest when available
for (dtype, learning_rate, decay,
diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py
index 088c540ec9..cfb4deb49c 100644
--- a/tensorflow/python/training/slot_creator.py
+++ b/tensorflow/python/training/slot_creator.py
@@ -46,6 +46,12 @@ from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
+def _is_resource(v):
+ """Returns true if v is something you get from a resource variable."""
+ return (isinstance(v, resource_variable_ops.ResourceVariable) or
+ (isinstance(v, ops.Tensor) and v.op.type == "ResourceGather"))
+
+
def _create_slot_var(primary, val, scope):
"""Helper function for creating a slot variable."""
@@ -55,8 +61,7 @@ def _create_slot_var(primary, val, scope):
variable_scope.get_variable_scope().set_partitioner(None)
slot = variable_scope.get_variable(
scope, initializer=val, trainable=False,
- use_resource=isinstance(
- primary, resource_variable_ops.ResourceVariable))
+ use_resource=_is_resource(primary))
variable_scope.get_variable_scope().set_partitioner(current_partitioner)
# pylint: disable=protected-access
@@ -99,12 +104,12 @@ def create_slot(primary, val, name, colocate_with_primary=True):
# optimizer can be shared when reuse is True. Meanwhile when reuse is False
# and the same name has been previously used, the scope name will add '_N'
# as suffix for unique identifications.
- with variable_scope.variable_scope(None, primary.op.name + '/' + name):
+ with variable_scope.variable_scope(None, primary.op.name + "/" + name):
if colocate_with_primary:
with ops.colocate_with(primary):
- return _create_slot_var(primary, val, '')
+ return _create_slot_var(primary, val, "")
else:
- return _create_slot_var(primary, val, '')
+ return _create_slot_var(primary, val, "")
def create_zeros_slot(primary, name, dtype=None, colocate_with_primary=True):
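
Note: the new _is_resource helper mirrors the _var_key change in optimizer.py. The "primary" handed to the slot creator can now be the output of a sparse read of a resource variable (a plain Tensor whose producing op is ResourceGather) rather than a ResourceVariable instance, and the old isinstance check would have created a non-resource slot for it. A small illustrative check, assuming (as this change does) that embedding_lookup on a ResourceVariable emits a ResourceGather op:

    from tensorflow.python.ops import embedding_ops
    from tensorflow.python.ops import resource_variable_ops

    v = resource_variable_ops.ResourceVariable([[1.0, 2.0], [3.0, 4.0]])
    gathered = embedding_ops.embedding_lookup([v], [0])

    print(isinstance(gathered, resource_variable_ops.ResourceVariable))  # False
    print(gathered.op.type)  # expected: "ResourceGather"
    # _is_resource(gathered) is True, so slots created for it are resource
    # variables as well; _is_resource(v) is trivially True.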