author | Martin Wicke <577277+martinwicke@users.noreply.github.com> | 2018-09-22 09:45:11 -0700
---|---|---
committer | GitHub <noreply@github.com> | 2018-09-22 09:45:11 -0700
commit | 413ac36f33deb0c354dd687963d2410eab048970 (patch) | |
tree | fd4dc4e9fc5a76efd62c78c213b0e34983359256 /tensorflow/contrib/layers | |
parent | c22d996c3d6a16db292bd3464b2ef7b91adae676 (diff) | |
parent | e692dda4c8b199555e2fa32132a7784e0893c870 (diff) | |
Merge branch 'master' into fix_expand_dims
Diffstat (limited to 'tensorflow/contrib/layers')
24 files changed, 1046 insertions(+), 827 deletions(-)
diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD
index d5b3b279a1..b4fe8cac74 100644
--- a/tensorflow/contrib/layers/BUILD
+++ b/tensorflow/contrib/layers/BUILD
@@ -185,7 +185,7 @@ py_test(
 
 py_test(
     name = "normalization_test",
-    size = "small",
+    size = "medium",
    srcs = ["python/layers/normalization_test.py"],
     srcs_version = "PY2AND3",
     tags = ["no_windows"],  # TODO: needs investigation on Windows
@@ -381,7 +381,7 @@ py_test(
 
 py_test(
     name = "rev_block_lib_test",
-    size = "small",
+    size = "medium",
     srcs = ["python/layers/rev_block_lib_test.py"],
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorflow/contrib/layers/__init__.py b/tensorflow/contrib/layers/__init__.py
index 00f03a111a..af8e673f59 100644
--- a/tensorflow/contrib/layers/__init__.py
+++ b/tensorflow/contrib/layers/__init__.py
@@ -14,11 +14,15 @@
 # ==============================================================================
 """Ops for building neural network layers, regularizers, summaries, etc.
 
-See the @{$python/contrib.layers} guide.
+See the
+[Contrib Layers](https://tensorflow.org/api_guides/python/contrib.layers)
+guide.
 
 @@avg_pool2d
 @@avg_pool3d
 @@batch_norm
+@@convolution
+@@convolution1d
 @@convolution2d
 @@convolution3d
 @@conv2d_in_plane
@@ -119,6 +123,7 @@ from tensorflow.contrib.layers.python.layers import *
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = ['bias_add',
+                    'conv1d',
                     'conv2d',
                     'conv3d',
                     'elu',
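For readers skimming the export change above: `convolution1d`/`conv1d` is the 1-D counterpart of the existing `conv2d`/`conv3d` wrappers, and this hunk only adds it to the documented/allowed symbols. A minimal usage sketch, assuming a TF 1.x build where `tf.contrib` is available (shapes illustrative only):

```python
import tensorflow as tf

# [batch, width, channels] input; batch size left unknown.
inputs = tf.placeholder(tf.float32, [None, 32, 8])

# conv1d is the alias exported by this change; num_outputs and kernel_size
# follow the same contract as the other contrib convolution wrappers.
out = tf.contrib.layers.conv1d(inputs, num_outputs=16, kernel_size=3)
print(out.get_shape().as_list())  # [None, 32, 16] with the default 'SAME' padding
```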
diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
index 28ddaa69a1..155d06a08e 100644
--- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
+++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py
@@ -45,7 +45,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_dense(self):
@@ -66,7 +66,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_sparse(self):
@@ -80,7 +80,7 @@ class SparseCrossOpTest(test.TestCase):
         '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2',
         '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_mixed_string_dense(self):
@@ -99,7 +99,7 @@ class SparseCrossOpTest(test.TestCase):
         '55555_X_batch2-FC2-F1', '55555_X_batch2-FC2-F2',
         '999999_X_batch2-FC2-F1', '999999_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_sparse_cross_dense(self):
@@ -117,7 +117,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch2-FC1-F1_X_batch2-FC2-F1', 'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1', 'batch2-FC1-F2_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_integer_sparse_input(self):
@@ -133,7 +133,7 @@ class SparseCrossOpTest(test.TestCase):
         '333_X_batch2-FC2-F1', '333_X_batch2-FC2-F2',
         '5555_X_batch2-FC2-F1', '5555_X_batch2-FC2-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x3x3(self):
@@ -176,7 +176,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F2',
         'batch1-FC1-F3_X_batch1-FC2-F3_X_batch1-FC3-F3'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_permutation_3x1x2(self):
@@ -196,7 +196,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_large_batch(self):
@@ -229,7 +229,7 @@ class SparseCrossOpTest(test.TestCase):
     ])
 
     expected_out = self._sparse_tensor(col_out)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_one_column_empty(self):
@@ -242,7 +242,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([], 1),
         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_some_columns_empty(self):
@@ -261,7 +261,7 @@ class SparseCrossOpTest(test.TestCase):
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2'
     ]], 2)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_all_columns_empty(self):
@@ -273,7 +273,7 @@ class SparseCrossOpTest(test.TestCase):
         self._sparse_tensor([]), self._sparse_tensor([]),
         self._sparse_tensor([])
     ])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_empty(sess.run(op))
 
   def test_hashed_output_zero_bucket(self):
@@ -288,7 +288,7 @@ class SparseCrossOpTest(test.TestCase):
         hashed_output=True)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[3735511728867393167]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_zero_bucket_v2(self):
@@ -304,7 +304,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[1971693436396284976]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed.
@@ -321,7 +321,7 @@ class SparseCrossOpTest(test.TestCase):
         num_buckets=100)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[74]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_v2(self):
@@ -338,7 +338,7 @@ class SparseCrossOpTest(test.TestCase):
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     # Check actual hashed output to prevent unintentional hashing changes.
     expected_out = self._sparse_tensor([[83]])
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       self._assert_sparse_tensor_equals(expected_out, sess.run(op))
 
   def test_hashed_output_v1_has_collision(self):
@@ -384,7 +384,7 @@ class SparseCrossOpTest(test.TestCase):
         ],
         hashed_output=True,
         num_buckets=1000)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       out = sess.run(op)
       self.assertEqual(6, len(out.values))
       self.assertAllEqual([[0, i] for i in range(6)], out.indices)
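The bulk of the test churn in this commit swaps `self.test_session()` for `self.cached_session()`. Both return a session that is cached and reused across calls within the same test case; the new name just makes the caching explicit. A minimal sketch of the migrated pattern (the test class and op here are hypothetical, not from this diff):

```python
import tensorflow as tf

class ExampleTest(tf.test.TestCase):  # hypothetical test case

  def test_add(self):
    op = tf.add(1, 2)
    # cached_session() returns a session that is reused across `with`
    # blocks in the same test, so repeated entry is cheap.
    with self.cached_session() as sess:
      self.assertEqual(3, sess.run(op))

if __name__ == "__main__":
  tf.test.main()
```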
diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py
index 49c3faf3b7..60e1d85ea9 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py
@@ -458,7 +458,7 @@ def scattered_embedding_lookup_sparse(params,
   return embeddings
 
 
-def embedding_lookup_unique(params, ids, name=None):
+def embedding_lookup_unique(params, ids, partition_strategy="mod", name=None):
   """Version of embedding_lookup that avoids duplicate lookups.
 
   This can save communication in the case of repeated ids.
@@ -470,6 +470,9 @@ def embedding_lookup_unique(params, ids, name=None):
       `PartitionedVariable`. Shape `[index, d1, d2, ...]`.
     ids: A one-dimensional `Tensor` with type `int32` or `int64` containing
       the ids to be looked up in `params`. Shape `[ids1, ids2, ...]`.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
+      is `"mod"`.
     name: A name for this operation (optional).
 
   Returns:
@@ -485,7 +488,8 @@ def embedding_lookup_unique(params, ids, name=None):
     ids_flat = array_ops.reshape(
         ids, math_ops.reduce_prod(shape, keepdims=True))
     unique_ids, idx = array_ops.unique(ids_flat)
-    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids)
+    unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids,
+                                                       partition_strategy)
     embeds_flat = array_ops.gather(unique_embeddings, idx)
     embed_shape = array_ops.concat(
         [shape, array_ops.shape(unique_embeddings)[1:]], 0)
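With sharded parameters, `"div"` and `"mod"` assign ids to shards differently, so `embedding_lookup_unique` must forward the caller's choice to the underlying `embedding_lookup`. A small sketch of the new argument with this patch applied (values chosen so the two strategies would disagree; import path follows the file above):

```python
import tensorflow as tf
from tensorflow.contrib.layers.python.layers import embedding_ops as contrib_embedding_ops

# Two shards of a 4 x 2 embedding table; with more than one shard the
# partition strategy decides which shard owns which id.
shard0 = tf.constant([[0., 0.], [1., 1.]])
shard1 = tf.constant([[2., 2.], [3., 3.]])
ids = tf.constant([2, 2, 0], dtype=tf.int64)

# Duplicate ids are looked up once and gathered back into place; under
# "div", ids 0-1 live on shard0 and ids 2-3 on shard1.
embedded = contrib_embedding_ops.embedding_lookup_unique(
    [shard0, shard1], ids, partition_strategy="div")

with tf.Session() as sess:
  print(sess.run(embedded))  # [[2., 2.], [2., 2.], [0., 0.]]
```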
diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py
index bf25144982..124515e5a6 100644
--- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py
+++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py
@@ -21,7 +21,6 @@ from __future__ import print_function
 
 import itertools
 import math
-import sys
 
 import numpy as np
 
@@ -31,6 +30,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import init_ops
@@ -109,7 +109,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
     return sparse_ids, sparse_weights
 
   def test_safe_embedding_lookup_sparse_return_zero_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -122,7 +122,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
           3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4])
 
   def test_safe_embedding_lookup_sparse_return_special_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -136,7 +136,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
           embedding_weights[0][2], embedding_weights[0][3]])
 
   def test_safe_embedding_lookup_sparse_no_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, _ = self._ids_and_weights_2d()
 
@@ -150,7 +150,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
           embedding_weights[0][0] + embedding_weights[0][1]) / 2.0])
 
   def test_safe_embedding_lookup_sparse_partitioned(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, _ = self._ids_and_weights_2d()
 
@@ -164,7 +164,7 @ class SafeEmbeddingLookupSparseTest(test.TestCase):
           (embedding_weights[0] + embedding_weights[1]) / 2.0])
 
   def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, sparse_weights = self._ids_and_weights_2d()
 
@@ -179,7 +179,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
                         embedding_weights, sparse_ids, sparse_weights)
 
   def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -192,7 +192,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
       ], [embedding_weights[0][2], [0] * 4, [0] * 4]])
 
   def test_safe_embedding_lookup_sparse_3d_return_special_vector(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -208,7 +208,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
       ]])
 
   def test_safe_embedding_lookup_sparse_3d_no_weights(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       sparse_ids, _ = self._ids_and_weights_3d()
 
@@ -224,7 +224,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
       ]])
 
   def test_safe_embedding_lookup_sparse_3d_partitioned(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, _ = self._ids_and_weights_3d()
 
@@ -241,7 +241,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase):
 
   def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights(
       self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_ids, sparse_weights = self._ids_and_weights_3d()
 
@@ -276,7 +276,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
     return embedding_weights
 
   def test_scattered_embedding_consistency(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       values = constant_op.constant(["foo", "foo"])
 
@@ -288,7 +288,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
                           embedding_lookup_result[1])
 
   def test_scattered_embedding_multiple_partition(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=7)
       values = constant_op.constant([4, 4, 5])
 
@@ -304,7 +304,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
     self.assertGreater(embedding_diff, 0)
 
   def test_scattered_embedding_coverage(self):
-    with self.test_session():
+    with self.cached_session():
       size = 8
       embedding_weights = self._random_weights(size=size, num_shards=3)
       values = constant_op.constant(["foo"])
 
@@ -316,7 +316,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
     self.assertEqual(len(np.unique(embedding_lookup_result[0])), size)
 
   def test_scattered_embedding_multi_dimension(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       values = constant_op.constant([["foo", "bar", "bar"],
                                      ["bar", "bar", "foo"]])
 
@@ -329,7 +329,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
                         embedding_lookup_result[1][2])
 
   def test_scattered_embedding_lookup_sparse(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights(num_shards=3)
       sparse_tensor = sparse_tensor_lib.SparseTensor(
          values=["foo", "bar", "foo", "bar"],
 
@@ -358,7 +358,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
     embeds = np.random.randn(n_embed, d_embed)
     idx = np.random.randint(0, n_embed, idx_shape)
-    with self.test_session():
+    with self.cached_session():
       embedded_np = embeds[idx]
       embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()
 
@@ -370,7 +370,7 @@ class ScatteredEmbeddingLookupTest(test.TestCase):
     idx = np.random.randint(0, 5, 10)
     idx2d = np.random.randint(0, 5, (10, 2))
-    with self.test_session():
+    with self.cached_session():
       embedded_np = embeds[idx]
       embedded_np2d = embeds[idx2d]
       embedded_tf = embedding_ops.embedding_lookup_unique(embeds, idx).eval()
@@ -408,7 +408,7 @@ class SampledScatteredEmbeddingLookupTest(test.TestCase):
     return embedding_weights
 
   def test_hashed_embedding_consistency(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       values = constant_op.constant(["foo", "foo"])
       # The first three sampled_candidates are equal, so the first three
@@ -429,7 +429,7 @@ class SampledScatteredEmbeddingLookupTest(test.TestCase):
                         embedding_lookup_result[1][3])
 
   def test_hashed_embedding_multi_dimension(self):
-    with self.test_session():
+    with self.cached_session():
       embedding_weights = self._random_weights()
       values = constant_op.constant([["foo", "bar", "bar"],
                                      ["bar", "bar", "foo"]])
 
@@ -467,7 +467,7 @@ class SampledScatteredEmbeddingLookupSparseTest(test.TestCase):
 
   def test_output_shape(self):
     """Verifies the shape of the output tensor."""
-    with self.test_session():
+    with self.cached_session():
       sp_values = sparse_tensor_lib.SparseTensor(
           values=["a", "a", "b", "c", "d", "e", "f"],
           indices=[[1, 0], [2, 0], [2, 1], [2, 2], [2, 3], [2, 4], [2, 5]],
 
@@ -481,7 +481,7 @@ class SampledScatteredEmbeddingLookupSparseTest(test.TestCase):
 
   def test_output_values(self):
     """Verifies the values in a trivial case."""
-    with self.test_session():
+    with self.cached_session():
       sp_values = sparse_tensor_lib.SparseTensor(
           values=["a"], indices=[[1, 0]], dense_shape=[3, 1])
       params = constant_op.constant([.1, .2, .3])
@@ -495,7 +495,7 @@ class SampledScatteredEmbeddingLookupSparseTest(test.TestCase):
 
   def test_output_values_with_sampled_candidates(self):
     """Verifies the values for given sampled_candidates."""
-    with self.test_session():
+    with self.cached_session():
       sp_values = sparse_tensor_lib.SparseTensor(
          values=["a", "a", "b", "c", "d", "e", "f"],
           indices=[[1, 0], [2, 0], [2, 1], [2, 2], [2, 3], [2, 4], [2, 5]],
 
@@ -520,7 +520,7 @@ class SampledScatteredEmbeddingLookupSparseTest(test.TestCase):
 
   def test_output_values_with_sign_hash(self):
     """Verifies the values in a trivial case with hash_signs=True."""
-    with self.test_session():
+    with self.cached_session():
       sp_values = sparse_tensor_lib.SparseTensor(
           values=["a"], indices=[[1, 0]], dense_shape=[3, 1])
       params = constant_op.constant([.1, .1, .1])
 
@@ -537,7 +537,7 @@ class SampledScatteredEmbeddingLookupSparseTest(test.TestCase):
 
   def test_distributive_property(self):
     """Verifies the distributive property of matrix multiplication."""
-    with self.test_session():
+    with self.cached_session():
       params = constant_op.constant([.1, .2, .3])
       sp_values_a = sparse_tensor_lib.SparseTensor(
           values=["a"], indices=[[0, 0]], dense_shape=[3, 1])
 
@@ -691,11 +691,12 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase):
       index += num_val
     return grouped_vals
 
+  @test_util.enable_c_shapes
   def testEmbeddingLookupSparse(self):
     vocab_size = 13
     batch_size = 10
     param_shape = [2, 5]
-    expected_lookup_result_shape = [None] + param_shape
+    expected_lookup_result_shape = param_shape
 
     sp_ids, sp_weights, ids, weights, vals_per_batch_entry = (
         self._RandomIdsAndWeights(batch_size, vocab_size))
@@ -709,7 +710,7 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase):
         [1, 5], ["sum", "mean", "sqrtn"], [dtypes.float32, dtypes.float64],
         [True, False]):
-      with self.test_session():
+      with self.cached_session():
        p, params, feed_dict = _EmbeddingParams(
             num_shards, vocab_size, shape=param_shape, dtype=dtype)
         embedding_sum = \
@@ -719,7 +720,7 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase):
                 None if ignore_weights else sp_weights,
                 combiner=combiner)
 
-        self.assertEqual(embedding_sum.get_shape().as_list(),
+        self.assertEqual(embedding_sum.get_shape().as_list()[1:],
                          expected_lookup_result_shape)
 
         tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict)
@@ -748,7 +749,7 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase):
     for num_shards, combiner, dtype, ignore_weights in itertools.product(
         [1, 3], ["sum", "mean", "sqrtn"], [dtypes.float32, dtypes.float64],
         [True, False]):
-      with self.test_session():
+      with self.cached_session():
         x, params, _ = _EmbeddingParams(
             num_shards, vocab_size, shape=param_shape, dtype=dtype)
 
@@ -766,7 +767,7 @@ class EmbeddingLookupSparseWithDistributedAggregationTest(test.TestCase):
       self.assertLess(err, 1e-5 if dtype == dtypes.float64 else 2e-3)
 
   def testIncompatibleShapes(self):
-    with self.test_session():
+    with self.cached_session():
       x, _, _ = _EmbeddingParams(1, 10, dtype=dtypes.float32)
       sp_ids = sparse_tensor_lib.SparseTensor(
          constant_op.constant([[0, 0], [0, 1], [1, 0]], dtypes.int64),
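The `@test_util.enable_c_shapes` hunk above opts the test into C++ shape inference, under which the lookup result's trailing dimensions are statically known while the batch dimension is not; hence the assertion now slices off the leading dimension. A hedged sketch of that assertion pattern (the test class and tensors are hypothetical, not the distributed-aggregation op under test):

```python
import tensorflow as tf
from tensorflow.python.framework import test_util

class ShapeTest(tf.test.TestCase):  # hypothetical test case

  @test_util.enable_c_shapes  # opt this one test into C++ shape inference
  def test_trailing_dims(self):
    param_shape = [2, 5]
    params = tf.ones([13] + param_shape)         # vocab of 13
    ids = tf.placeholder(tf.int64, [None])       # batch size unknown
    embedded = tf.nn.embedding_lookup(params, ids)
    # The batch dimension stays None, so compare only the trailing dims.
    self.assertEqual(embedded.get_shape().as_list()[1:], param_shape)
```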
diff --git a/tensorflow/contrib/layers/python/layers/encoders_test.py b/tensorflow/contrib/layers/python/layers/encoders_test.py
index e8528e9890..1a2aa710d5 100644
--- a/tensorflow/contrib/layers/python/layers/encoders_test.py
+++ b/tensorflow/contrib/layers/python/layers/encoders_test.py
@@ -34,14 +34,14 @@ def _get_const_var(name, shape, value):
 
 class EncodersTest(test.TestCase):
 
   def testBowEncoderSparse(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3]]
       enc = encoders.bow_encoder(docs, 4, 3)
       sess.run(variables.global_variables_initializer())
       self.assertAllEqual([2, 3], enc.eval().shape)
 
   def testBowEncoderSparseTensor(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3]]
       sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
       enc = encoders.bow_encoder(sparse_docs, 4, 3)
@@ -49,28 +49,28 @@ class EncodersTest(test.TestCase):
       self.assertAllEqual([2, 3], enc.eval().shape)
 
   def testBowEncoderSparseEmptyRow(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3], [0, 0]]
       enc = encoders.bow_encoder(docs, 4, 5)
       sess.run(variables.global_variables_initializer())
       self.assertAllEqual([3, 5], enc.eval().shape)
 
   def testBowEncoderDense(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3], [0, 0], [0, 0]]
       enc = encoders.bow_encoder(docs, 4, 3, sparse_lookup=False)
       sess.run(variables.global_variables_initializer())
       self.assertAllEqual([4, 3], enc.eval().shape)
 
   def testBowEncoderSparseTensorDenseLookup(self):
-    with self.test_session():
+    with self.cached_session():
       docs = [[0, 1]]
       sparse_docs = sparse_ops.dense_to_sparse_tensor(docs)
       with self.assertRaises(TypeError):
         encoders.bow_encoder(sparse_docs, 4, 3, sparse_lookup=False)
 
   def testBowEncodersSharingEmbeddings(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3]]
       enc_1 = encoders.bow_encoder(docs, 4, 3, scope='test')
       enc_2 = encoders.bow_encoder(docs, 4, 3, scope='test', reuse=True)
@@ -79,7 +79,7 @@ class EncodersTest(test.TestCase):
       self.assertAllEqual(avg_1, avg_2)
 
   def testBowEncodersSharingEmbeddingsInheritedScopes(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3]]
       with variable_scope.variable_scope('test'):
         enc_1 = encoders.bow_encoder(docs, 4, 3)
@@ -90,7 +90,7 @@ class EncodersTest(test.TestCase):
       self.assertAllEqual(avg_1, avg_2)
 
   def testBowEncodersSharingEmbeddingsSharedScope(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[0, 1], [2, 3]]
       enc_1 = encoders.bow_encoder(docs, 4, 3, scope='bow')
       variable_scope.get_variable_scope().reuse_variables()
@@ -100,7 +100,7 @@ class EncodersTest(test.TestCase):
       self.assertAllEqual(avg_1, avg_2)
 
   def testBowEncoderReuseEmbeddingsVariable(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[1, 1], [2, 3]]
       with variable_scope.variable_scope('test'):
         v = _get_const_var('embeddings', (4, 3),
@@ -111,7 +111,7 @@ class EncodersTest(test.TestCase):
     self.assertAllClose([[3., 4., 5.], [7.5, 8.5, 9.5]], enc.eval())
 
   def testEmbedSequence(self):
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       docs = [[1, 1], [2, 3]]
       with variable_scope.variable_scope('test'):
         v = _get_const_var('embeddings', (4, 3),
diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 3ae07cedab..53c8ae5d08 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -997,9 +997,14 @@ class _OneHotColumn(
       # Remove (?, -1) index
       weighted_column = sparse_ops.sparse_slice(
           weighted_column,
-          [0, 0],
+          array_ops.zeros_like(weighted_column.dense_shape),
           weighted_column.dense_shape)
-      return sparse_ops.sparse_tensor_to_dense(weighted_column)
+      dense_tensor = sparse_ops.sparse_tensor_to_dense(weighted_column)
+      batch_shape = array_ops.shape(dense_tensor)[:-1]
+      dense_tensor_shape = array_ops.concat(
+          [batch_shape, [self.length]], axis=0)
+      dense_tensor = array_ops.reshape(dense_tensor, dense_tensor_shape)
+      return dense_tensor
 
     dense_id_tensor = sparse_ops.sparse_tensor_to_dense(sparse_id_column,
                                                         default_value=-1)
@@ -1095,9 +1100,9 @@ class _EmbeddingColumn(
       raise ValueError("Must specify both `ckpt_to_load_from` and "
                        "`tensor_name_in_ckpt` or none of them.")
     if initializer is None:
-      logging.warn("The default stddev value of initializer will change from "
-                   "\"1/sqrt(vocab_size)\" to \"1/sqrt(dimension)\" after "
-                   "2017/02/25.")
+      logging.warn("The default stddev value of initializer was changed from "
+                   "\"1/sqrt(vocab_size)\" to \"1/sqrt(dimension)\" in core "
+                   "implementation (tf.feature_column.embedding_column).")
       stddev = 1 / math.sqrt(sparse_id_column.length)
       initializer = init_ops.truncated_normal_initializer(
           mean=0.0, stddev=stddev)
@@ -1496,8 +1501,6 @@ class _ScatteredEmbeddingColumn(
       raise ValueError("initializer must be callable if specified. "
                        "column_name: {}".format(column_name))
     if initializer is None:
-      logging.warn("The default stddev value of initializer will change from "
-                   "\"0.1\" to \"1/sqrt(dimension)\" after 2017/02/25.")
       stddev = 0.1
       initializer = init_ops.truncated_normal_initializer(
           mean=0.0, stddev=stddev)
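The `_OneHotColumn` hunk above pins down the dense output shape: after densifying the weighted sparse tensor, the result is reshaped so the last dimension is exactly `self.length` (the column's vocabulary size), even for higher-rank batches. A standalone sketch of just the reshape logic (variable names mirror the patch, but this is illustrative, not the column implementation):

```python
import tensorflow as tf

length = 4  # vocabulary size of the one-hot column
# A dense tensor whose trailing dimension may be unknown at graph
# construction time, e.g. the output of sparse_tensor_to_dense.
dense_tensor = tf.placeholder(tf.float32, [None, None, None])

batch_shape = tf.shape(dense_tensor)[:-1]                 # all but last dim
dense_tensor_shape = tf.concat([batch_shape, [length]], axis=0)
reshaped = tf.reshape(dense_tensor, dense_tensor_shape)   # shape [..., length]
```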
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
index 06060b99e7..a85cff4f70 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py
@@ -683,11 +683,12 @@ def parse_feature_columns_from_sequence_examples(
       the serialized proto.
 
   Returns:
-    A tuple consisting of:
-    context_features: a dict mapping `FeatureColumns` from
-      `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
-    sequence_features: a dict mapping `FeatureColumns` from
-      `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
+    A tuple consisting of (context_features, sequence_features)
+
+    * context_features: a dict mapping `FeatureColumns` from
+      `context_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
+    * sequence_features: a dict mapping `FeatureColumns` from
+      `sequence_feature_columns` to their parsed `Tensors`/`SparseTensor`s.
   """
   # Sequence example parsing requires a single (scalar) example.
   try:
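The docstring fix above makes the two-element return explicit. A hedged usage sketch of unpacking it (the columns and serialized tensor are placeholders, not from this diff):

```python
import tensorflow as tf
from tensorflow.contrib import layers

# Hypothetical columns; `serialized` would normally come from a TFRecord.
context_cols = [layers.real_valued_column("age")]
sequence_cols = [layers.real_valued_column("rating")]
serialized = tf.placeholder(tf.string, shape=[])

# Returns a (context_features, sequence_features) tuple of dicts keyed by
# the FeatureColumns passed in.
context_features, sequence_features = (
    layers.parse_feature_columns_from_sequence_examples(
        serialized=serialized,
        context_feature_columns=context_cols,
        sequence_feature_columns=sequence_cols))
```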
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
index e6bbd86ab7..6fb4b9ff35 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
@@ -49,7 +49,7 @@ class TransformerTest(test.TestCase):
     real_valued = feature_column.real_valued_column("price")
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
     output = feature_column_ops._Transformer(features).transform(real_valued)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(output.eval(), [[20.], [110], [-3]])
 
   def testSparseRealValuedColumnIdentityTransformation(self):
@@ -60,7 +60,7 @@ class TransformerTest(test.TestCase):
     features = {"rating": rating_tensor}
     output = feature_column_ops._Transformer(features).transform(
         sparse_real_valued)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(output.values.eval(), rating_tensor.values.eval())
       self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval())
       self.assertAllEqual(output.dense_shape.eval(),
@@ -80,7 +80,7 @@ class TransformerTest(test.TestCase):
         [sparse_real_valued])
     self.assertTrue(sparse_real_valued in output_dict)
     output = output_dict[sparse_real_valued]
-    with self.test_session():
+    with self.cached_session():
       self.assertArrayNear(output.values.eval(), [4.0, 25.0], 1e-5)
       self.assertAllEqual(output.indices.eval(), rating_tensor.indices.eval())
       self.assertAllEqual(output.dense_shape.eval(),
@@ -97,7 +97,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[bucket])
     self.assertEqual(len(output), 1)
     self.assertIn(bucket, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(output[bucket].eval(), [[2], [3], [0]])
 
   def testBucketizedColumnWithMultiDimensions(self):
@@ -109,7 +109,7 @@ class TransformerTest(test.TestCase):
         "price": constant_op.constant([[20., 110], [110., 20], [-3, -3]])
     }
     output = feature_column_ops._Transformer(features).transform(bucket)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(output.eval(), [[2, 3], [3, 2], [0, 0]])
 
   def testCachedTransformation(self):
@@ -118,7 +118,7 @@ class TransformerTest(test.TestCase):
     # buckets 2, 3, 0
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
     transformer = feature_column_ops._Transformer(features)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       transformer.transform(bucket)
       num_of_ops = len(sess.graph.get_operations())
       # Verify that the second call to transform the same feature
@@ -138,7 +138,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[hashed_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(hashed_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64)
       self.assertTrue(
           all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
@@ -161,7 +161,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[hashed_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(hashed_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int64)
       self.assertTrue(
           all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
@@ -177,7 +177,7 @@ class TransformerTest(test.TestCase):
     features = {"wire": wire_tensor}
     output = feature_column_ops._Transformer(features).transform(hashed_sparse)
-    with self.test_session():
+    with self.cached_session():
       # While the input is a dense Tensor, the output should be a SparseTensor.
       self.assertIsInstance(output, sparse_tensor.SparseTensor)
       self.assertEqual(output.values.dtype, dtypes.int64)
@@ -203,7 +203,7 @@ class TransformerTest(test.TestCase):
     self.assertEqual(len(output), 2)
     self.assertIn(hashed_sparse, output)
     self.assertIn(wire_embedding, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertAllEqual(output[wire_embedding].indices.eval(),
                           wire_tensor.indices.eval())
       self.assertAllEqual(output[wire_embedding].dense_shape.eval(), [2, 2])
@@ -223,7 +223,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[keys_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(keys_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertEqual(output[keys_sparse].values.dtype, dtypes.int64)
       self.assertAllEqual(output[keys_sparse].values.eval(), [1, 2, 0])
@@ -241,7 +241,7 @@ class TransformerTest(test.TestCase):
     features = {"wire": wire_tensor}
     output = feature_column_ops._Transformer(features).transform(keys_sparse)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       # While the input is a dense Tensor, the output should be a SparseTensor.
       self.assertIsInstance(output, sparse_tensor.SparseTensor)
@@ -264,7 +264,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[hashed_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(hashed_sparse, output)
-    with self.test_session():
+    with self.cached_session():
      self.assertEqual(output[hashed_sparse].values.dtype, dtypes.int32)
       self.assertTrue(
           all(x < 10 and x >= 0 for x in output[hashed_sparse].values.eval()))
@@ -282,7 +282,7 @@ class TransformerTest(test.TestCase):
     wire_tensor = constant_op.constant([[100, 0], [1, 25]])
     features = {"wire": wire_tensor}
     output = feature_column_ops._Transformer(features).transform(hashed_sparse)
-    with self.test_session():
+    with self.cached_session():
       # While the input is a dense Tensor, the output should be a SparseTensor.
       self.assertIsInstance(output, sparse_tensor.SparseTensor)
       self.assertEqual(output.values.dtype, dtypes.int32)
@@ -310,7 +310,7 @@ class TransformerTest(test.TestCase):
     self.assertEqual(len(output), 1)
     self.assertIn(weighted_ids, output)
 
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(output[weighted_ids][0].dense_shape.eval(),
                           ids_tensor.dense_shape.eval())
@@ -340,7 +340,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[vocab_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(vocab_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
       self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0])
@@ -362,7 +362,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[vocab_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(vocab_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
       self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1])
@@ -386,7 +386,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[vocab_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(vocab_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
       self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0])
@@ -408,7 +408,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[vocab_sparse])
     self.assertEqual(len(output), 1)
     self.assertIn(vocab_sparse, output)
-    with self.test_session():
+    with self.cached_session():
       lookup_ops.tables_initializer().run()
       self.assertEqual(output[vocab_sparse].values.dtype, dtypes.int64)
       self.assertAllEqual(output[vocab_sparse].values.eval(), [1, 2, 0, 1])
@@ -440,7 +440,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[country_language])
     self.assertEqual(len(output), 1)
     self.assertIn(country_language, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(output[country_language].values.dtype, dtypes.int64)
       self.assertTrue(
           all(x < 15 and x >= 0 for x in output[country_language].values.eval(
@@ -467,7 +467,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[country_price])
     self.assertEqual(len(output), 1)
     self.assertIn(country_price, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(output[country_price].values.dtype, dtypes.int64)
       self.assertTrue(
           all(x < 15 and x >= 0 for x in output[country_price].values.eval()))
@@ -498,7 +498,7 @@ class TransformerTest(test.TestCase):
     weights = column_to_variable[country_price][0]
     grad = array_ops.squeeze(
         gradients_impl.gradients(output, weights)[0].values)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertEqual(len(grad.eval()), 6)
@@ -537,7 +537,7 @@ class TransformerTest(test.TestCase):
         features=features, feature_columns=[wire_country_price])
     self.assertEqual(len(output), 1)
     self.assertIn(wire_country_price, output)
-    with self.test_session():
+    with self.cached_session():
       self.assertEqual(output[wire_country_price].values.dtype, dtypes.int64)
       self.assertTrue(
          all(x < 15 and x >= 0 for x in output[wire_country_price].values.eval(
@@ -600,7 +600,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     columns = [one_hot_column, embedding_column, real_valued_column]
     output = feature_column_ops.input_from_feature_columns(features, columns)
     output_core = fc_core.input_layer(features, columns)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(output.eval().shape, [3, 2 + 4 + 10])
@@ -626,7 +626,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     cols_to_outs = {}
     feature_column_ops.input_from_feature_columns(
         features, columns, cols_to_outs=cols_to_outs)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       for column in columns:
@@ -637,7 +637,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [real_valued])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), features["price"].eval())
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllClose(output.eval(),
@@ -650,7 +650,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     }
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [real_valued])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), features["price"].eval())
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllClose(output.eval(),
@@ -662,7 +662,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     rating = np.array([[0., 1., 2., -1.],
                        [3., 4., 5., 6.]])
     features = {"rating": constant_op.constant(rating)}
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = sess.run(feature_column_ops.input_from_feature_columns(
           features, [var_len_real_valued]))
     self.assertAllClose(rating, output)
@@ -673,7 +673,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     rating = np.array([[0, 1, 2, -1],
                        [3, 4, 5, 6]])
     features = {"rating": constant_op.constant(rating, dtype=dtypes.int64)}
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = sess.run(feature_column_ops.input_from_feature_columns(
           features, [var_len_real_valued]))
     self.assertAllClose(rating.astype(np.float32), output)
@@ -684,7 +684,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [real_valued])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), features["price"].eval() - 2)
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllClose(output.eval(),
@@ -698,7 +698,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     }
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [real_valued])
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), features["price"].eval() - 2)
       # Verify cross compatibility: Core builder output should equal to contrib.
       self.assertAllClose(output.eval(),
@@ -713,7 +713,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     features = {"price": constant_op.constant([[20.], [110], [-3]])}
     output = feature_column_ops.input_from_feature_columns(features, [bucket])
     expected = [[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), expected)
       self.assertAllClose(output.eval(),
                           fc_core.input_layer(features, [bucket]).eval())
@@ -729,7 +729,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     output = feature_column_ops.input_from_feature_columns(features, [bucket])
     expected = [[0, 0, 1, 0, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 1, 0],
                 [1, 0, 0, 0, 1, 0, 0, 0]]
-    with self.test_session():
+    with self.cached_session():
       self.assertAllClose(output.eval(), expected)
       self.assertAllClose(output.eval(),
                           fc_core.input_layer(features, [bucket]).eval())
@@ -752,7 +752,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [one_hot_column])
     output_core = fc_core.input_layer(features, [one_hot_column])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual([[0, 0, 10., 0], [0, 20., 0, 0], [30., 0, 40., 0]],
@@ -773,7 +773,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
                                                            [one_hot_sparse])
     output_core = fc_core.input_layer(features, [one_hot_sparse])
 
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]],
@@ -794,7 +794,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
                                                            [one_hot_sparse])
     output_core = fc_core.input_layer(features, [one_hot_sparse])
 
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
@@ -816,7 +816,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [one_hot_sparse])
     output_core = fc_core.input_layer(features, [one_hot_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual([[0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 1, 0]],
                           output.eval())
@@ -834,7 +834,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [one_hot_sparse])
     output_core = fc_core.input_layer(features, [one_hot_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual([3, 10], output.eval().shape)
@@ -852,7 +852,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embeded_sparse])
     output_core = fc_core.input_layer(features, [embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(output.eval().shape, [4, 10])
       # Verify cross compatibility: Core builder output should equal to contrib.
@@ -878,7 +878,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         features, [embedded_sparse], weight_collections=["my_collection_core"])
     weights_core = ops.get_collection("my_collection_core")
     grad_core = gradients_impl.gradients(output_core, weights_core)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       gradient_values = []
       gradient_values_core = []
@@ -907,7 +907,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
                                                            [embeded_sparse])
     output_core = fc_core.input_layer(features, [embeded_sparse])
 
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       output_eval = output.eval()
       self.assertAllEqual(output_eval.shape, [2, 10])
@@ -935,7 +935,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
 
     # Makes sure that trying to use different initializers with the same
     # embedding column explicitly fails.
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError,
           "Duplicate feature column key found for column: wire_embedding"):
@@ -961,7 +961,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
                                                            [embeded_sparse])
     output_core = fc_core.input_layer(features, [embeded_sparse])
 
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(output.eval().shape, [2, 10])
@@ -986,7 +986,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     embeded_sparse = feature_column.embedding_column(weighted_ids, 10)
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(output.eval().shape, [2, 10])
@@ -1005,7 +1005,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
     embeded_sparse = feature_column.embedding_column(crossed, 10)
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(output.eval().shape, [2, 10])
@@ -1016,7 +1016,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
     features = {"wire": wire_tensor}
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError, "Error creating input layer for column: wire"):
         variables_lib.global_variables_initializer().run()
@@ -1035,7 +1035,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
     features = {"ids": ids_tensor, "weights": weights_tensor}
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
          ValueError,
           "Error creating input layer for column: ids_weighted_by_weights"):
@@ -1053,7 +1053,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         indices=[[0, 0], [1, 0], [1, 1]],
         dense_shape=[2, 2])
     features = {"aaa": wire_tensor, "bbb": wire_tensor}
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
           ValueError, "Error creating input layer for column: aaa_X_bbb"):
         variables_lib.global_variables_initializer().run()
@@ -1080,7 +1080,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         hashed_sparse, 10, initializer=init_ops.constant_initializer(133.7))
     output = feature_column_ops.input_from_feature_columns(
         features, [real_valued, bucket, embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
      variables_lib.global_variables_initializer().run()
       # size of output = 3 (real_valued) + 2 * 4 (bucket) + 10 (embedding) = 21
       self.assertAllEqual(output.eval().shape, [3, 21])
@@ -1099,7 +1099,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         initializer=init_ops.ones_initializer())
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       # score: (number of values)
       self.assertAllEqual(output.eval(), [[1.], [2.], [0.]])
@@ -1119,7 +1119,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         max_norm=0.5)
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embedded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       # score: (number of values * 0.5)
       self.assertAllClose(output.eval(), [[0.5], [1.], [0.]])
@@ -1144,7 +1144,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
         initializer=init_ops.ones_initializer())
     output = feature_column_ops.input_from_feature_columns(features,
                                                            [embeded_sparse])
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       # score: (sum of weights)
@@ -1236,7 +1236,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
 
     # There should be one trainable variables for sparse_2
     self.assertEqual(1, len(variables_lib.trainable_variables()))
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       output_1_eval = output_1.eval()
       output_2_eval = output_2.eval()
@@ -1295,7 +1295,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [measurement_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       model_inputs = sess.run(model_input_tensor)
     self.assertAllClose(measurement_input, model_inputs)
@@ -1305,7 +1305,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     rating = np.array([[0., 1., 2., -1.],
                        [3., 4., 5., 6.]])
     features = {"rating": constant_op.constant(rating)}
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       output = sess.run(
           feature_column_ops.sequence_input_from_feature_columns(
              features, [var_len_real_valued]))
@@ -1329,7 +1329,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
     reshaped_measurements = np.reshape(measurement_input, expected_shape)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       model_inputs = sess.run(model_input_tensor)
 
     self.assertAllClose(reshaped_measurements, model_inputs)
@@ -1350,7 +1350,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [measurement_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       model_inputs = sess.run(model_input_tensor)
 
     self.assertAllClose(normalizer(measurement_input), model_inputs)
@@ -1373,7 +1373,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     expected_shape = [batch_size, sequence_length, np.prod(dimensions)]
     reshaped_measurements = np.reshape(measurement_input, expected_shape)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       model_inputs = sess.run(model_input_tensor)
 
     self.assertAllClose(normalizer(reshaped_measurements), model_inputs)
@@ -1395,7 +1395,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [one_hot_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input = sess.run(model_input_tensor)
@@ -1429,7 +1429,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [one_hot_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input = sess.run(model_input_tensor)
@@ -1459,7 +1459,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [embedded_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input = sess.run(model_input_tensor)
@@ -1488,7 +1488,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     model_input_tensor = feature_column_ops.sequence_input_from_feature_columns(
         columns_to_tensors, [embedded_column])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input = sess.run(model_input_tensor)
@@ -1518,7 +1518,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
     embedding_weights = ops.get_collection("my_collection")
     gradient_tensor = gradients_impl.gradients(model_input_tensor,
                                                embedding_weights)
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input, gradients = sess.run([model_input_tensor, gradient_tensor])
@@ -1585,7 +1585,7 @@ class SequenceInputFromFeatureColumnTest(test.TestCase):
         columns_to_tensors, model_input_columns)
     self.assertEqual(dtypes.float32, model_input_tensor.dtype)
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
      variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       model_input = sess.run(model_input_tensor)
@@ -1622,7 +1622,7 @@ class WeightedSumTest(test.TestCase):
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [hashed_sparse], num_outputs=5)
     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
       # Verify cross compatibility: Core builder output should equal to contrib.
@@ -1640,7 +1640,7 @@ class WeightedSumTest(test.TestCase):
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [hashed_sparse], num_outputs=5)
     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
       # Verify cross compatibility: Core builder output should equal to contrib.
@@ -1654,7 +1654,7 @@ class WeightedSumTest(test.TestCase):
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [hashed_sparse], num_outputs=5)
     logits_core = fc_core.linear_model(features, [hashed_sparse], units=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
       # Verify cross compatibility: Core builder output should equal to contrib.
@@ -1676,7 +1676,7 @@ class WeightedSumTest(test.TestCase):
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [weighted_ids], num_outputs=5)
     logits_core = fc_core.linear_model(features, [weighted_ids], units=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
@@ -1695,7 +1695,7 @@ class WeightedSumTest(test.TestCase):
         features, [weighted_ids], num_outputs=5)
     logits_core = fc_core.linear_model(features, [weighted_ids], units=5)
 
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       lookup_ops.tables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
@@ -1716,7 +1716,7 @@ class WeightedSumTest(test.TestCase):
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [crossed], num_outputs=5)
     logits_core = fc_core.linear_model(features, [crossed], units=5)
-    with self.test_session():
+    with self.cached_session():
      variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [2, 5])
       # Verify cross compatibility: Core builder output should equal to contrib.
@@ -1730,7 +1730,7 @@ class WeightedSumTest(test.TestCase):
         dense_shape=[2, 2])
     features = {"wire": wire_tensor}
     embeded_sparse = feature_column.embedding_column(hashed_sparse, 10)
-    with self.test_session():
+    with self.cached_session():
       with self.assertRaisesRegexp(
          ValueError, "Error creating weighted sum for column: wire_embedding"):
        variables_lib.global_variables_initializer().run()
@@ -1756,7 +1756,7 @@ class WeightedSumTest(test.TestCase):
             features, [movies], num_outputs=1))
     logits_core = fc_core.linear_model(features, [movies])
 
-    with self.test_session() as sess:
+    with self.cached_session() as sess:
       variables_lib.initialize_all_variables().run()
       lookup_ops.tables_initializer().run()
@@ -1776,7 +1776,7 @@ class WeightedSumTest(test.TestCase):
     }
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [real_valued], num_outputs=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [3, 5])
@@ -1789,7 +1789,7 @@ class WeightedSumTest(test.TestCase):
     }
     logits, _, _ = feature_column_ops.weighted_sum_from_feature_columns(
         features, [bucket], num_outputs=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(logits.eval().shape, [3, 5])
@@ -1814,7 +1814,7 @@ class WeightedSumTest(test.TestCase):
         features, [real_valued, bucket, hashed_sparse, crossed], num_outputs=5)
     output_core = fc_core.linear_model(
         features, [real_valued, bucket, hashed_sparse, crossed], units=5)
-    with self.test_session():
+    with self.cached_session():
       variables_lib.global_variables_initializer().run()
       self.assertAllEqual(output.eval().shape, [3, 5])
       # Verify cross compatibility: Core builder output should equal to
contrib. @@ -1837,7 +1837,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, bias = ( feature_column_ops.weighted_sum_from_feature_columns( features, [age, language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -1877,7 +1877,7 @@ class WeightedSumTest(test.TestCase): features, [country, language], num_outputs=1)) # Assert that only a single weight is created. self.assertEqual(len(variables), 1) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -1941,7 +1941,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, bias = ( feature_column_ops.weighted_sum_from_feature_columns( features, [weighted_language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -1969,7 +1969,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, bias = ( feature_column_ops.weighted_sum_from_feature_columns( features, [language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -1992,7 +1992,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [movies], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2026,7 +2026,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country_language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2050,7 +2050,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [language_language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2083,7 +2083,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country_language], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2124,7 +2124,7 @@ class WeightedSumTest(test.TestCase): features, [country, language, country_language], num_outputs=1, scope=scope)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2161,7 +2161,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country, age, incomes], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2197,7 +2197,7 @@ class 
WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country, age, height, incomes], num_outputs=5)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2228,7 +2228,7 @@ class WeightedSumTest(test.TestCase): feature_column_ops.weighted_sum_from_feature_columns( features, [bucket], num_outputs=1)) output_core = fc_core.linear_model(features, [bucket]) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() # Cross compatibility: Core builder output should equal to contrib. @@ -2259,7 +2259,7 @@ class WeightedSumTest(test.TestCase): feature_column_ops.weighted_sum_from_feature_columns( features, [bucket, country], num_outputs=1)) output_core = fc_core.linear_model(features, [bucket, country]) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() # Cross compatibility: Core builder output should equal to contrib. @@ -2290,7 +2290,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [bucket, country], num_outputs=5)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2326,7 +2326,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country_price], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2365,7 +2365,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [country_language_price], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2389,7 +2389,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [product], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() product_weights = column_to_variable[product][0] @@ -2404,7 +2404,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [product], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() product_weights = column_to_variable[product][0] @@ -2419,7 +2419,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [product], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() product_weights = column_to_variable[product][0] @@ -2440,7 +2440,7 @@ class 
WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [product], num_outputs=1)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() product_weights = column_to_variable[product][0] @@ -2452,7 +2452,7 @@ class WeightedSumTest(test.TestCase): features = {"age": constant_op.constant([[10.], [20.], [30.], [40.]])} output, _, bias = feature_column_ops.weighted_sum_from_feature_columns( features, [feature_column.real_valued_column("age")], num_outputs=3) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() sess.run(bias.assign([0.1, 0.2, 0.3])) @@ -2466,7 +2466,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [column], num_outputs=3)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() weights = column_to_variable[column][0] @@ -2490,7 +2490,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [column], num_outputs=3)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() weights = column_to_variable[column][0] @@ -2516,7 +2516,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [column], num_outputs=3)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2556,7 +2556,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [column], num_outputs=3)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2585,7 +2585,7 @@ class WeightedSumTest(test.TestCase): output, column_to_variable, _ = ( feature_column_ops.weighted_sum_from_feature_columns( features, [column], num_outputs=3)) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() lookup_ops.tables_initializer().run() @@ -2651,7 +2651,7 @@ class ParseExampleTest(test.TestCase): feature_columns=[bucket, wire_cast]) self.assertIn(bucket, output) self.assertIn(wire_cast, output) - with self.test_session(): + with self.cached_session(): lookup_ops.tables_initializer().run() self.assertAllEqual(output[bucket].eval(), [[2, 3, 0]]) self.assertAllEqual(output[wire_cast].indices.eval(), [[0, 0], [0, 1]]) @@ -2713,7 +2713,7 @@ class ParseExampleTest(test.TestCase): self.assertIn("measurements", seq) self.assertIsInstance(seq["measurements"], ops.Tensor) - with self.test_session() as sess: + with self.cached_session() as sess: location_val, wire_cast_val, measurement_val = sess.run( [ctx["location"], seq["wire_cast"], seq["measurements"]]) diff --git a/tensorflow/contrib/layers/python/layers/feature_column_test.py b/tensorflow/contrib/layers/python/layers/feature_column_test.py index 1de9ab7056..d90d6ecf7f 
100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_test.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_test.py @@ -57,6 +57,29 @@ def _sparse_id_tensor(shape, vocab_size, seed=112123): indices=indices, values=values, dense_shape=shape) +def _sparse_id_tensor_with_weights(shape, vocab_size, seed=112123): + # Returns an arbitrary `SparseTensor` with given shape and vocab size. + assert vocab_size >= shape[-1] + np.random.seed(seed) + indices = np.array(list(itertools.product(*[range(s) for s in shape]))) + + # Values are sampled from the vocab without replacement within each row. + values = np.ndarray.flatten(np.array([ + np.random.choice(vocab_size, size=shape[-1], replace=False) + for _ in range(np.prod(shape[:-1]))])) + weights = np.sort(np.random.rand(*shape), axis=len(shape)-1) + + # Keep only entries with weight < 0.5, for sparsity. + keep = np.ndarray.flatten(weights < 0.5) # Keeps about half of them + indices = indices[keep] + values = values[keep] + weights = np.ndarray.flatten(weights)[keep] + return (sparse_tensor_lib.SparseTensor( + indices=indices, values=values, dense_shape=shape), + sparse_tensor_lib.SparseTensor( + indices=indices, values=weights, dense_shape=shape)) + + class FeatureColumnTest(test.TestCase): def testImmutability(self): @@ -178,7 +201,7 @@ class FeatureColumnTest(test.TestCase): b2 = feature_column_ops.input_from_feature_columns({ b[1]: input_tensor_c2 }, [b[1]]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) b1_value = b1.eval() b2_value = b2.eval() @@ -207,7 +230,7 @@ class FeatureColumnTest(test.TestCase): e1 = feature_column_ops.input_from_feature_columns({ e[0]: input_tensor_c1 }, [e[0]]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) d1_value = d1.eval() e1_value = e1.eval() @@ -317,7 +340,7 @@ class FeatureColumnTest(test.TestCase): with variable_scope.variable_scope("output_rank_{}".format(output_rank)): one_hot_output = one_hot._to_dnn_input_layer( id_tensor, output_rank=output_rank) - with self.test_session() as sess: + with self.cached_session() as sess: one_hot_value = sess.run(one_hot_output) expected_shape = (id_tensor_shape[:output_rank - 1] + [vocab_size]) self.assertEquals(expected_shape, list(one_hot_value.shape)) @@ -329,6 +352,34 @@ class FeatureColumnTest(test.TestCase): self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights") self.assertEqual(one_hot.length, 3) + def testIntegerizedOneHotColumnForWeightedSparseColumn(self): + vocab_size = 5 + ids = fc.sparse_column_with_integerized_feature("ids", vocab_size) + weighted_ids = fc.weighted_sparse_column(ids, "weights") + one_hot = fc.one_hot_column(weighted_ids) + self.assertEqual(one_hot.sparse_id_column.name, "ids_weighted_by_weights") + self.assertEqual(one_hot.length, vocab_size) + + def testIntegerizedOneHotWeightedSparseColumnShape(self): + vocab_size = 5 + for id_tensor_shape in [[4, 3], [2, 4], [3, 3, 3]]: + output_rank = len(id_tensor_shape) + a = fc.sparse_column_with_integerized_feature("a", vocab_size) + weighted = fc.weighted_sparse_column(a, "weights") + one_hot = fc.one_hot_column(weighted) + id_tensor, weight_tensor = _sparse_id_tensor_with_weights( + id_tensor_shape, vocab_size) + + one_hot_output = one_hot._to_dnn_input_layer( + (id_tensor, weight_tensor), + output_rank=output_rank) + one_hot_output_shape = one_hot_output.get_shape().as_list() + expected_shape = id_tensor_shape[:-1] + [vocab_size] + 
self.assertEquals(expected_shape, one_hot_output_shape) + with self.cached_session() as sess: + one_hot_value = sess.run(one_hot_output) + self.assertEquals(expected_shape, list(one_hot_value.shape)) + def testOneHotColumnWithSparseColumnWithHashKeys(self): input_values = ["marlo", "unknown", "omar"] inputs = constant_op.constant(input_values) @@ -348,7 +399,7 @@ class FeatureColumnTest(test.TestCase): expected = np.array([[0., 1., 0., 0., 0., 0., 0., 1., 0., 0.], [0., 1., 0., 0., 0., 0., 0., 0., 0., 1.], [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.]]) - with self.test_session() as sess: + with self.cached_session() as sess: one_hot_value = sess.run(one_hot_output) self.assertTrue(np.array_equal(one_hot_value, expected)) @@ -389,7 +440,7 @@ class FeatureColumnTest(test.TestCase): } one_hot_tensor = feature_column_ops.input_from_feature_columns( features, [one_hot]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) self.assertAllEqual([[2., 6., 0.]], one_hot_tensor.eval()) @@ -400,7 +451,7 @@ class FeatureColumnTest(test.TestCase): features = {"ids": constant_op.constant([["marlo", "unknown", "omar"]])} one_hot_tensor = feature_column_ops.input_from_feature_columns( features, [one_hot]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) sess.run(lookup_ops.tables_initializer()) self.assertAllEqual([[1., 1., 0.]], one_hot_tensor.eval()) @@ -552,7 +603,7 @@ class FeatureColumnTest(test.TestCase): real_valued_output = real_valued_column._to_dnn_input_layer( constant_op.constant(real_valued_input, dtype=dtypes.float32), output_rank=output_rank) - with self.test_session() as sess: + with self.cached_session() as sess: real_valued_eval = sess.run(real_valued_output) expected_shape = ( input_shape[:output_rank - 1] + @@ -746,7 +797,7 @@ class FeatureColumnTest(test.TestCase): sparse_column.insert_transformed_feature(features) sparse_output = features[sparse_column] expected_shape = [batch_size, 1] - with self.test_session() as sess: + with self.cached_session() as sess: sparse_result = sess.run(sparse_output) self.assertEquals(expected_shape, list(sparse_result.dense_shape)) @@ -1059,7 +1110,7 @@ class FeatureColumnTest(test.TestCase): ckpt_dir = tempfile.mkdtemp(prefix=ckpt_dir_prefix) checkpoint_path = os.path.join(ckpt_dir, "model.ckpt") - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) saved_embedding = embeddings.eval() save.save(sess, checkpoint_path) @@ -1080,7 +1131,7 @@ class FeatureColumnTest(test.TestCase): embedding_col_initialized: input_tensor }, [embedding_col_initialized]) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) loaded_embedding = pretrained_embeddings.eval() @@ -1125,7 +1176,7 @@ class FeatureColumnTest(test.TestCase): ckpt_dir = tempfile.mkdtemp(prefix=ckpt_dir_prefix) checkpoint_path = os.path.join(ckpt_dir, "model.ckpt") - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) sess.run(assign_op) saved_col_weights = col_weights[crossed_col][0].eval() @@ -1150,7 +1201,7 @@ class FeatureColumnTest(test.TestCase): }, [crossed_col_initialized], 1)) col_weights_from_ckpt = col_weights[crossed_col_initialized][0] - with self.test_session() as sess: + with self.cached_session() 
as sess: sess.run(variables.global_variables_initializer()) loaded_col_weights = col_weights_from_ckpt.eval() diff --git a/tensorflow/contrib/layers/python/layers/initializers.py b/tensorflow/contrib/layers/python/layers/initializers.py index 51610f21b2..655f038b18 100644 --- a/tensorflow/contrib/layers/python/layers/initializers.py +++ b/tensorflow/contrib/layers/python/layers/initializers.py @@ -47,7 +47,7 @@ def xavier_initializer(uniform=True, seed=None, dtype=dtypes.float32): Args: uniform: Whether to use uniform or normal distributed random initialization. seed: A Python integer. Used to create random seeds. See - @{tf.set_random_seed} for behavior. + `tf.set_random_seed` for behavior. dtype: The data type. Only floating point types are supported. Returns: @@ -98,7 +98,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False, mode: String. 'FAN_IN', 'FAN_OUT', 'FAN_AVG'. uniform: Whether to use uniform or normal distributed random initialization. seed: A Python integer. Used to create random seeds. See - @{tf.set_random_seed} for behavior. + `tf.set_random_seed` for behavior. dtype: The data type. Only floating point types are supported. Returns: @@ -111,7 +111,7 @@ def variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False, if not dtype.is_floating: raise TypeError('Cannot create initializer for non-floating point type.') if mode not in ['FAN_IN', 'FAN_OUT', 'FAN_AVG']: - raise TypeError('Unknow mode %s [FAN_IN, FAN_OUT, FAN_AVG]', mode) + raise TypeError('Unknown mode %s [FAN_IN, FAN_OUT, FAN_AVG]', mode) # pylint: disable=unused-argument def _initializer(shape, dtype=dtype, partition_info=None): diff --git a/tensorflow/contrib/layers/python/layers/initializers_test.py b/tensorflow/contrib/layers/python/layers/initializers_test.py index b7fe878893..bd3692b258 100644 --- a/tensorflow/contrib/layers/python/layers/initializers_test.py +++ b/tensorflow/contrib/layers/python/layers/initializers_test.py @@ -85,7 +85,7 @@ class VarianceScalingInitializerTest(test.TestCase): def _test_variance(self, initializer, shape, variance, factor, mode, uniform): with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: + with self.session(graph=g) as sess: var = variable_scope.get_variable( name='test', shape=shape, diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 151fc7a0d7..a82d4c1951 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -55,12 +55,12 @@ from tensorflow.python.training import moving_averages # TODO(b/28426988): Replace legacy_* fns migrated from slim. # TODO(b/28426988): Remove legacy_* when all uses have migrated to new API. 
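The layers.py hunks that follow export the new 1-D convolution wrapper (convolution1d, aliased as conv1d at the bottom of the module). A small usage sketch, assuming TF 1.x with contrib built after this change; the shapes mirror the testConv1dShape case added further down in layers_test.py:

    import tensorflow as tf

    # Sketch only: conv1d consumes (batch, width, channels) input and, with
    # the default SAME padding, preserves the spatial width.
    inputs = tf.random_uniform((5, 7, 3), seed=1)      # batch=5, width=7, channels=3
    output = tf.contrib.layers.conv1d(inputs, 32, 3)   # 32 output channels, kernel width 3
    print(output.get_shape().as_list())                # [5, 7, 32]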
__all__ = [ - 'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv2d', 'conv3d', - 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution', - 'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose', - 'convolution3d', 'convolution3d_transpose', 'dense_to_sparse', - 'dropout', 'elu', 'flatten', 'fully_connected', 'GDN', 'gdn', - 'images_to_sequence', 'layer_norm', 'linear', 'pool', 'max_pool2d', + 'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv1d', 'conv2d', + 'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', + 'convolution', 'convolution1d', 'convolution2d', 'convolution2d_in_plane', + 'convolution2d_transpose', 'convolution3d', 'convolution3d_transpose', + 'dense_to_sparse', 'dropout', 'elu', 'flatten', 'fully_connected', 'GDN', + 'gdn', 'images_to_sequence', 'layer_norm', 'linear', 'pool', 'max_pool2d', 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', 'repeat', 'scale_gradient', 'separable_conv2d', 'separable_convolution2d', 'sequence_to_images', 'softmax', 'spatial_softmax', 'stack', 'unit_norm', @@ -1536,6 +1536,7 @@ def convolution3d_transpose( @add_arg_scope def dense_to_sparse(tensor, eos_token=0, outputs_collections=None, scope=None): """Converts a dense tensor into a sparse tensor. + An example use would be to convert dense labels to sparse ones so that they can be fed to the ctc_loss. @@ -1583,7 +1584,7 @@ def dropout(inputs, outputs_collections: Collection to add the outputs. scope: Optional scope for name_scope. seed: A Python integer. Used to create random seeds. See - @{tf.set_random_seed} for behavior. + `tf.set_random_seed` for behavior. Returns: A tensor representing the output of the operation. @@ -1701,19 +1702,22 @@ def _inner_flatten(inputs, new_rank, output_collections=None, scope=None): return utils.collect_named_outputs(output_collections, sc, flattened) -def _model_variable_getter(getter, - name, - shape=None, - dtype=None, - initializer=None, - regularizer=None, - trainable=True, - collections=None, - caching_device=None, - partitioner=None, - rename=None, - use_resource=None, - **_): +def _model_variable_getter( + getter, + name, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=True, + collections=None, + caching_device=None, + partitioner=None, + rename=None, + use_resource=None, + synchronization=tf_variables.VariableSynchronization.AUTO, + aggregation=tf_variables.VariableAggregation.NONE, + **_): """Getter that uses model_variable for compatibility with core layers.""" short_name = name.split('/')[-1] if rename and short_name in rename: @@ -1731,7 +1735,9 @@ def _model_variable_getter(getter, caching_device=caching_device, partitioner=partitioner, custom_getter=getter, - use_resource=use_resource) + use_resource=use_resource, + synchronization=synchronization, + aggregation=aggregation) def _build_variable_getter(rename=None): @@ -2021,6 +2027,7 @@ class GDN(base.Layer): def beta_initializer(shape, dtype=None, partition_info=None): del partition_info # unused + pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) return math_ops.sqrt(array_ops.ones(shape, dtype=dtype) + pedestal) def gamma_initializer(shape, dtype=None, partition_info=None): @@ -2028,6 +2035,7 @@ class GDN(base.Layer): assert len(shape) == 2 assert shape[0] == shape[1] eye = linalg_ops.eye(shape[0], dtype=dtype) + pedestal = array_ops.constant(self._reparam_offset**2, dtype=self.dtype) return math_ops.sqrt(self._gamma_init * eye + pedestal) beta = self.add_variable( @@ 
-2323,11 +2331,16 @@ def images_to_sequence(inputs, outputs_collections=None, scope=None): """Convert a batch of images into a batch of sequences. + Args: inputs: a (num_images, height, width, depth) tensor data_format: A string. `NHWC` (default) and `NCHW` are supported. outputs_collections: The collections to which the outputs are added. scope: Optional scope for name_scope. + + Raises: + ValueError: If `data_format` is neither NCHW nor NHWC. + Returns: (width, num_images*height, depth) sequence tensor """ @@ -2647,7 +2660,7 @@ def separable_convolution2d( inputs, num_outputs, kernel_size, - depth_multiplier, + depth_multiplier=1, stride=1, padding='SAME', data_format=DATA_FORMAT_NHWC, @@ -2656,6 +2669,7 @@ def separable_convolution2d( normalizer_fn=None, normalizer_params=None, weights_initializer=initializers.xavier_initializer(), + pointwise_initializer=None, weights_regularizer=None, biases_initializer=init_ops.zeros_initializer(), biases_regularizer=None, @@ -2697,7 +2711,9 @@ def separable_convolution2d( `biases_regularizer` are ignored and `biases` are not created nor added. default set to None for no normalizer function normalizer_params: Normalization function parameters. - weights_initializer: An initializer for the weights. + weights_initializer: An initializer for the depthwise weights. + pointwise_initializer: An initializer for the pointwise weights. + Defaults to None, in which case weights_initializer is used. weights_regularizer: Optional regularizer for the weights. biases_initializer: An initializer for the biases. If None skip biases. biases_regularizer: Optional regularizer for the biases. @@ -2729,6 +2745,9 @@ def separable_convolution2d( custom_getter=layer_variable_getter) as sc: inputs = ops.convert_to_tensor(inputs) + if pointwise_initializer is None: + pointwise_initializer = weights_initializer + df = ('channels_first' if data_format and data_format.startswith('NC') else 'channels_last') if num_outputs is not None: @@ -2744,7 +2763,7 @@ def separable_convolution2d( depth_multiplier=depth_multiplier, use_bias=not normalizer_fn and biases_initializer, depthwise_initializer=weights_initializer, - pointwise_initializer=weights_initializer, + pointwise_initializer=pointwise_initializer, bias_initializer=biases_initializer, depthwise_regularizer=weights_regularizer, pointwise_regularizer=weights_regularizer, @@ -2833,6 +2852,7 @@ def sequence_to_images(inputs, outputs_collections=None, scope=None): """Convert a batch of sequences into a batch of images. + Args: inputs: (num_steps, num_batches, depth) sequence tensor height: the height of the images @@ -2840,6 +2860,7 @@ def sequence_to_images(inputs, Currently supports `'channels_first'` and `'channels_last'`. outputs_collections: The collections to which the outputs are added. scope: Optional scope for name_scope. + Returns: A tensor representing the output of the operation. """ @@ -2849,7 +2870,7 @@ def sequence_to_images(inputs, if num_batches is None: num_batches = -1 else: - num_batches = num_batches // height + num_batches //= height reshaped = array_ops.reshape(inputs, [width, num_batches, height, depth]) if output_data_format == 'channels_first': @@ -3088,7 +3109,7 @@ def maxout(inputs, num_units, axis=-1, scope=None): inputs: Tensor input num_units: Specifies how many features will remain after maxout in the `axis` dimension (usually channel). - This must be multiple of number of `axis`. + This must be a factor of the number of features. axis: The dimension where max pooling will be performed. 
Default is the last dimension. scope: Optional scope for variable_scope. @@ -3107,7 +3128,7 @@ def maxout(inputs, num_units, axis=-1, scope=None): raise ValueError('number of features({}) is not ' 'a multiple of num_units({})'.format( num_channels, num_units)) - shape[axis] = -1 + shape[axis] = num_units shape += [num_channels // num_units] # Dealing with batches with arbitrary sizes @@ -3299,6 +3320,7 @@ relu6 = functools.partial(fully_connected, activation_fn=nn.relu6) linear = functools.partial(fully_connected, activation_fn=None) # Simple alias. +conv1d = convolution1d conv2d = convolution2d conv3d = convolution3d conv2d_transpose = convolution2d_transpose diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index b01fd5d5c9..85af9de4e4 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -281,7 +281,7 @@ class BiasAddTest(test.TestCase): def testCreate(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.bias_add(images) self.assertEqual(output.op.name, 'BiasAdd/BiasAdd') @@ -289,7 +289,7 @@ class BiasAddTest(test.TestCase): def testCreateWithActivation(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = _layers.bias_add(images, activation_fn=nn_ops.relu) self.assertEqual(output.op.name, 'BiasAdd/Relu') @@ -298,7 +298,7 @@ class BiasAddTest(test.TestCase): def testCreateDimensions(self): dims = (2, 3, 4) shape = [5, 2, 3, 4] - with self.test_session(): + with self.cached_session(): for d in dims: input_shape = shape[:d] inputs = random_ops.random_uniform(input_shape, seed=1) @@ -311,7 +311,7 @@ class BiasAddTest(test.TestCase): class ConvolutionTest(test.TestCase): def testInvalidShape(self): - with self.test_session(): + with self.cached_session(): images_2d = random_ops.random_uniform((5, 7, 9, 3), seed=1) with self.assertRaisesRegexp( ValueError, 'Convolution expects input with rank 5, got 4'): @@ -323,14 +323,14 @@ class ConvolutionTest(test.TestCase): def testInvalidDataFormat(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) with self.assertRaisesRegexp(ValueError, 'data_format'): layers_lib.convolution2d(images, 32, 3, data_format='CHWN') def testCreateConv(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 4)).astype(np.float32) output = layers_lib.convolution2d(images, 32, [3, 3]) self.assertEqual(output.op.name, 'Conv/Relu') @@ -342,7 +342,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvNCHW(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, 4, height, width)).astype(np.float32) output = layers_lib.convolution2d(images, 32, [3, 3], data_format='NCHW') self.assertEqual(output.op.name, 'Conv/Relu') @@ -354,7 +354,7 @@ class ConvolutionTest(test.TestCase): def testCreateSquareConv(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, 3) self.assertEqual(output.op.name, 'Conv/Relu') @@ -362,7 +362,7 @@ class 
ConvolutionTest(test.TestCase): def testCreateConvWithTensorShape(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, images.get_shape()[1:3]) self.assertEqual(output.op.name, 'Conv/Relu') @@ -370,7 +370,7 @@ class ConvolutionTest(test.TestCase): def testCreateFullyConv(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 32), seed=1) output = layers_lib.convolution2d( images, 64, images.get_shape()[1:3], padding='VALID') @@ -381,7 +381,7 @@ class ConvolutionTest(test.TestCase): def testFullyConvWithCustomGetter(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): called = [0] def custom_getter(getter, *args, **kwargs): @@ -395,7 +395,7 @@ class ConvolutionTest(test.TestCase): def testCreateVerticalConv(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 4), seed=1) output = layers_lib.convolution2d(images, 32, [3, 1]) self.assertEqual(output.op.name, 'Conv/Relu') @@ -407,7 +407,7 @@ class ConvolutionTest(test.TestCase): def testCreateHorizontalConv(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 4), seed=1) output = layers_lib.convolution2d(images, 32, [1, 3]) self.assertEqual(output.op.name, 'Conv/Relu') @@ -417,7 +417,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvWithStride(self): height, width = 6, 8 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, [3, 3], stride=2) self.assertEqual(output.op.name, 'Conv/Relu') @@ -427,7 +427,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvCreatesWeightsAndBiasesVars(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('conv1/weights')) self.assertFalse(variables.get_variables('conv1/biases')) layers_lib.convolution2d(images, 32, [3, 3], scope='conv1') @@ -436,7 +436,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvWithScope(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, [3, 3], scope='conv1') self.assertEqual(output.op.name, 'conv1/Relu') @@ -453,14 +453,14 @@ class ConvolutionTest(test.TestCase): def testCreateConvWithoutActivation(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, [3, 3], activation_fn=None) self.assertEqual(output.op.name, 'Conv/BiasAdd') def testCreateConvValid(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.convolution2d(images, 32, [3, 3], padding='VALID') self.assertListEqual(output.get_shape().as_list(), [5, 5, 7, 32]) @@ -468,7 +468,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvWithWD(self): height, width = 7, 9 weight_decay = 0.01 - with self.test_session() as 
sess: + with self.cached_session() as sess: images = random_ops.random_uniform((5, height, width, 3), seed=1) regularizer = regularizers.l2_regularizer(weight_decay) layers_lib.convolution2d( @@ -481,7 +481,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvNoRegularizers(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) layers_lib.convolution2d(images, 32, [3, 3]) self.assertEqual( @@ -489,7 +489,7 @@ class ConvolutionTest(test.TestCase): def testReuseVars(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) layers_lib.convolution2d(images, 32, [3, 3], scope='conv1') self.assertEqual(len(variables.get_variables()), 2) @@ -498,7 +498,7 @@ class ConvolutionTest(test.TestCase): def testNonReuseVars(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) layers_lib.convolution2d(images, 32, [3, 3]) self.assertEqual(len(variables.get_variables()), 2) @@ -507,7 +507,7 @@ class ConvolutionTest(test.TestCase): def testReuseConvWithWD(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) weight_decay = regularizers.l2_regularizer(0.01) with arg_scope( @@ -523,7 +523,7 @@ class ConvolutionTest(test.TestCase): def testConvWithBatchNorm(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 32), seed=1) with arg_scope( [layers_lib.convolution2d], @@ -539,7 +539,7 @@ class ConvolutionTest(test.TestCase): def testReuseConvWithBatchNorm(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 32), seed=1) with arg_scope( [layers_lib.convolution2d], @@ -557,7 +557,7 @@ class ConvolutionTest(test.TestCase): def testCreateConvCreatesWeightsAndBiasesVarsWithRateTwo(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('conv1/weights')) self.assertFalse(variables.get_variables('conv1/biases')) layers_lib.convolution2d(images, 32, [3, 3], rate=2, scope='conv1') @@ -573,7 +573,7 @@ class ConvolutionTest(test.TestCase): output = layers_lib.convolution2d( images, num_filters, [3, 3], rate=2, padding='SAME') self.assertListEqual(list(output.get_shape().as_list()), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -587,7 +587,7 @@ class ConvolutionTest(test.TestCase): output = layers_lib.convolution2d( images, num_filters, [3, 3], rate=2, padding='VALID') self.assertListEqual(list(output.get_shape().as_list()), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -601,7 +601,7 @@ class ConvolutionTest(test.TestCase): output = layers_lib.convolution2d( images, num_filters, [3, 3], rate=[2, 3], padding='VALID') 
self.assertListEqual(list(output.get_shape().as_list()), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEquals(output.op.name, 'Conv/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -612,7 +612,7 @@ class ConvolutionTest(test.TestCase): expected_size = [None, None, None, num_filters] expected_size_dynamic = [5, 7, 9, num_filters] - with self.test_session(): + with self.cached_session(): images = array_ops.placeholder(np.float32, [None, None, None, input_size[3]]) output = layers_lib.convolution2d( @@ -651,7 +651,7 @@ class ConvolutionTest(test.TestCase): expected_size = [None, None, None, num_filters] expected_size_dynamic = [5, 5, 7, num_filters] - with self.test_session(): + with self.cached_session(): images = array_ops.placeholder(np.float32, [None, None, None, input_size[3]]) output = layers_lib.convolution2d( @@ -670,7 +670,7 @@ class ConvolutionTest(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.convolution2d( images, num_filters, [3, 3], rate=2, padding='VALID', scope='conv7') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'conv7/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -688,7 +688,7 @@ class ConvolutionTest(test.TestCase): padding='VALID', activation_fn=None, scope='conv7') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'conv7/BiasAdd') self.assertListEqual(list(output.eval().shape), expected_size) @@ -712,7 +712,7 @@ class Convolution2dTransposeTests(test.TestCase): def testInvalidDataFormat(self): height, width = 7, 9 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) with self.assertRaisesRegexp( ValueError, 'data_format has to be either NCHW or NHWC.'): @@ -915,7 +915,7 @@ class Convolution2dTransposeTests(test.TestCase): images, num_filters, [3, 3], stride=1, padding='SAME') self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(output.eval().shape), expected_size) @@ -929,7 +929,7 @@ class Convolution2dTransposeTests(test.TestCase): images, num_filters, [3, 3], stride=1, padding='VALID') self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(output.eval().shape), expected_size) @@ -944,7 +944,7 @@ class Convolution2dTransposeTests(test.TestCase): self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.get_shape().as_list()), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(output.eval().shape), expected_size) @@ -958,7 +958,7 @@ class Convolution2dTransposeTests(test.TestCase): images, num_filters, [2, 2], stride=[2, 2], padding='SAME') self.assertListEqual(list(output.get_shape().as_list()), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: 
sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -971,7 +971,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 2], stride=[2, 2], padding='VALID') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -984,7 +984,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 2], stride=[2, 2], padding='SAME') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -997,7 +997,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 2], stride=[2, 2], padding='VALID') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -1010,7 +1010,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 4], stride=[2, 1], padding='VALID') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -1023,7 +1023,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 4], stride=[2, 4], padding='VALID') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -1036,7 +1036,7 @@ class Convolution2dTransposeTests(test.TestCase): images = random_ops.random_uniform(input_size, seed=1) output = layers_lib.conv2d_transpose( images, num_filters, [2, 4], stride=[2, 5], padding='VALID') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') self.assertListEqual(list(output.eval().shape), expected_size) @@ -1067,7 +1067,7 @@ class Convolution2dTransposeTests(test.TestCase): conv = layers_lib.conv2d( transpose, num_filters, filter_size, stride=stride, padding='VALID') - with self.test_session(graph=graph) as sess: + with self.session(graph=graph) as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(conv.eval().shape), input_size) @@ -1083,7 +1083,7 @@ class Convolution2dTransposeTests(test.TestCase): images, num_filters, [3, 3], stride=[2, 2], padding='VALID') 
self.assertListEqual(output.get_shape().as_list(), expected_size) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertEqual(output.op.name, 'Conv2d_transpose/Relu') eval_output = output.eval({images: np.zeros(input_size, np.float32)}) @@ -1095,7 +1095,7 @@ class Convolution2dTransposeTests(test.TestCase): expected_size = [None, None, None, num_filters] expected_size_dynamic = [5, 18, 22, num_filters] - with self.test_session(): + with self.cached_session(): images = array_ops.placeholder(np.float32, [None, None, None, input_size[3]]) output = layers_lib.conv2d_transpose( @@ -1116,7 +1116,7 @@ class Convolution2dTransposeTests(test.TestCase): images, num_filters, [3, 3], stride=2, padding='VALID', scope='conv7') self.assertEqual(output.op.name, 'conv7/Relu') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(output.eval().shape), expected_size) @@ -1135,7 +1135,7 @@ class Convolution2dTransposeTests(test.TestCase): scope='conv7') self.assertEqual(output.op.name, 'conv7/BiasAdd') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertListEqual(list(output.eval().shape), expected_size) @@ -1146,7 +1146,7 @@ class Convolution2dTransposeTests(test.TestCase): stride = 2 padding = 'VALID' - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform(input_size, seed=1) output_deconv = layers_lib.conv2d_transpose( images, @@ -1184,12 +1184,12 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(horz_gradients) expected = np.zeros((1, 10, 9, 1)) - self.assertAllEqual(result, expected) + self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testHorzConvWithBlankImageAndPlaceholder(self): image = array_ops.placeholder(dtypes.float32, shape=(None, None, None, 1)) @@ -1201,7 +1201,7 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run( horz_gradients, feed_dict={ @@ -1209,7 +1209,7 @@ class ConvolutionInPlaneTest(test.TestCase): }) expected = np.zeros((1, 10, 9, 1)) - self.assertAllEqual(result, expected) + self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testHorzConvWithRandomImageMultiBatch(self): np.random.seed(1) @@ -1225,7 +1225,7 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(horz_gradients) @@ -1245,7 +1245,7 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(horz_gradients) @@ -1267,7 +1267,7 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(horz_gradients) 
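Besides the session swaps, the ConvolutionInPlaneTest hunks above and below relax exact comparisons (assertAllEqual) into tolerance-based ones. A self-contained sketch of why, assuming float32 arithmetic that can leave near-zero residue (the test name and the 1e-7 residue value are invented for illustration):

    import numpy as np
    import tensorflow as tf

    class ToleranceSketchTest(tf.test.TestCase):

      def testNearZeroResult(self):
        # float32 convolutions may produce values that are only approximately
        # zero, so the tests now compare within rtol/atol rather than exactly.
        result = np.full((1, 10, 9, 1), 1e-7, dtype=np.float32)
        expected = np.zeros((1, 10, 9, 1))
        self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5)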
@@ -1283,12 +1283,12 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(vert_gradients) expected = np.zeros((1, 9, 10, 1)) - self.assertAllEqual(result, expected) + self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) def testVertConvWithVaryingImage(self): image = np.asmatrix(('1.0 2.0 3.0;' '1.1 2.0 4.0;' '-4.3 0.0 8.9')) @@ -1306,12 +1306,35 @@ class ConvolutionInPlaneTest(test.TestCase): activation_fn=None) init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(init_op) result = sess.run(vert_gradients) self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5) + def testConv1dShape(self): + width = 7 + with self.cached_session(): + images = random_ops.random_uniform((5, width, 3), seed=1) + output = layers_lib.convolution1d(images, 32, 3) + self.assertEqual(output.op.name, 'Conv/Relu') + self.assertListEqual(output.get_shape().as_list(), [5, width, 32]) + + def testConvInferSpatialDims(self): + depth, height, width = 7, 9, 11 + with self.cached_session(): + images = np.random.uniform(size=(5, width, 4)).astype(np.float32) + output = layers_lib.convolution(images, 32, [3]) + self.assertListEqual(output.get_shape().as_list(), [5, width, 32]) + images = np.random.uniform(size=(5, height, width, 4)).astype(np.float32) + output = layers_lib.convolution(images, 32, [3, 3]) + self.assertListEqual(output.get_shape().as_list(), [5, height, width, 32]) + images = np.random.uniform(size=(5, depth, height, width, + 4)).astype(np.float32) + output = layers_lib.convolution(images, 32, [3, 3, 3]) + self.assertListEqual(output.get_shape().as_list(), + [5, depth, height, width, 32]) + class DenseToSparseTest(test.TestCase): @@ -1321,7 +1344,7 @@ class DenseToSparseTest(test.TestCase): sparse = _layers.dense_to_sparse(tensor) dense = sparse_ops.sparse_to_dense(sparse.indices, sparse.dense_shape, sparse.values) - with self.test_session() as sess: + with self.cached_session() as sess: constant = sess.run(dense) self.assertAllEqual(expected_constant, constant) @@ -1330,16 +1353,16 @@ class DropoutTest(test.TestCase): def testCreateDropout(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.dropout(images) - self.assertEqual(output.op.name, 'Dropout/dropout/mul') + self.assertEqual(output.op.name, 'Dropout/dropout_1/mul') output.get_shape().assert_is_compatible_with( ops.convert_to_tensor(images).get_shape()) def testCreateDropoutWithConstantTrue(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): is_training = constant_op.constant(True) images = random_ops.random_uniform((5, height, width, 3), seed=1) output = _layers.dropout(images, is_training=is_training) @@ -1347,7 +1370,7 @@ class DropoutTest(test.TestCase): def testCreateDropoutWithConstantFalse(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): is_training = constant_op.constant(False) images = random_ops.random_uniform((5, height, width, 3), seed=1) output = _layers.dropout(images, is_training=is_training) @@ -1355,7 +1378,7 @@ class DropoutTest(test.TestCase): def testCreateDropoutWithPlaceholder(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): is_training = 
array_ops.placeholder(dtype=dtypes.bool, shape=[]) images = random_ops.random_uniform((5, height, width, 3), seed=1) output = _layers.dropout(images, is_training=is_training) @@ -1364,7 +1387,7 @@ class DropoutTest(test.TestCase): def testCollectOutputs(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = _layers.dropout(images, outputs_collections='outputs') c_output = ops.get_collection('outputs')[0] @@ -1373,7 +1396,7 @@ class DropoutTest(test.TestCase): def testDropout(self): height, width = 10, 10 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0)) @@ -1386,7 +1409,7 @@ class DropoutTest(test.TestCase): def testDropoutSeed(self): """Test that providing the same seed produces the same result.""" height, width = 10, 10 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output1 = _layers.dropout(images, seed=1) @@ -1395,7 +1418,7 @@ class DropoutTest(test.TestCase): def testCreateDropoutNoTraining(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') num_elem_initial = math_ops.reduce_mean(math_ops.to_float(images > 0)) @@ -1408,7 +1431,7 @@ class DropoutTest(test.TestCase): def testCreateFCFollowByDropout(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.fully_connected(images, 50) @@ -1422,7 +1445,7 @@ class DropoutTest(test.TestCase): def testCreateFCWithDropout(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.fully_connected( @@ -1437,14 +1460,14 @@ class DropoutTest(test.TestCase): class FlattenTest(test.TestCase): def testInvalidRank(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) inputs.set_shape(tensor_shape.TensorShape((5,))) with self.assertRaisesRegexp(ValueError, 'incompatible with the layer'): _layers.flatten(inputs) def testUnknownLastDim(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) inputs.set_shape(tensor_shape.TensorShape((5, None))) output = _layers.flatten(inputs) @@ -1452,7 +1475,7 @@ class FlattenTest(test.TestCase): def testCollectOutputs(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.flatten(images, outputs_collections='outputs') c_output = ops.get_collection('outputs')[0] @@ -1461,7 +1484,7 @@ class FlattenTest(test.TestCase): def testFlatten4D(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.flatten(images) @@ -1471,7 +1494,7 @@ class FlattenTest(test.TestCase): 
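For context on the FlattenTest cases being migrated here: flatten() keeps the leading batch dimension and collapses everything else, which is what the 4-D and 3-D variants assert. A minimal sketch, assuming TF 1.x with contrib available:

    import tensorflow as tf

    inputs = tf.placeholder(tf.float32, [None, 3, 3, 3])  # (batch, h, w, c)
    flat = tf.contrib.layers.flatten(inputs)
    print(flat.get_shape().as_list())                     # [None, 27]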
def testFlatten3D(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width), seed=1, name='images') output = _layers.flatten(images) @@ -1481,7 +1504,7 @@ class FlattenTest(test.TestCase): def testFlattenBatchSize(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') inputs = array_ops.placeholder(dtypes.int32, (None, height, width, 3)) @@ -1493,7 +1516,7 @@ class FlattenTest(test.TestCase): def testUnknownDims(self): height = width = depth = 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform( (5, height, width, depth), seed=1, name='images') inputs = array_ops.placeholder(dtypes.int32, (None, None, None, None)) @@ -1528,7 +1551,7 @@ class PartialFlattenTest(test.TestCase): flattened_t = _layers._inner_flatten(inputs, new_rank) static_shape = flattened_t.get_shape().as_list() self.assertEqual(static_shape, expected_new_shape) - with self.test_session() as sess: + with self.cached_session() as sess: flattened = sess.run(flattened_t) np.testing.assert_array_equal(expected_flattened, flattened) @@ -1548,7 +1571,7 @@ class PartialFlattenTest(test.TestCase): flattened_t = _layers._inner_flatten(inputs_t, new_rank) - with self.test_session() as sess: + with self.cached_session() as sess: flattened = sess.run(flattened_t) np.testing.assert_array_equal(expected_indices, flattened.indices) @@ -1606,7 +1629,7 @@ class FCTest(test.TestCase): def testCreateFC(self): height, width = 3, 3 for layer_fn in (_layers.fully_connected, layers_lib.relu): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = np.random.uniform(size=(5, height * width * 3)) output = layer_fn(inputs, 32) self.assertEqual(output.op.name, 'fully_connected/Relu') @@ -1618,7 +1641,7 @@ class FCTest(test.TestCase): def testCreateFCWithScope(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): inputs = random_ops.random_uniform((5, height * width * 3), seed=1) output = _layers.fully_connected(inputs, 32, scope='fc1') self.assertEqual(output.op.name, 'fc1/Relu') @@ -1636,7 +1659,7 @@ class FCTest(test.TestCase): def testCreateFcCreatesWeightsAndBiasesVars(self): height, width = 3, 3 inputs = random_ops.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('fc1/weights')) self.assertFalse(variables.get_variables('fc1/biases')) _layers.fully_connected(inputs, 32, scope='fc1') @@ -1646,7 +1669,7 @@ class FCTest(test.TestCase): def testReuseVars(self): height, width = 3, 3 inputs = random_ops.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): + with self.cached_session(): _layers.fully_connected(inputs, 32, scope='fc1') self.assertEqual(len(variables.get_variables('fc1')), 2) _layers.fully_connected(inputs, 32, scope='fc1', reuse=True) @@ -1655,7 +1678,7 @@ class FCTest(test.TestCase): def testNonReuseVars(self): height, width = 3, 3 inputs = random_ops.random_uniform((5, height * width * 3), seed=1) - with self.test_session(): + with self.cached_session(): _layers.fully_connected(inputs, 32) self.assertEqual(len(variables.get_variables('fully_connected')), 2) _layers.fully_connected(inputs, 32) @@ -1690,14 +1713,14 @@ class FCTest(test.TestCase): def 
testCreateFCWithoutActivation(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): inputs = random_ops.random_uniform((5, height * width * 3), seed=1) output = _layers.fully_connected(inputs, 32, activation_fn=None) self.assertEqual(output.op.name, 'fully_connected/BiasAdd') def testCreateFCWithWD(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: inputs = random_ops.random_uniform((5, height * width * 3), seed=1) weight_decay = regularizers.l2_regularizer(0.01) _layers.fully_connected(inputs, 32, weights_regularizer=weight_decay) @@ -1709,7 +1732,7 @@ class FCTest(test.TestCase): def testCreateFCWithBD(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: inputs = random_ops.random_uniform((5, height * width * 3), seed=1) bias_decay = regularizers.l2_regularizer(0.01) _layers.fully_connected(inputs, 32, biases_regularizer=bias_decay) @@ -1721,7 +1744,7 @@ class FCTest(test.TestCase): def testCreateNoRegularizers(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): inputs = random_ops.random_uniform((5, height * width * 3), seed=1) _layers.fully_connected(inputs, 32) self.assertEqual( @@ -1729,7 +1752,7 @@ class FCTest(test.TestCase): def testReuseFCWithWD(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): inputs = random_ops.random_uniform((5, height * width * 3), seed=1) weight_decay = regularizers.l2_regularizer(0.01) _layers.fully_connected( @@ -1745,7 +1768,7 @@ class FCTest(test.TestCase): def testFCWithBatchNorm(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height * width * 3), seed=1) with arg_scope( [_layers.fully_connected], @@ -1763,7 +1786,7 @@ class FCTest(test.TestCase): def testReuseFCWithBatchNorm(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height * width * 3), seed=1) with arg_scope( [_layers.fully_connected], @@ -1791,27 +1814,27 @@ class BatchNormTest(test.TestCase): a, center=False, data_format='NCHW', zero_debias_moving_mean=True) def testUnknownShape(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) with self.assertRaisesRegexp(ValueError, 'undefined rank'): _layers.batch_norm(inputs) def testInvalidDataFormat(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) with self.assertRaisesRegexp( ValueError, 'data_format has to be either NCHW or NHWC.'): _layers.batch_norm(inputs, data_format='CHWN') def testUnknownChannelsDimNHWC(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) inputs.set_shape(tensor_shape.TensorShape((5, 3, 3, None))) with self.assertRaisesRegexp(ValueError, 'undefined'): _layers.batch_norm(inputs, data_format='NHWC') def testUnknownChannelsDimNCHW(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) inputs.set_shape(tensor_shape.TensorShape((5, None, 3, 3))) with self.assertRaisesRegexp(ValueError, 'undefined'): 
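The bulk of the hunks in these test files make one mechanical change: `self.test_session()` becomes `self.cached_session()`, and graph-scoped calls like `self.test_session(g)` become `self.session(g)`. `cached_session()` returns a session that is cached and reused across calls within the same test method, while `test_session()` is deprecated. A minimal sketch of the resulting test shape, assuming a `test.TestCase` subclass (the class and test names below are illustrative, not part of this diff):

from tensorflow.python.framework import constant_op
from tensorflow.python.platform import test


class CachedSessionMigrationTest(test.TestCase):

  def testRunsInCachedSession(self):
    # cached_session() hands back the same underlying session on every call
    # inside this test method, so repeated `with` blocks do not rebuild a
    # session each time.
    with self.cached_session() as sess:
      total = constant_op.constant(1.0) + constant_op.constant(2.0)
      self.assertAllClose(3.0, sess.run(total))


if __name__ == '__main__':
  test.main()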
@@ -1821,7 +1844,7 @@ class BatchNormTest(test.TestCase): if dtype is None: dtype = dtypes.float32 height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)).astype( dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) @@ -1843,7 +1866,7 @@ class BatchNormTest(test.TestCase): def _testCreateOpBetaRegularizer(self, fused=True): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): reg = lambda x: 0.1 * math_ops.reduce_sum(x) images = np.random.uniform(size=(5, height, width, 3)).astype('f') _layers.batch_norm(images, param_regularizers={'beta': reg}, fused=fused) @@ -1860,7 +1883,7 @@ class BatchNormTest(test.TestCase): def _testCreateOpGammaRegularizer(self, fused=True): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): reg = lambda x: 0.1 * math_ops.reduce_sum(x) images = np.random.uniform(size=(5, height, width, 3)).astype('f') _layers.batch_norm( @@ -1878,7 +1901,7 @@ class BatchNormTest(test.TestCase): def testCreateVariables(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.batch_norm(images, scale=True) beta = variables.get_variables_by_name('beta')[0] @@ -1892,7 +1915,7 @@ class BatchNormTest(test.TestCase): def testMovingAverageVariables(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.batch_norm(images, scale=True) self.assertEqual(len(variables.get_model_variables()), 4) @@ -1903,7 +1926,7 @@ class BatchNormTest(test.TestCase): def testMovingAverageVariablesZeroDebias(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.batch_norm( images, scale=True, zero_debias_moving_mean=True, fused=False) @@ -1920,7 +1943,7 @@ class BatchNormTest(test.TestCase): def testUpdatesCollection(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.batch_norm(images, updates_collections='my_update_ops') update_layers = ops.get_collection('my_update_ops') @@ -1948,7 +1971,7 @@ class BatchNormTest(test.TestCase): def testReuseVariables(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.batch_norm(images, scale=True, scope='bn') _layers.batch_norm(images, scale=True, scope='bn', reuse=True) @@ -1963,7 +1986,7 @@ class BatchNormTest(test.TestCase): def testReuseUpdateOps(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) with arg_scope([_layers.batch_norm], updates_collections='update_ops'): _layers.batch_norm(images, scope='bn') @@ -1973,7 +1996,7 @@ class BatchNormTest(test.TestCase): def testCreateMovingVars(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _ = _layers.batch_norm(images) moving_mean = variables.get_variables('BatchNorm/moving_mean') @@ -2006,7 +2029,7 @@ class BatchNormTest(test.TestCase): moving_variance = variables.get_variables_by_name('moving_variance')[0] biased = 
variables.get_variables_by_name('biased')[0] local_step = variables.get_variables_by_name('local_step')[0] - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) self.assertAllClose(local_step.eval(), 0) self.assertAllClose(moving_mean.eval(), [0] * channels) @@ -2190,7 +2213,7 @@ class BatchNormTest(test.TestCase): def _testEvalMovingVars(self, zero_debias_moving_mean=False): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: image_shape = (10, height, width, 3) image_values = np.random.rand(*image_shape) expected_mean = np.mean(image_values, axis=(0, 1, 2)) @@ -2241,7 +2264,7 @@ class BatchNormTest(test.TestCase): height, width = 3, 3 batch_size = 10 channels = 3 - with self.test_session() as sess: + with self.cached_session() as sess: image_shape = (batch_size, height, width, channels) image_values = np.random.rand(*image_shape) expected_mean = np.mean(image_values, axis=(0, 1, 2)) @@ -2412,7 +2435,7 @@ class BatchNormTest(test.TestCase): def testNoUpdatesWhenIsTrainingFalse(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: image_shape = (10, height, width, 3) image_values = np.random.rand(*image_shape) images = constant_op.constant( @@ -2437,7 +2460,7 @@ class BatchNormTest(test.TestCase): def testNoneUpdatesCollectionNoTraining(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: image_shape = (10, height, width, 3) image_values = np.random.rand(*image_shape) images = constant_op.constant( @@ -2624,7 +2647,7 @@ class BatchNormTest(test.TestCase): def testCustomInitializer(self): height, width = 3, 3 channels = 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = (np.ones((5, height, width, channels)) * 9.0).astype('f') beta = init_ops.constant_initializer( (np.ones(channels) * 5.0).astype('f')) @@ -2705,7 +2728,7 @@ class BatchNormTest(test.TestCase): def testBatchNormBeta(self): # Test case for 11673 - with self.test_session() as sess: + with self.cached_session() as sess: a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) _layers.batch_norm( a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True) @@ -2716,7 +2739,7 @@ class BatchNormTest(test.TestCase): def testVariablesAreFloat32(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, dtype=dtypes.float16) _layers.batch_norm(images, scale=True) @@ -2787,13 +2810,13 @@ class BatchNormTest(test.TestCase): class LayerNormTest(test.TestCase): def testUnknownShape(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) with self.assertRaisesRegexp(ValueError, 'undefined rank'): _layers.layer_norm(inputs) def testParamsDimsNotFullyDefined(self): - with ops.Graph().as_default() as g, self.test_session(g): + with ops.Graph().as_default() as g, self.session(g): inputs = array_ops.placeholder(dtype=dtypes.float32) inputs.set_shape(tensor_shape.TensorShape((5, 3, 3, None))) with self.assertRaisesRegexp(ValueError, 'is not fully defined'): @@ -2801,7 +2824,7 @@ class LayerNormTest(test.TestCase): def testCreateOp(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)) 
output = _layers.layer_norm(images) self.assertTrue(output.op.name.startswith('LayerNorm/batchnorm')) @@ -2809,7 +2832,7 @@ class LayerNormTest(test.TestCase): def testCreateVariables(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.layer_norm(images) beta = variables.get_variables_by_name('beta')[0] @@ -2819,7 +2842,7 @@ class LayerNormTest(test.TestCase): def testReuseVariables(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) _layers.layer_norm(images, scope='ln') _layers.layer_norm(images, scope='ln', reuse=True) @@ -2830,7 +2853,7 @@ class LayerNormTest(test.TestCase): def testReuseVars(self): height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: image_shape = (10, height, width, 3) image_values = np.random.rand(*image_shape) images = constant_op.constant( @@ -2853,7 +2876,7 @@ class LayerNormTest(test.TestCase): for sigma in [1.0, 0.1]: input_values = np.random.randn(*input_shape) * sigma + mu with ops.Graph().as_default() as g: - with self.test_session(graph=g) as sess: + with self.session(graph=g) as sess: inputs = constant_op.constant( input_values, shape=input_shape, dtype=dtype) output_t = _layers.layer_norm( @@ -2917,7 +2940,7 @@ class GDNTest(test.TestCase): def _runGDN(self, x, shape, inverse, data_format): inputs = array_ops.placeholder(dtypes.float32, shape) outputs = _layers.gdn(inputs, inverse=inverse, data_format=data_format) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() y, = sess.run([outputs], {inputs: x}) return y @@ -3129,14 +3152,14 @@ class MaxPool3DTest(test.TestCase): class OneHotEncodingTest(test.TestCase): def testOneHotEncodingCreate(self): - with self.test_session(): + with self.cached_session(): labels = np.array([0, 1, 2]) output = _layers.one_hot_encoding(labels, num_classes=3) self.assertEqual(output.op.name, 'OneHotEncoding/one_hot') self.assertListEqual(output.get_shape().as_list(), [3, 3]) def testCollectOutputs(self): - with self.test_session(): + with self.cached_session(): labels = constant_op.constant([0, 1, 2]) output = _layers.one_hot_encoding( labels, num_classes=3, outputs_collections='outputs') @@ -3145,14 +3168,14 @@ class OneHotEncodingTest(test.TestCase): self.assertEqual(c_output, output) def testOneHotEncoding(self): - with self.test_session(): + with self.cached_session(): labels = constant_op.constant([0, 1, 2]) one_hot_labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) output = _layers.one_hot_encoding(labels, num_classes=3) self.assertAllClose(output.eval(), one_hot_labels.eval()) def testOneHotEncodingInt32(self): - with self.test_session(): + with self.cached_session(): labels = constant_op.constant([0, 1, 2], dtype=dtypes.int32) one_hot_labels = constant_op.constant([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) output = _layers.one_hot_encoding(labels, num_classes=3) @@ -3163,7 +3186,7 @@ class RepeatTests(test.TestCase): def testRepeat(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height, width, 3)).astype(np.float32) output = _layers.repeat(images, 3, layers_lib.conv2d, 32, [3, 3]) self.assertEqual(output.op.name, 'Repeat/convolution2d_3/Relu') @@ -3171,7 +3194,7 @@ class RepeatTests(test.TestCase): def 
testRepeatWithScope(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.repeat( @@ -3184,7 +3207,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvInt32(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, dtype=dtypes.int32, maxval=12345) with self.assertRaisesRegexp(TypeError, 'non-floating point type'): @@ -3192,7 +3215,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvFloat32(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, dtype=dtypes.float32) output = layers_lib.separable_conv2d(images, 32, [3, 3], 2) @@ -3201,7 +3224,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateDepthwiseConv(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d(images, None, [3, 3], 2) self.assertEqual(output.op.name, 'SeparableConv2d/Relu') @@ -3210,7 +3233,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvCreatesWeightsAndBiasesVars(self): height, width = 3, 3 images = random_ops.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('conv1/depthwise_weights')) self.assertFalse(variables.get_variables('conv1/pointwise_weights')) self.assertFalse(variables.get_variables('conv1/biases')) @@ -3222,7 +3245,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateAtrousConvCreatesWeightsAndBiasesVars(self): height, width = 3, 3 images = random_ops.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('conv1/depthwise_weights')) self.assertFalse(variables.get_variables('conv1/pointwise_weights')) self.assertFalse(variables.get_variables('conv1/biases')) @@ -3234,7 +3257,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateDepthwiseConvCreatesWeightsAndBiasesVars(self): height, width = 3, 3 images = random_ops.random_uniform((5, height, width, 3), seed=1) - with self.test_session(): + with self.cached_session(): self.assertFalse(variables.get_variables('conv1/depthwise_weights')) self.assertFalse(variables.get_variables('conv1/pointwise_weights')) self.assertFalse(variables.get_variables('conv1/biases')) @@ -3245,14 +3268,14 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvWithScope(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d(images, 32, [3, 3], 6, scope='conv1') self.assertEqual(output.op.name, 'conv1/Relu') def testCreateConvWithoutActivation(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d( images, 32, [3, 3], 8, activation_fn=None) @@ -3260,7 +3283,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvValid(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d( 
images, 32, [3, 3], 2, padding='VALID') @@ -3268,7 +3291,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateAtrousConvValid(self): height, width = 5, 5 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d( images, 32, [3, 3], 2, padding='VALID', rate=2) @@ -3276,7 +3299,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateDepthwiseConvValid(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d( images, None, [3, 3], 2, padding='VALID') @@ -3284,7 +3307,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateAtrousDepthwiseConvValid(self): height, width = 5, 5 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) output = layers_lib.separable_conv2d( images, None, [3, 3], 2, padding='VALID', rate=2) @@ -3293,7 +3316,7 @@ class SeparableConv2dTest(test.TestCase): def testCreateConvWithWeightDecay(self): random_seed.set_random_seed(0) height, width = 3, 3 - with self.test_session() as sess: + with self.cached_session() as sess: images = random_ops.random_uniform((5, height, width, 3), seed=1) regularizer = regularizers.l2_regularizer(0.01) layers_lib.separable_conv2d( @@ -3337,7 +3360,7 @@ class SeparableConv2dTest(test.TestCase): def testReuseConvWithWeightDecay(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform((5, height, width, 3), seed=1) regularizer = regularizers.l2_regularizer(0.01) layers_lib.separable_conv2d( @@ -3396,7 +3419,7 @@ class SeparableConv2dTest(test.TestCase): normalizer_params={}, scope='conv1') init_op = variables_lib.global_variables_initializer() - with self.test_session() as sess: + with self.cached_session() as sess: images = np.random.rand(5, height, width, 3) sess.run(init_op) sess.run(net, feed_dict={images_placeholder: images}) @@ -3417,7 +3440,7 @@ class SeparableConv2dTest(test.TestCase): def testSepConvNCHW(self): for num_filters, correct_output_filters in zip((None, 5), (6, 5)): - with self.test_session(): + with self.cached_session(): batch, height, width = 4, 10, 12 kernel_dim, stride = 3, 2 images = random_ops.random_uniform((batch, 3, height, width), seed=1) @@ -3439,7 +3462,7 @@ class ScaleGradientTests(test.TestCase): """Simple tests of the scale_gradient function.""" def testBasic(self): - with self.test_session(): + with self.cached_session(): x = np.array([42], np.float32) gradient_scale = np.array([2], np.float32) @@ -3490,7 +3513,7 @@ class SoftmaxTests(test.TestCase): exp_prediction = np.array([[self.low, self.high], [0.5, 0.5], [self.high, self.low]]) - with self.test_session() as sess: + with self.cached_session() as sess: prediction = sess.run(prediction) self.assertAllClose(exp_prediction, prediction) @@ -3506,7 +3529,7 @@ class SoftmaxTests(test.TestCase): exp_prediction[1, 1, 1] = self.low prediction = _layers.softmax(logits) - with self.test_session() as sess: + with self.cached_session() as sess: prediction = sess.run(prediction) self.assertAllClose(exp_prediction, prediction) @@ -3524,7 +3547,7 @@ class SoftmaxTests(test.TestCase): exp_prediction[1, 1, 1] = self.low prediction = _layers.softmax(logit_placeholder) - with self.test_session() as sess: + with self.cached_session() as sess: prediction = sess.run(prediction, 
feed_dict=feed_dict) self.assertAllClose(exp_prediction, prediction) @@ -3552,7 +3575,7 @@ class SpatialSoftmaxTests(test.TestCase): features = array_ops.placeholder(dtypes.float32, shape=batch_shape) np_features = np.zeros(batch_shape, dtype=np.float32) spatial_softmax = _layers.spatial_softmax(features) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3563,7 +3586,7 @@ class SpatialSoftmaxTests(test.TestCase): features = array_ops.placeholder(dtypes.float32, shape=batch_shape) np_features = np.zeros(batch_shape, dtype=np.float32) spatial_softmax = _layers.spatial_softmax(features, data_format='NCHW') - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3590,7 +3613,7 @@ class SpatialSoftmaxTests(test.TestCase): nchannels) # Make sure expected location keypoints matches actual location keypoints. - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3614,7 +3637,7 @@ class SpatialSoftmaxTests(test.TestCase): nchannels) # Make sure expected location keypoints matches actual location keypoints. - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3646,7 +3669,7 @@ class SpatialSoftmaxTests(test.TestCase): batch_size, nchannels) # Make sure expected location keypoints matches actual location keypoints. - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features1} tf_keypoints1 = sess.run(spatial_softmax, feed_dict) @@ -3673,7 +3696,7 @@ class SpatialSoftmaxTests(test.TestCase): nchannels) # Make sure expected location keypoints matches actual location keypoints. - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3696,7 +3719,7 @@ class SpatialSoftmaxTests(test.TestCase): nchannels) # Make sure expected location keypoints matches actual location keypoints. 
- with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} keypoints = sess.run(spatial_softmax, feed_dict) @@ -3708,7 +3731,7 @@ class SpatialSoftmaxTests(test.TestCase): spatial_softmax = _layers.spatial_softmax(features) net = _layers.fully_connected(spatial_softmax, 10) np_features = np.zeros(batch_shape, dtype=np.float32) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables_lib.global_variables_initializer()) feed_dict = {features: np_features} sess.run(net, feed_dict) @@ -3718,7 +3741,7 @@ class StackTests(test.TestCase): def testStackFullyConnected(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = np.random.uniform(size=(5, height * width * 3)) output = _layers.stack(images, _layers.fully_connected, [10, 20, 30]) self.assertEqual(output.op.name, 'Stack/fully_connected_3/Relu') @@ -3726,7 +3749,7 @@ class StackTests(test.TestCase): def testStackFullyConnectedFailOnReuse(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): with variable_scope.variable_scope('test', reuse=True): images = np.random.uniform(size=(5, height * width * 3)) with self.assertRaises(ValueError): @@ -3734,7 +3757,7 @@ class StackTests(test.TestCase): def testStackRelu(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height * width * 3), seed=1, name='images') output = _layers.stack(images, layers_lib.relu, [10, 20, 30]) @@ -3743,7 +3766,7 @@ class StackTests(test.TestCase): def testStackElu(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height * width * 3), seed=1, name='images') output = _layers.stack(images, layers_lib.elu, [10, 20, 30]) @@ -3752,7 +3775,7 @@ class StackTests(test.TestCase): def testStackConvolution2d(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.stack( @@ -3765,7 +3788,7 @@ class StackTests(test.TestCase): def testStackWithScope(self): height, width = 3, 3 - with self.test_session(): + with self.cached_session(): images = random_ops.random_uniform( (5, height, width, 3), seed=1, name='images') output = _layers.stack( @@ -3794,7 +3817,7 @@ class UnitNormTests(test.TestCase): del shape[dim] expected = np.ones(shape) - with self.test_session(): + with self.cached_session(): actual = norms.eval() self.assertAllClose(expected, actual, 1e-4, 1e-4) @@ -3826,7 +3849,7 @@ class UnitNormTests(test.TestCase): norms = math_ops.sqrt( math_ops.reduce_sum(math_ops.square(output), reduction_indices=dim)) - with self.test_session(): + with self.cached_session(): actual = norms.eval({image: placeholder_value}) self.assertAllClose(expected, actual, 1e-4, 1e-4) @@ -3852,7 +3875,7 @@ class PoincareNormalizeTest(test.TestCase): x_np = np.random.random_sample(x_shape).astype(np.float32) for dim in range(len(x_shape)): y_np = self._PoincareNormalize(x_np, dim, epsilon) - with self.test_session(): + with self.cached_session(): x_tf = constant_op.constant(x_np, name='x') y_tf = _layers.poincare_normalize(x_tf, dim, epsilon) y_tf_eval = y_tf.eval() @@ -3870,7 +3893,7 @@ class PoincareNormalizeTest(test.TestCase): x_np = np.random.random_sample(x_shape).astype(np.float32) dim = [1, 2] y_np = 
self._PoincareNormalize(x_np, dim, epsilon) - with self.test_session(): + with self.cached_session(): x_tf = constant_op.constant(x_np, name='x') y_tf = _layers.poincare_normalize(x_tf, dim, epsilon) y_tf_eval = y_tf.eval() @@ -3885,7 +3908,7 @@ class PoincareNormalizeTest(test.TestCase): np.random.seed(1) x_np = np.random.random_sample(x_shape).astype(np.float64) for dim in range(len(x_shape)): - with self.test_session(): + with self.cached_session(): x_tf = constant_op.constant(x_np, name='x') y_tf = _layers.poincare_normalize(x_tf, dim) err = gradient_checker.compute_gradient_error(x_tf, x_shape, y_tf, @@ -4094,7 +4117,7 @@ class LegacyFullyConnectedTest(test.TestCase): # Empty x is common if someone masks their input with tf.boolean_mask in # order to drop missing entries, and in a particular batch all entries are # missing. - with self.test_session(): + with self.cached_session(): x = np.array([]).reshape(0, 3) self.assertEqual(0, array_ops.size(x).eval()) y = _layers.legacy_fully_connected(x, 2, activation_fn=nn_ops.softmax) @@ -4108,7 +4131,7 @@ class LegacyFullyConnectedTest(test.TestCase): y = _layers.legacy_fully_connected(x, 1) # in the output we still only know the 2nd and 3rd dimensions statically. self.assertEqual(y.get_shape().as_list(), [None, 4, 1]) - with self.test_session() as sess: + with self.cached_session() as sess: variables_lib.global_variables_initializer().run() # we can feed in input with first dimension 2 shape_value = sess.run( @@ -4139,7 +4162,7 @@ class LegacyFullyConnectedTest(test.TestCase): self._unknown_dim_invalid_input(last_dim=None) def test_1d_invalid_input(self): - with self.test_session(): + with self.cached_session(): with self.assertRaisesRegexp(ValueError, 'rank of x must be at least 2 not: 1'): x = constant_op.constant([[]], shape=[0]) diff --git a/tensorflow/contrib/layers/python/layers/normalization.py b/tensorflow/contrib/layers/python/layers/normalization.py index c807ab0f2e..11033a2e9c 100644 --- a/tensorflow/contrib/layers/python/layers/normalization.py +++ b/tensorflow/contrib/layers/python/layers/normalization.py @@ -176,7 +176,8 @@ def group_norm(inputs, variables_collections=None, outputs_collections=None, trainable=True, - scope=None): + scope=None, + mean_close_to_zero=False): """Functional interface for the group normalization layer. Reference: https://arxiv.org/abs/1803.08494. @@ -222,6 +223,19 @@ def group_norm(inputs, trainable: If `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). scope: Optional scope for `variable_scope`. + mean_close_to_zero: The mean of `input` before ReLU will be close to zero + when batch size >= 4k for ResNet-50 on TPU. If `True`, use + `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the + variance. This matches the behavior of `fused=True` in batch + normalization. If `False`, use `nn.moments` to calculate the variance. + When `mean` is close to zero, like 1e-4, using `mean` to calculate the + variance may give poor results due to repeated roundoff error and + denormalization in `mean`. When `mean` is large, like 1e2, + sum(`input`^2) is so large that only the high-order digits of the elements + are being accumulated. Thus, using sum((`input` - `mean`)^2)/n to calculate + the variance gives better accuracy than sum(`input`^2)/n - `mean`^2 + when `mean` is large. + Returns: A `Tensor` representing the output of the operation.
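The numerical trade-off that the new `mean_close_to_zero` argument documents can be sketched in isolation. The snippet below (an illustrative sketch, not part of this diff; the tensor values are invented) computes the variance along both paths the flag selects between, with `nn.moments` as the default and the `nn.sufficient_statistics` / `nn.normalize_moments` pair as the one-pass alternative:

import numpy as np

from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import nn

# Values with a tiny mean (~1e-4) and small variance (~1e-4), the regime the
# docstring describes for pre-ReLU activations at large batch sizes.
x = constant_op.constant(
    (np.random.randn(8, 1024) * 1e-2 + 1e-4).astype(np.float32))

# Default path (mean_close_to_zero=False): nn.moments.
mean_default, var_default = nn.moments(x, [1], keep_dims=True)

# One-pass path (mean_close_to_zero=True): accumulate count, sum(x), and
# sum(x**2) in a single sweep, then convert the sufficient statistics into
# moments.
counts, mean_ss, var_ss, _ = nn.sufficient_statistics(x, [1], keep_dims=True)
mean_onepass, var_onepass = nn.normalize_moments(
    counts, mean_ss, var_ss, shift=None)

with session_lib.Session() as sess:
  v_default, v_onepass = sess.run([var_default, var_onepass])
  # Both are ~1e-4 here; the docstring's claim is that the one-pass form
  # loses less accuracy to roundoff when the true mean is close to zero.
  print(v_default.mean(), v_onepass.mean())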
@@ -333,7 +347,14 @@ def group_norm(inputs, gamma = array_ops.reshape(gamma, params_shape_broadcast) # Calculate the moments. - mean, variance = nn.moments(inputs, moments_axes, keep_dims=True) + if mean_close_to_zero: + # The one-pass algorithm gives a better result when the mean is close to zero. + counts, means_ss, variance_ss, _ = nn.sufficient_statistics( + inputs, moments_axes, keep_dims=True) + mean, variance = nn.normalize_moments( + counts, means_ss, variance_ss, shift=None) + else: + mean, variance = nn.moments(inputs, moments_axes, keep_dims=True) # Compute normalization. # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor diff --git a/tensorflow/contrib/layers/python/layers/normalization_test.py b/tensorflow/contrib/layers/python/layers/normalization_test.py index b6e96350db..c8d3c91b10 100644 --- a/tensorflow/contrib/layers/python/layers/normalization_test.py +++ b/tensorflow/contrib/layers/python/layers/normalization_test.py @@ -106,7 +106,7 @@ class InstanceNormTest(test.TestCase): images = random_ops.random_uniform(image_shape, seed=1) output_train = normalization.instance_norm(images, scope='IN') output_eval = normalization.instance_norm(images, scope='IN', reuse=True) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) # output_train and output_eval should be the same. train_np, eval_np = sess.run([output_train, output_eval]) @@ -130,7 +130,7 @@ class InstanceNormTest(test.TestCase): inputs = random_ops.random_uniform(input_shape, seed=0) * sigma + mu output_op = normalization.instance_norm( inputs, center=False, scale=False, data_format=data_format) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) outputs = sess.run(output_op) # Make sure that there are no NaNs @@ -287,14 +287,19 @@ class GroupNormTest(test.TestCase): output_train = normalization.group_norm(images, groups=2, scope='IN') output_eval = normalization.group_norm(images, groups=2, scope='IN', reuse=True) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) # output_train and output_eval should be the same. train_np, eval_np = sess.run([output_train, output_eval]) self.assertAllClose(train_np, eval_np) - def doOutputTest(self, input_shape, channels_axis=None, reduction_axes=None, - groups=2, tol=1e-2): + def doOutputTest(self, + input_shape, + channels_axis=None, + reduction_axes=None, + mean_close_to_zero=False, + groups=2, + tol=1e-2): # Select the axis for the channel and the dimensions along which statistics # are accumulated. if channels_axis < 0: @@ -322,18 +327,29 @@ class GroupNormTest(test.TestCase): if i not in reduced_axes: reduced_shape.append(a) - for mu in (0.0, 1e2): - for sigma in (1.0, 0.1): + if mean_close_to_zero: + mu_tuple = (1e-4, 1e-2, 1.0) + sigma_tuple = (1e-2, 0.1, 1.0) + else: + mu_tuple = (1.0, 1e2) + sigma_tuple = (1.0, 0.1) + + for mu in mu_tuple: + for sigma in sigma_tuple: # Determine shape of Tensor after normalization.
expected_mean = np.zeros(reduced_shape) expected_var = np.ones(reduced_shape) - inputs = random_ops.random_uniform(input_shape, seed=0) * sigma + mu + inputs = random_ops.random_normal(input_shape, seed=0) * sigma + mu output_op = normalization.group_norm( - inputs, groups=groups, center=False, scale=False, + inputs, + groups=groups, + center=False, + scale=False, channels_axis=channels_axis, - reduction_axes=reduction_axes) - with self.test_session() as sess: + reduction_axes=reduction_axes, + mean_close_to_zero=mean_close_to_zero) + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) outputs = sess.run(output_op) # Make sure that there are no NaNs @@ -347,12 +363,32 @@ class GroupNormTest(test.TestCase): self.assertAllClose(expected_mean, mean, rtol=tol, atol=tol) self.assertAllClose(expected_var, var, rtol=tol, atol=tol) + def doOutputTestForMeanCloseToZero(self, + input_shape, + channels_axis=None, + reduction_axes=None, + groups=2, + tol=5e-2): + self.doOutputTest( + input_shape, + channels_axis=channels_axis, + reduction_axes=reduction_axes, + groups=groups, + tol=tol, + mean_close_to_zero=True) + def testOutputSmallInput4D_NHWC(self): input_shape = [10, 10, 10, 30] # Specify axes with positive values. self.doOutputTest(input_shape, channels_axis=3, reduction_axes=[1, 2]) # Specify axes with negative values. self.doOutputTest(input_shape, channels_axis=-1, reduction_axes=[-3, -2]) + # Specify axes with positive values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=3, reduction_axes=[1, 2]) + # Specify axes with negative values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=-1, reduction_axes=[-3, -2]) def testOutputSmallInput3D_NHWC(self): input_shape = [10, 10, 30] @@ -360,6 +396,12 @@ class GroupNormTest(test.TestCase): self.doOutputTest(input_shape, channels_axis=2, reduction_axes=[0, 1]) # Specify axes with negative values. self.doOutputTest(input_shape, channels_axis=-1, reduction_axes=[-3, -2]) + # Specify axes with positive values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=2, reduction_axes=[0, 1]) + # Specify axes with negative values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=-1, reduction_axes=[-3, -2]) def testOutputSmallInput4D_NCHW(self): input_shape = [10, 10, 10, 30] @@ -367,6 +409,12 @@ class GroupNormTest(test.TestCase): self.doOutputTest(input_shape, channels_axis=1, reduction_axes=[2, 3]) # Specify axes with negative values. self.doOutputTest(input_shape, channels_axis=-3, reduction_axes=[-2, -1]) + # Specify axes with positive values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=1, reduction_axes=[2, 3]) + # Specify axes with negative values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=-3, reduction_axes=[-2, -1]) def testOutputSmallInput3D_NCHW(self): input_shape = [10, 10, 30] @@ -374,23 +422,43 @@ class GroupNormTest(test.TestCase): self.doOutputTest(input_shape, channels_axis=0, reduction_axes=[1, 2]) # Specify axes with negative values. self.doOutputTest(input_shape, channels_axis=-3, reduction_axes=[-2, -1]) + # Specify axes with positive values. + self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=0, reduction_axes=[1, 2]) + # Specify axes with negative values. 
+ self.doOutputTestForMeanCloseToZero( + input_shape, channels_axis=-3, reduction_axes=[-2, -1]) def testOutputBigInput4D_NHWC(self): - self.doOutputTest([5, 100, 100, 1], channels_axis=3, reduction_axes=[1, 2], - groups=1) + self.doOutputTest( + [5, 100, 100, 1], channels_axis=3, reduction_axes=[1, 2], groups=1) + self.doOutputTestForMeanCloseToZero( + [5, 100, 100, 1], channels_axis=3, reduction_axes=[1, 2], groups=1) def testOutputBigInput4D_NCHW(self): - self.doOutputTest([1, 100, 100, 4], channels_axis=1, reduction_axes=[2, 3], - groups=4) + self.doOutputTest( + [1, 100, 100, 4], channels_axis=1, reduction_axes=[2, 3], groups=4) + self.doOutputTestForMeanCloseToZero( + [1, 100, 100, 4], channels_axis=1, reduction_axes=[2, 3], groups=4) def testOutputSmallInput2D_NC(self): - self.doOutputTest([10, 7*100], channels_axis=1, reduction_axes=[], groups=7) + self.doOutputTest( + [10, 7 * 100], channels_axis=1, reduction_axes=[], groups=7) + self.doOutputTestForMeanCloseToZero( + [10, 7 * 100], channels_axis=1, reduction_axes=[], groups=7) def testOutputSmallInput5D_NCXXX(self): - self.doOutputTest([10, 10, 20, 40, 5], - channels_axis=1, - reduction_axes=[2, 3, 4], - groups=5) + self.doOutputTest( + [10, 10, 20, 40, 5], + channels_axis=1, + reduction_axes=[2, 3, 4], + groups=5) + self.doOutputTestForMeanCloseToZero( + [10, 10, 20, 40, 5], + channels_axis=1, + reduction_axes=[2, 3, 4], + groups=5) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py index a4461a20e5..29dede2a49 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers_test.py +++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py @@ -66,7 +66,7 @@ class OptimizersTest(test.TestCase): ] for optimizer in optimizers: with ops.Graph().as_default() as g: - with self.test_session(graph=g) as session: + with self.session(graph=g) as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, global_step, learning_rate=0.1, optimizer=optimizer) @@ -82,7 +82,7 @@ class OptimizersTest(test.TestCase): return gradient_descent.GradientDescentOptimizer(learning_rate=0.1) with ops.Graph().as_default() as g: - with self.test_session(graph=g) as session: + with self.session(graph=g) as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, global_step, learning_rate=None, optimizer=optimizer_fn) @@ -96,14 +96,14 @@ class OptimizersTest(test.TestCase): optimizers = ["blah", variables.Variable, object(), lambda x: None] for optimizer in optimizers: with ops.Graph().as_default() as g: - with self.test_session(graph=g): + with self.session(graph=g): _, _, loss, global_step = _setup_model() with self.assertRaises(ValueError): optimizers_lib.optimize_loss( loss, global_step, learning_rate=0.1, optimizer=optimizer) def testBadSummaries(self): - with ops.Graph().as_default() as g, self.test_session(graph=g): + with ops.Graph().as_default() as g, self.session(graph=g): _, _, loss, global_step = _setup_model() with self.assertRaises(ValueError): optimizers_lib.optimize_loss( @@ -111,7 +111,7 @@ class OptimizersTest(test.TestCase): summaries=["loss", "bad_summary"]) def testInvalidLoss(self): - with ops.Graph().as_default() as g, self.test_session(graph=g): + with ops.Graph().as_default() as g, self.session(graph=g): _, _, _, global_step = _setup_model() with self.assertRaises(ValueError): optimizers_lib.optimize_loss( @@ -121,7 +121,7 @@ class 
OptimizersTest(test.TestCase): [[1.0]], global_step, learning_rate=0.1, optimizer="SGD") def testInvalidGlobalStep(self): - with ops.Graph().as_default() as g, self.test_session(graph=g): + with ops.Graph().as_default() as g, self.session(graph=g): x = array_ops.placeholder(dtypes.float32, []) var = variable_scope.get_variable( "test", [], initializer=init_ops.constant_initializer(10)) @@ -157,7 +157,7 @@ class OptimizersTest(test.TestCase): optimizer="SGD") def testInvalidLearningRate(self): - with ops.Graph().as_default() as g, self.test_session(graph=g): + with ops.Graph().as_default() as g, self.session(graph=g): _, _, loss, global_step = _setup_model() with self.assertRaises(ValueError): optimizers_lib.optimize_loss( @@ -165,7 +165,7 @@ class OptimizersTest(test.TestCase): def testGradientNoise(self): random_seed.set_random_seed(42) - with self.test_session() as session: + with self.cached_session() as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, @@ -182,7 +182,7 @@ class OptimizersTest(test.TestCase): def testGradientNoiseWithClipping(self): random_seed.set_random_seed(42) - with self.test_session() as session: + with self.cached_session() as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, @@ -198,7 +198,7 @@ class OptimizersTest(test.TestCase): self.assertEqual(global_step_value, 1) def testGradientClip(self): - with self.test_session() as session: + with self.cached_session() as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, @@ -213,7 +213,7 @@ class OptimizersTest(test.TestCase): self.assertEqual(global_step_value, 1) def testAdaptiveGradientClip(self): - with self.test_session() as session: + with self.cached_session() as session: x, var, loss, global_step = _setup_model() clip_gradients = optimizers_lib.adaptive_clipping_fn() train = optimizers_lib.optimize_loss( @@ -234,7 +234,7 @@ class OptimizersTest(test.TestCase): self.assertEqual(2, var_count) def testGradientMultiply(self): - with self.test_session() as session: + with self.cached_session() as session: x, var, loss, global_step = _setup_model() train = optimizers_lib.optimize_loss( loss, @@ -270,7 +270,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x = array_ops.placeholder(dtypes.float32, []) var = variable_scope.get_variable( "test", [], initializer=init_ops.constant_initializer(10)) @@ -295,7 +295,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g): + with ops.Graph().as_default() as g, self.session(graph=g): x = array_ops.placeholder(dtypes.float32, []) var = variable_scope.get_variable( "test", [], initializer=init_ops.constant_initializer(10)) @@ -319,7 +319,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x, var, loss, global_step = _setup_model() update_var = variable_scope.get_variable( "update", [], initializer=init_ops.constant_initializer(10)) @@ -342,7 +342,7 
@@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x, var, loss, global_step = _setup_model() update_var = variable_scope.get_variable( "update", [], initializer=init_ops.constant_initializer(10)) @@ -365,7 +365,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x, var, loss, global_step = _setup_model() update_var = variable_scope.get_variable( "update", [], initializer=init_ops.constant_initializer(10)) @@ -389,7 +389,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x, var, loss, global_step = _setup_model() update_var = variable_scope.get_variable( "update", [], initializer=init_ops.constant_initializer(10)) @@ -413,7 +413,7 @@ class OptimizersTest(test.TestCase): gradient_descent.GradientDescentOptimizer(learning_rate=0.1) ] for optimizer in optimizers: - with ops.Graph().as_default() as g, self.test_session(graph=g) as session: + with ops.Graph().as_default() as g, self.session(graph=g) as session: x, var, loss, global_step = _setup_model() update_var = variable_scope.get_variable( "update", [], initializer=init_ops.constant_initializer(10)) @@ -433,7 +433,7 @@ class OptimizersTest(test.TestCase): class AdaptiveClipping(test.TestCase): def testAverages(self): - with self.test_session() as session: + with self.cached_session() as session: scale = 2. grad = array_ops.ones([3, 4]) * scale log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements())) @@ -463,7 +463,7 @@ class AdaptiveClipping(test.TestCase): self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4) def testClip(self): - with self.test_session() as session: + with self.cached_session() as session: spike = 1000. 
multiplier = array_ops.placeholder(dtypes.float32, [], "multiplier") step = array_ops.placeholder(dtypes.int32, [], "step") diff --git a/tensorflow/contrib/layers/python/layers/regularizers_test.py b/tensorflow/contrib/layers/python/layers/regularizers_test.py index 07191eeda7..51faba30c7 100644 --- a/tensorflow/contrib/layers/python/layers/regularizers_test.py +++ b/tensorflow/contrib/layers/python/layers/regularizers_test.py @@ -71,7 +71,7 @@ class RegularizerTest(test.TestCase): with self.assertRaises(ValueError): regularizers.l1_l2_regularizer(0.5, 0) - with self.test_session(): + with self.cached_session(): shape = [5, 5, 5] num_elem = 5 * 5 * 5 tensor = constant_op.constant(1.0, shape=shape) @@ -84,7 +84,7 @@ class RegularizerTest(test.TestCase): num_elem = 5 * 5 * 5 tensor = constant_op.constant(1.0, shape=shape) loss = regularizers.l1_l2_regularizer(0.0, 1.0)(tensor) - with self.test_session(): + with self.cached_session(): self.assertEquals(loss.op.name, 'l1_l2_regularizer') self.assertAlmostEqual(loss.eval(), num_elem / 2, 5) @@ -93,7 +93,7 @@ class RegularizerTest(test.TestCase): num_elem = 5 * 5 * 5 tensor = constant_op.constant(1.0, shape=shape) loss = regularizers.l1_l2_regularizer(1.0, 0.0)(tensor) - with self.test_session(): + with self.cached_session(): self.assertEquals(loss.op.name, 'l1_l2_regularizer') self.assertAlmostEqual(loss.eval(), num_elem, 5) @@ -104,7 +104,7 @@ class RegularizerTest(test.TestCase): self.assertEquals(loss, None) def testL1L2RegularizerWithScope(self): - with self.test_session(): + with self.cached_session(): shape = [5, 5, 5] num_elem = 5 * 5 * 5 tensor = constant_op.constant(1.0, shape=shape) @@ -142,7 +142,7 @@ class RegularizerTest(test.TestCase): array_weights_list = [[1.5], [2, 3, 4.2], [10, 42, 666.6]] tensor_weights_list = [constant_op.constant(x) for x in array_weights_list] expected = sum([2 * x for l in array_weights_list for x in l]) - with self.test_session(): + with self.cached_session(): result = regularizers.apply_regularization(dummy_regularizer, tensor_weights_list) self.assertAllClose(expected, result.eval()) @@ -151,7 +151,7 @@ class RegularizerTest(test.TestCase): regularizer = regularizers.l2_regularizer(0.0) array_weights_list = [[1.5], [2, 3, 4.2], [10, 42, 666.6]] tensor_weights_list = [constant_op.constant(x) for x in array_weights_list] - with self.test_session(): + with self.cached_session(): result = regularizers.apply_regularization(regularizer, tensor_weights_list) self.assertAllClose(0.0, result.eval()) @@ -161,7 +161,7 @@ class RegularizerTest(test.TestCase): tensor_weights_list = [ constant_op.constant(x) for x in [[1.5], [2, 3, 4.2], [10, 42, 666.6]] ] - with self.test_session(): + with self.cached_session(): with self.assertRaises(ValueError): regularizers.apply_regularization(non_scalar_regularizer, tensor_weights_list) diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 02d294c68f..06da32072f 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -30,26 +30,37 @@ import functools import re import numpy as np +import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.framework.python import ops as contrib_framework_ops +from tensorflow.python.eager import backprop from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function from tensorflow.python.framework import ops as 
framework_ops from tensorflow.python.layers import base from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_util +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest +from tensorflow.python.util import tf_inspect __all__ = ["rev_block", "RevBlock", "recompute_grad"] LAYER_RE = re.compile(".*revlayer_([0-9]*)/([fg])/.*") _USE_DEFAULT = "__rev_block_lib_default" +_WRONG_VARS_ERR = """\ +The variables used on recompute were different from the variables originally +used. The function wrapped with @recompute_grad likely creates its own variable +scope with a default name and has been called twice in the same enclosing scope. +To fix, ensure each call to the function happens in its own unique variable +scope. +""" def _acc_grads(*lists_of_grads): @@ -142,11 +153,21 @@ def _rev_block_forward(x1, return y1, y2 +def _safe_wraps(fn): + if isinstance(fn, functools.partial): + # functools.partial objects cannot be wrapped as they are missing the + # necessary properties (__name__, __module__, __doc__). + def passthrough(f): + return f + return passthrough + return functools.wraps(fn) + + def _scope_wrap(fn, scope): - @functools.wraps(fn) + @_safe_wraps(fn) def wrap(*args, **kwargs): - with variable_scope.variable_scope(scope): + with variable_scope.variable_scope(scope, use_resource=True): return fn(*args, **kwargs) return wrap @@ -221,95 +242,95 @@ class RevBlock(base.Layer): "build.") self.built = True - def _efficient_grad_fn(self, inputs, variables, ys, grad_ys): - """Custom gradient fn for a block of reversible residual layers.""" - # Inputs have passed through an Identity. Recover the original Tensors to - # be able to match up side inputs.
- assert [u"Identity"] == list(set([x.op.type for x in inputs])) - inputs = [x.op.inputs[0] for x in inputs] - side_inputs = inputs[2:] - del inputs - - f_side_idxs = [None] * len(self.f_side_input) - g_side_idxs = [None] * len(self.g_side_input) - assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input) - - for i, t in enumerate(side_inputs): - if t in self.f_side_input: - f_side_idxs[self.f_side_input.index(t)] = i - elif t in self.g_side_input: - g_side_idxs[self.g_side_input.index(t)] = i - else: - assert False - - f_vars = [[] for _ in range(self.num_layers)] - g_vars = [[] for _ in range(self.num_layers)] - f_vars_idxs = [[] for _ in range(self.num_layers)] - g_vars_idxs = [[] for _ in range(self.num_layers)] - - for i, ref in enumerate(variables): - # Use the name to identify the layer number and function (f or g) - regex = LAYER_RE.match(ref.name) - layer_no = int(regex.group(1)) - fn_name = regex.group(2) - if fn_name == "f": - f_vars[layer_no].append(ref) - f_vars_idxs[layer_no].append(i) - else: - assert fn_name == "g" - g_vars[layer_no].append(ref) - g_vars_idxs[layer_no].append(i) - - f_var_grads = [] - g_var_grads = [] - f_side_grads = [] - g_side_grads = [] - - # Reverse variable containers to go backward - f_vars.reverse() - g_vars.reverse() - f = list(self.f) - g = list(self.g) - f.reverse() - g.reverse() - - with variable_scope.variable_scope(self.scope_name, reuse=True): - for i in xrange(self.num_layers): - ys, grad_ys, f_ret, g_ret = _rev_layer_backward( - ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i], - self.g_side_input) - - grad_f_vars, grad_f_side = f_ret - grad_g_vars, grad_g_side = g_ret - f_var_grads.append(grad_f_vars) - g_var_grads.append(grad_g_vars) - f_side_grads.append(grad_f_side) - g_side_grads.append(grad_g_side) - - # Accumulate layer gradients for f_side_input and g_side_input - acc_f_side_grads = _acc_grads(*f_side_grads) - acc_g_side_grads = _acc_grads(*g_side_grads) - - # Use the stored idxs to put gradients in the passed-in order. - side_input_grads = [None] * len(side_inputs) - variable_grads = [None] * len(variables) - - # Variable gradients were collected in reverse layer order. Reverse to match - # idxs. 
- f_var_grads.reverse() - g_var_grads.reverse() - for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list( - zip(g_vars_idxs, g_var_grads)): - for i, grad in zip(idxs, grads): - variable_grads[i] = grad - - for i, grad in zip(f_side_idxs, acc_f_side_grads): - side_input_grads[i] = grad - for i, grad in zip(g_side_idxs, acc_g_side_grads): - side_input_grads[i] = grad - - grad_x1, grad_x2 = grad_ys - return [grad_x1, grad_x2] + side_input_grads, variable_grads + def _make_efficient_grad_fn(self, inputs_, ys_): + def _efficient_grad_fn(*grad_ys, **kwargs): + """Custom gradient fn for a block of reversible residual layers.""" + inputs = inputs_ + ys = ys_ + variables = kwargs["variables"] + side_inputs = inputs[2:] + + f_side_idxs = [None] * len(self.f_side_input) + g_side_idxs = [None] * len(self.g_side_input) + assert len(side_inputs) == len(self.f_side_input) + len(self.g_side_input) + + for i, t in enumerate(side_inputs): + if t in self.f_side_input: + f_side_idxs[self.f_side_input.index(t)] = i + elif t in self.g_side_input: + g_side_idxs[self.g_side_input.index(t)] = i + else: + assert False + + f_vars = [[] for _ in range(self.num_layers)] + g_vars = [[] for _ in range(self.num_layers)] + f_vars_idxs = [[] for _ in range(self.num_layers)] + g_vars_idxs = [[] for _ in range(self.num_layers)] + + for i, ref in enumerate(variables): + # Use the name to identify the layer number and function (f or g) + regex = LAYER_RE.match(ref.name) + layer_no = int(regex.group(1)) + fn_name = regex.group(2) + if fn_name == "f": + f_vars[layer_no].append(ref) + f_vars_idxs[layer_no].append(i) + else: + assert fn_name == "g" + g_vars[layer_no].append(ref) + g_vars_idxs[layer_no].append(i) + + f_var_grads = [] + g_var_grads = [] + f_side_grads = [] + g_side_grads = [] + + # Reverse variable containers to go backward + f_vars.reverse() + g_vars.reverse() + f = list(self.f) + g = list(self.g) + f.reverse() + g.reverse() + + with variable_scope.variable_scope(self.scope_name, reuse=True): + for i in xrange(self.num_layers): + ys, grad_ys, f_ret, g_ret = _rev_layer_backward( + ys, grad_ys, f[i], g[i], f_vars[i], self.f_side_input, g_vars[i], + self.g_side_input) + + grad_f_vars, grad_f_side = f_ret + grad_g_vars, grad_g_side = g_ret + f_var_grads.append(grad_f_vars) + g_var_grads.append(grad_g_vars) + f_side_grads.append(grad_f_side) + g_side_grads.append(grad_g_side) + + # Accumulate layer gradients for f_side_input and g_side_input + acc_f_side_grads = _acc_grads(*f_side_grads) + acc_g_side_grads = _acc_grads(*g_side_grads) + + # Use the stored idxs to put gradients in the passed-in order. + side_input_grads = [None] * len(side_inputs) + variable_grads = [None] * len(variables) + + # Variable gradients were collected in reverse layer order. Reverse to + # match idxs. 
+ f_var_grads.reverse()
+ g_var_grads.reverse()
+ for idxs, grads in list(zip(f_vars_idxs, f_var_grads)) + list(
+ zip(g_vars_idxs, g_var_grads)):
+ for i, grad in zip(idxs, grads):
+ variable_grads[i] = grad
+
+ for i, grad in zip(f_side_idxs, acc_f_side_grads):
+ side_input_grads[i] = grad
+ for i, grad in zip(g_side_idxs, acc_g_side_grads):
+ side_input_grads[i] = grad
+
+ grad_x1, grad_x2 = grad_ys
+ return [grad_x1, grad_x2] + side_input_grads, variable_grads
+ return _efficient_grad_fn
 def _forward(self, x1, x2):
 """Run forward through the reversible layers."""
@@ -317,10 +338,6 @@ class RevBlock(base.Layer):
 side_inputs = [self.f_side_input, self.g_side_input]
 flat_side_inputs = nest.flatten(side_inputs)
- custom_grad_fn = (
- self._efficient_grad_fn if self._use_efficient_backprop else None)
-
- @_fn_with_custom_grad(custom_grad_fn)
 def _forward_wrap(x1_, x2_, *flat_side_inputs):
 f_side, g_side = nest.pack_sequence_as(side_inputs, flat_side_inputs)
 return _rev_block_forward(
@@ -333,7 +350,16 @@ class RevBlock(base.Layer):
 g_side_input=g_side,
 gate_outputs=self._use_efficient_backprop)
- return _forward_wrap(x1, x2, *flat_side_inputs)
+ @custom_gradient.custom_gradient
+ def _forward_with_custom_grad(*args):
+ out = _forward_wrap(*args)  # pylint: disable=no-value-for-parameter
+ grad_fn = self._make_efficient_grad_fn(args, out)
+ return out, grad_fn
+
+ if self._use_efficient_backprop:
+ return _forward_with_custom_grad(x1, x2, *flat_side_inputs)
+ else:
+ return _forward_wrap(x1, x2, *flat_side_inputs)
 def _backward(self, y1, y2):
 """Run backward through the reversible layers."""
@@ -416,7 +442,7 @@ def rev_block(x1,
 def enable_with_args(dec):
 """A decorator for decorators to enable their usage with or without args."""
- @functools.wraps(dec)
+ @_safe_wraps(dec)
 def new_dec(*args, **kwargs):
 if len(args) == 1 and not kwargs and callable(args[0]):
 # Used as decorator without args
@@ -432,9 +458,23 @@ def enable_with_args(dec):
 def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
 """Decorator that recomputes the function on the backwards pass.
+ To use this function, you must use `ResourceVariable`s (i.e.
+ `variable_scope(name, use_resource=True)`), which are the default in Eager mode
+ and when running on TPU.
+
+ Warning: Because the function will be called again on the backwards pass, the
+ user should be careful not to use ops in their function that mutate state or
+ have randomness (for example, batch normalization or dropout). If the function
+ does have such operations, it is recommended that the function take the
+ `is_recomputing` keyword argument, which will be `False` on the forward pass
+ and `True` on the backwards pass, so that it can disable state changes when
+ `is_recomputing=True` (for example, not updating the moving averages in batch
+ normalization).
+
 Args:
 fn: a function that takes Tensors (all as positional arguments) and returns
- a tuple of Tensors.
+ a tuple of Tensors. Note that `fn` should not close over any other
+ Tensors or Variables.
 use_data_dep: `bool`, if `True` will use a dummy data dependency to force
 the recompute to happen. If `False` will use a control dependency. By
 default will be `True` if in an XLA context and `False` otherwise. XLA
@@ -448,9 +488,24 @@ def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
 A wrapped fn that is identical to fn when called, but its activations will
 be discarded and recomputed on the backwards pass (i.e. on a call to
 tf.gradients).
- """ - @functools.wraps(fn) + Raises: + ValueError: if `fn` closes over any Tensors or Variables. + """ + # Check for closed-over Tensors/Variables + if fn.__code__.co_freevars: + closed_over_vars = dict(zip(fn.__code__.co_freevars, + [c.cell_contents for c in fn.__closure__])) + for var_name, value in six.iteritems(closed_over_vars): + if isinstance(value, (framework_ops.Tensor, variables_lib.Variable)): + raise ValueError( + "fn decorated with @recompute_grad closes over Tensor %s " + "(local variable name: %s). The decorated fn must not close over " + "Tensors or Variables because gradients will NOT be computed for " + "them through fn. To ensure correct gradients, make the " + "Tensor an input to fn." % (value.name, var_name)) + + @_safe_wraps(fn) def wrapped(*args): return _recompute_grad( fn, args, use_data_dep=use_data_dep, tupleize_grads=tupleize_grads) @@ -463,8 +518,65 @@ def _is_on_tpu(): return control_flow_util.GetContainingXLAContext(ctxt) is not None +def _recomputing_grad_fn(compute_fn, + original_args, + original_vars, + output_grads, + grad_fn_variables, + use_data_dep, + tupleize_grads, + arg_scope, + var_scope, + has_is_recompute_kwarg): + """Grad fn for recompute_grad.""" + variables = grad_fn_variables or [] + + # Identity ops around the inputs ensures correct gradient graph-walking. + inputs = [array_ops.identity(x) for x in list(original_args)] + + # Recompute outputs + # Use a control dependency to ensure that the recompute is not eliminated by + # CSE and that it happens on the backwards pass. + ctrl_dep_grads = [g for g in output_grads if g is not None] + with framework_ops.control_dependencies(ctrl_dep_grads): + if use_data_dep: + inputs = _force_data_dependency(output_grads, inputs) + # Re-enter scopes + with contrib_framework_ops.arg_scope(arg_scope): + with variable_scope.variable_scope(var_scope, reuse=True): + # Re-call the function and ensure that the touched variables are the + # same as in the first call. 
+ with backprop.GradientTape() as tape: + fn_kwargs = {} + if has_is_recompute_kwarg: + fn_kwargs["is_recomputing"] = True + outputs = compute_fn(*inputs, **fn_kwargs) + recompute_vars = set(tape.watched_variables()) + if original_vars != recompute_vars: + raise ValueError(_WRONG_VARS_ERR) + + if not isinstance(outputs, (list, tuple)): + outputs = [outputs] + outputs = list(outputs) + + # Compute gradients + grads = gradients_impl.gradients(outputs, inputs + variables, + output_grads) + + if tupleize_grads: + if use_data_dep: + grads = _tuple_with_data_dep(grads) + else: + grads = control_flow_ops.tuple(grads) + + grad_inputs = grads[:len(inputs)] + grad_vars = grads[len(inputs):] + return grad_inputs, grad_vars + + def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" + has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError("All inputs to function must be Tensors") @@ -472,44 +584,54 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): if use_data_dep_ == _USE_DEFAULT: use_data_dep_ = _is_on_tpu() - cached_vs = [] - cached_arg_scope = [] - - def grad_fn(inputs, variables, outputs, output_grads): - """Recompute outputs for gradient computation.""" - del outputs - # Recompute outputs - with framework_ops.control_dependencies(output_grads): - if use_data_dep_: - inputs = _force_data_dependency(output_grads, inputs) - with contrib_framework_ops.arg_scope(cached_arg_scope[0]): - with variable_scope.variable_scope(cached_vs[0], reuse=True): - outputs = fn(*inputs) - - if not (isinstance(outputs, list) or isinstance(outputs, tuple)): - outputs = [outputs] - outputs = list(outputs) - grads = gradients_impl.gradients(outputs, inputs + variables, output_grads) - - if tupleize_grads: - if use_data_dep_: - grads = _tuple_with_data_dep(grads) - else: - grads = control_flow_ops.tuple(grads) - - grad_inputs = grads[:len(inputs)] - grad_vars = grads[len(inputs):] - return grad_inputs, grad_vars - - @_fn_with_custom_grad(grad_fn) + # Use custom_gradient and return a grad_fn that recomputes on the backwards + # pass. + @custom_gradient.custom_gradient def fn_with_recompute(*args): - cached_vs.append(variable_scope.get_variable_scope()) - # TODO(rsepassi): Rm conditional in TF 1.4 - if hasattr(contrib_framework_ops, "current_arg_scope"): - cached_arg_scope.append(contrib_framework_ops.current_arg_scope()) + """Wrapper for fn.""" + # Capture the variable and arg scopes so we can re-enter them when + # recomputing. + vs = variable_scope.get_variable_scope() + arg_scope = contrib_framework_ops.current_arg_scope() + # Track all variables touched in the function. + with backprop.GradientTape() as tape: + fn_kwargs = {} + if has_is_recompute_kwarg: + fn_kwargs["is_recomputing"] = False + outputs = fn(*args, **fn_kwargs) + original_vars = set(tape.watched_variables()) + + def _grad_fn(output_grads, variables=None): + # Validate that custom_gradient passes the right variables into grad_fn. 
+ if original_vars: + assert variables, ("Fn created variables but the variables were not " + "passed to the gradient fn.") + if set(variables) != original_vars: + raise ValueError(_WRONG_VARS_ERR) + + return _recomputing_grad_fn( + compute_fn=fn, + original_args=args, + original_vars=original_vars, + output_grads=output_grads, + grad_fn_variables=variables, + use_data_dep=use_data_dep_, + tupleize_grads=tupleize_grads, + arg_scope=arg_scope, + var_scope=vs, + has_is_recompute_kwarg=has_is_recompute_kwarg) + + # custom_gradient inspects the signature of the function to determine + # whether the user expects variables passed in the grad_fn. If the function + # created variables, the grad_fn should accept the "variables" kwarg. + if original_vars: + def grad_fn(*output_grads, **kwargs): + return _grad_fn(output_grads, kwargs["variables"]) else: - cached_arg_scope.append({}) - return fn(*args) + def grad_fn(*output_grads): + return _grad_fn(output_grads) + + return outputs, grad_fn return fn_with_recompute(*args) @@ -536,107 +658,6 @@ def _underlying_variable_ref(t): return None -def _fn_with_custom_grad(grad_fn, use_global_vars=False): - """Decorator to create a subgraph with a custom gradient function. - - The subgraph created by the decorated function is NOT put in a Defun and so - does not suffer from the limitations of the Defun (all subgraph ops on the - same device, no summaries). - - Args: - grad_fn: function with signature - (inputs, variables, outputs, output_grads) -> (grad_inputs, grad_vars), - all of which are lists of Tensors. - use_global_vars: if True, variables will be the global variables created. - If False, will be the trainable variables. - - Returns: - Decorator for function such that the gradient is defined by grad_fn. - """ - - def dec(fn): - - @functools.wraps(fn) - def wrapped(*args): - return _fn_with_custom_grad_internal( - fn, args, grad_fn, use_global_vars=use_global_vars) - - return wrapped - - return dec - - -def _fn_with_custom_grad_internal(fn, inputs, grad_fn, use_global_vars=False): - """Create a subgraph with a custom gradient. - - Args: - fn: function that takes inputs as arguments and produces 1 or more Tensors. - inputs: list<Tensor>, will be passed as fn(*inputs). - grad_fn: function with signature - (inputs, vars, outputs, output_grads) -> (grad_inputs, grad_vars), - all of which are lists of Tensors. - use_global_vars: if True, variables will be the global variables created. - If False, will be the trainable variables. 
- - Returns: - fn(*inputs) - """ - vs = variable_scope.get_variable_scope() - get_vars_fn = ( - vs.global_variables if use_global_vars else vs.trainable_variables) - len_before_vars = len(get_vars_fn()) - inputs = [array_ops.identity(x) for x in inputs] - outputs = fn(*inputs) - train_vars = get_vars_fn()[len_before_vars:] - - if grad_fn is None: - return outputs - - if not (isinstance(outputs, tuple) or isinstance(outputs, list)): - outputs = [outputs] - outputs = list(outputs) - - defun_inputs = [inputs, train_vars, outputs] - - def custom_grad_fn(op, *dys): - """Custom grad fn applying grad_fn for identity Defun.""" - fn_inputs, fn_vars, fn_outputs = nest.pack_sequence_as( - defun_inputs, list(op.inputs)) - fn_vars = [_underlying_variable_ref(v) for v in fn_vars] - dys = list(dys) - assert len(fn_outputs) == len(outputs) - assert len(fn_outputs) == len(dys) - - grad_inputs, grad_vars = grad_fn(fn_inputs, fn_vars, fn_outputs, dys) - grad_outputs = [None] * len(fn_outputs) - return tuple(grad_inputs + grad_vars + grad_outputs) - - # The Defun takes as input the original inputs, the trainable variables - # created in fn, and the outputs. In the forward it passes through the - # outputs. In the backwards, it produces gradients for the original inputs - # and the trainable variables. - in_types = [t.dtype for t in inputs] - out_types = [t.dtype for t in outputs] - var_types = [t.dtype for t in train_vars] - - # Get a unique name for the Defun - with framework_ops.name_scope("identity_custom_grad") as ns: - defun_name = ns - - @function.Defun( - *(in_types + var_types + out_types), - func_name=defun_name, - python_grad_func=custom_grad_fn, - shape_func=lambda _: [t.get_shape() for t in outputs]) - def identity(*args): - _, _, outs = nest.pack_sequence_as(defun_inputs, args) - return tuple([array_ops.identity(t) for t in outs]) - - flat_inputs = nest.flatten(defun_inputs) - id_out = identity(*flat_inputs) - return id_out - - def _force_data_dependency(first_compute, then_compute): """Force all of `then_compute` to depend on all of `first_compute`. 
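The recompute_grad changes above pin down its usage contract: the decorated fn must create ResourceVariables, must take every Tensor as a positional argument rather than closing over it, and may accept an `is_recomputing` keyword to gate stateful ops. A minimal usage sketch of that contract, assuming a TF 1.x graph build; the name `two_layer_block` and the layer sizes are illustrative, not part of this patch:

    import tensorflow as tf
    from tensorflow.contrib.layers.python.layers import rev_block_lib

    @rev_block_lib.recompute_grad
    def two_layer_block(x, is_recomputing=False):
      # is_recomputing is False on the forward pass and True when the block is
      # re-executed during the gradient computation; a block with stateful ops
      # (e.g. batch-norm moving-average updates) should gate them on this flag.
      del is_recomputing  # this toy block has no stateful ops
      h = tf.layers.dense(x, 64, activation=tf.nn.relu)
      return tf.layers.dense(h, 64)

    x = tf.ones((8, 64))
    # ResourceVariables are required, hence use_resource=True on the scope.
    with tf.variable_scope("block", use_resource=True):
      y = two_layer_block(x)
    loss = tf.reduce_sum(y)
    # The block's activations are discarded after the forward pass and
    # recomputed here, trading extra compute for lower peak memory.
    grads = tf.gradients(loss, tf.trainable_variables())

Capturing `x` from an enclosing function instead of passing it in would trip the new closed-over-Tensor check when the decorator is applied.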
diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py index 392a490be1..2c7463acc0 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib_test.py @@ -21,9 +21,11 @@ from __future__ import print_function from tensorflow.contrib.layers.python.layers import layers from tensorflow.contrib.layers.python.layers import rev_block_lib from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.layers import convolutional from tensorflow.python.layers import core as core_layers +from tensorflow.python.layers import normalization as normalization_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops @@ -56,12 +58,12 @@ class RevBlockTest(test.TestCase): y1, y2 = block.forward(x1, x2) x1_inv, x2_inv = block.backward(y1, y2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) x1, x2, x1_inv, x2_inv = sess.run([x1, x2, x1_inv, x2_inv]) - self.assertAllClose(x1, x1_inv) - self.assertAllClose(x2, x2_inv) + self.assertAllClose(x1, x1_inv, atol=1e-5) + self.assertAllClose(x2, x2_inv, atol=1e-5) def testBackwardForward(self): @@ -79,12 +81,12 @@ class RevBlockTest(test.TestCase): x1, x2 = block.backward(y1, y2) y1_inv, y2_inv = block.forward(x1, x2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) y1, y2, y1_inv, y2_inv = sess.run([y1, y2, y1_inv, y2_inv]) - self.assertAllClose(y1, y1_inv) - self.assertAllClose(y2, y2_inv) + self.assertAllClose(y1, y1_inv, rtol=1e-5) + self.assertAllClose(y2, y2_inv, rtol=1e-5) def _testRevBlock(self, x=None, @@ -149,7 +151,7 @@ class RevBlockTest(test.TestCase): grads_rev = gradients_impl.gradients(loss_rev, wrt) grads = gradients_impl.gradients(loss, wrt) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) y_val, yd_val, gd_val, g_val = sess.run([y, y_rev, grads_rev, grads]) self.assertAllClose(y_val, yd_val) @@ -179,18 +181,16 @@ class RevBlockTest(test.TestCase): self._testRevBlock(f=[f1, f2, f1, f2]) - # TODO(rsepassi): Recent change to conv seems to have broken this test. Find - # out why. 
- def _testConvAndBatchNorm(self): + def testConvAndBatchNorm(self): x = random_ops.random_uniform( [self.BATCH_SIZE, 10, self.CHANNELS], dtype=dtypes.float32) def f(x): x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same") - x = layers.batch_norm(x, is_training=True) + x = layers.batch_norm(x, is_training=False) x = convolutional.conv1d(x, self.CHANNELS // 2, 3, padding="same") - x = layers.batch_norm(x, is_training=True) + x = layers.batch_norm(x, is_training=False) return x self._testRevBlock(x=x, f=f) @@ -278,7 +278,7 @@ class RecomputeTest(test.TestCase): ] outputs_and_vars = [] for name, wrapped_fn in names_and_fns: - with variable_scope.variable_scope(name) as vs: + with variable_scope.variable_scope(name, use_resource=True) as vs: out = math_ops.reduce_sum(wrapped_fn(x)) outputs_and_vars.append((out, vs.trainable_variables())) @@ -286,7 +286,7 @@ class RecomputeTest(test.TestCase): for out, scope_vars in outputs_and_vars: all_grads.append(gradients_impl.gradients(out, scope_vars)) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) outputs = list(zip(*outputs_and_vars))[0] outs, all_grads_val = sess.run([outputs, all_grads]) @@ -304,103 +304,103 @@ class RecomputeTest(test.TestCase): self.assertAllClose(current, g) current = g - def testResourceVariable(self): - @rev_block_lib.recompute_grad(tupleize_grads=True) + def testDoubleCallInSameScopeFails(self): + + @rev_block_lib.recompute_grad def layer_with_recompute(inputs): - var = variable_scope.get_variable("var", ()) - return var * inputs + return core_layers.dense(inputs, 2) - inputs = array_ops.ones((), dtypes.float32) with variable_scope.variable_scope("layer", use_resource=True): - outputs = layer_with_recompute(inputs) - loss = math_ops.square(outputs) - grads = gradients_impl.gradients(loss, variables.trainable_variables()) - self.assertEqual(1, len(grads)) - self.assertTrue(grads[0] is not None) + inputs = array_ops.ones((2, 4), dtypes.float32) + out1 = layer_with_recompute(inputs) + out2 = layer_with_recompute(inputs) + out1 + out = math_ops.reduce_sum(out2) + tvars = variables.trainable_variables() + assert len(tvars) == 4 + with self.assertRaisesWithPredicateMatch( + ValueError, "called twice in the same enclosing scope"): + gradients_impl.gradients(out, [inputs] + tvars) -class FnWithCustomGradTest(test.TestCase): + def testDoubleCallInUniqueScope(self): - def testCorrectness(self): + @rev_block_lib.recompute_grad + def layer_with_recompute(inputs): + with variable_scope.variable_scope("inner", use_resource=True): + return core_layers.dense(inputs, 2) - w = random_ops.random_uniform([6, 10]) + with variable_scope.variable_scope("layer", use_resource=True): + inputs = array_ops.ones((2, 4), dtypes.float32) - def fn(a, b, c): - return core_layers.dense( - a, - 10, - use_bias=False, - kernel_initializer=lambda shape, dtype, partition_info: w - ) + math_ops.matmul(b, c) - - def grad_fn(inputs, trainable_variables, outputs, grad_outputs): - outputs = outputs[0] - grad_outputs = grad_outputs[0] - grad_inputs = gradients_impl.gradients( - outputs, inputs, grad_ys=grad_outputs) - grad_vars = gradients_impl.gradients( - outputs, trainable_variables, grad_ys=grad_outputs) - return grad_inputs, grad_vars - - custom_fn = rev_block_lib._fn_with_custom_grad(grad_fn)(fn) - - a = random_ops.random_uniform([11, 6]) - b = random_ops.random_uniform([11, 7]) - c = random_ops.random_uniform([7, 10]) - - out = fn(a, b, c) - custom_out = custom_fn(a, b, c) - 
self.assertEqual(out.get_shape().as_list(), - custom_out.get_shape().as_list()) - - loss = math_ops.reduce_mean(out) - custom_loss = math_ops.reduce_mean(custom_out) - - grads = gradients_impl.gradients( - loss, [a, b, c] + [variables.trainable_variables()[0]]) - custom_grads = gradients_impl.gradients( - custom_loss, [a, b, c] + [variables.trainable_variables()[1]]) - - with self.test_session() as sess: - sess.run(variables.global_variables_initializer()) - out_val, custom_out_val, grads_val, custom_grads_val = sess.run( - [out, custom_out, grads, custom_grads]) - self.assertAllClose(out_val, custom_out_val) - for g1, g2 in zip(grads_val, custom_grads_val): - self.assertAllClose(g1, g2) - - def testCustomGrad(self): - - def fn(a, b, c): - return core_layers.dense(a, 10, use_bias=False) + math_ops.matmul(b, c) - - def grad_fn(inputs, trainable_variables, unused_outputs, - unused_grad_outputs): - grad_inputs = [ - array_ops.ones_like(t) * (i + 1.) for i, t in enumerate(inputs) - ] - grad_vars = [ - array_ops.ones_like(t) * (i + len(inputs) + 1.) - for i, t in enumerate(trainable_variables) - ] - return grad_inputs, grad_vars - - a = random_ops.random_uniform([11, 6]) - b = random_ops.random_uniform([11, 7]) - c = random_ops.random_uniform([7, 10]) - w = random_ops.random_uniform([6, 10]) - out = rev_block_lib._fn_with_custom_grad(grad_fn)(fn)(a, b, c) - loss = math_ops.reduce_mean(out) - grads = gradients_impl.gradients( - loss, [a, b, c, variables.trainable_variables()[0]]) - expected_grads = [ - array_ops.ones_like(t) * (i + 1.) for i, t in enumerate([a, b, c, w]) - ] - with self.test_session() as sess: - sess.run(variables.global_variables_initializer()) - g_val, eg_val = sess.run([grads, expected_grads]) - for g1, g2 in zip(g_val, eg_val): - self.assertAllClose(g1, g2) + with variable_scope.variable_scope("layer1", use_resource=True): + out1 = layer_with_recompute(inputs) + with variable_scope.variable_scope("layer2", use_resource=True): + out2 = layer_with_recompute(inputs) + out1 + out = math_ops.reduce_sum(out2) + + tvars = variables.trainable_variables() + assert len(tvars) == 4 + grads = gradients_impl.gradients(out, [inputs] + tvars) + for grad in grads: + self.assertTrue(grad is not None) + + def testWithIsRecomputeKwarg(self): + + kwarg_values = [] + + @rev_block_lib.recompute_grad + def layer_with_recompute(inputs, is_recomputing=False): + kwarg_values.append(is_recomputing) + out = core_layers.dense(inputs, 2) + out = normalization_layers.batch_normalization(out, training=True) + if is_recomputing: + # Ensure that the updates are not duplicated by popping off the latest + # 2 additions. 
+ update_ops = ops.get_collection_ref(ops.GraphKeys.UPDATE_OPS) + update_ops.pop() + update_ops.pop() + return out + + x = array_ops.ones((2, 4), dtypes.float32) + with variable_scope.variable_scope("layer1", use_resource=True): + y = layer_with_recompute(x) + loss = math_ops.reduce_sum(y) + tvars = variables.trainable_variables() + gradients_impl.gradients(loss, [x] + tvars) + + update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS) + self.assertEqual(2, len(update_ops)) + self.assertEqual([False, True], kwarg_values) + + def testWithoutVariables(self): + + def concat_n(layer_list, num_inputs): + return math_ops.reduce_sum( + array_ops.concat([x for x in layer_list[-num_inputs:]], axis=-1), + axis=1, keepdims=True) + + @rev_block_lib.recompute_grad + def concat_n_wrap(*args): + return concat_n(args, 3) + + # DenseNet-style layers + layer_list = [random_ops.random_uniform((4, 8))] + for _ in range(5): + layer_list.append(math_ops.sqrt(concat_n_wrap(*layer_list))) + + grads = gradients_impl.gradients(layer_list[-1], layer_list[0]) + with self.cached_session() as sess: + sess.run(grads) + + def testErrorOnClosedOverTensor(self): + x = random_ops.random_uniform((4, 8)) + y = random_ops.random_uniform((4, 8)) + z = x * y + + with self.assertRaisesWithPredicateMatch(ValueError, "closes over"): + @rev_block_lib.recompute_grad + def fn_with_capture(a): # pylint: disable=unused-variable + return a * z if __name__ == "__main__": diff --git a/tensorflow/contrib/layers/python/layers/summaries_test.py b/tensorflow/contrib/layers/python/layers/summaries_test.py index a1ef06feec..2ec2af9d44 100644 --- a/tensorflow/contrib/layers/python/layers/summaries_test.py +++ b/tensorflow/contrib/layers/python/layers/summaries_test.py @@ -29,19 +29,19 @@ from tensorflow.python.platform import test class SummariesTest(test.TestCase): def test_summarize_scalar_tensor(self): - with self.test_session(): + with self.cached_session(): scalar_var = variables.Variable(1) summary_op = summaries_lib.summarize_tensor(scalar_var) self.assertEquals(summary_op.op.type, 'ScalarSummary') def test_summarize_multidim_tensor(self): - with self.test_session(): + with self.cached_session(): tensor_var = variables.Variable([1, 2, 3]) summary_op = summaries_lib.summarize_tensor(tensor_var) self.assertEquals(summary_op.op.type, 'HistogramSummary') def test_summarize_activation(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(1) op = array_ops.identity(var, name='SummaryTest') summary_op = summaries_lib.summarize_activation(op) @@ -52,7 +52,7 @@ class SummariesTest(test.TestCase): self.assertIn(u'SummaryTest/activation', names) def test_summarize_activation_relu(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(1) op = nn_ops.relu(var, name='SummaryTest') summary_op = summaries_lib.summarize_activation(op) @@ -64,7 +64,7 @@ class SummariesTest(test.TestCase): self.assertIn(u'SummaryTest/activation', names) def test_summarize_activation_relu6(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(1) op = nn_ops.relu6(var, name='SummaryTest') summary_op = summaries_lib.summarize_activation(op) @@ -77,7 +77,7 @@ class SummariesTest(test.TestCase): self.assertIn(u'SummaryTest/activation', names) def test_summarize_collection_regex(self): - with self.test_session(): + with self.cached_session(): var = variables.Variable(1) array_ops.identity(var, name='Test1') ops.add_to_collection('foo', array_ops.identity(var, name='Test2')) 
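The closure check exercised by testErrorOnClosedOverTensor above rejects a decorated fn whose body captures a Tensor from an enclosing function, because gradients would silently not be computed for it through the recomputed fn. The remedy the error message suggests is to make the captured Tensor an explicit input; a hypothetical sketch (`scaled` is an invented name, not from the patch):

    import tensorflow as tf
    from tensorflow.contrib.layers.python.layers import rev_block_lib

    x = tf.random_uniform((4, 8))
    z = tf.random_uniform((4, 8))

    # Inside an enclosing function, `def bad(a): return a * z` would raise
    # ValueError("... closes over Tensor ...") the moment @recompute_grad is
    # applied, since z would be a free variable of the function body.

    @rev_block_lib.recompute_grad
    def scaled(a, scale):
      return a * scale  # scale is an explicit input, so it receives a gradient

    out = scaled(x, z)
    dx, dz = tf.gradients(tf.reduce_sum(out), [x, z])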
diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index f3377f2a05..8a6b4f68a8 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -270,7 +270,7 @@ class _RegressionTargetColumn(_TargetColumn): def logits_to_predictions(self, logits, proba=False): if self.num_label_columns == 1: - return array_ops.squeeze(logits, squeeze_dims=[1]) + return array_ops.squeeze(logits, axis=[1]) return logits def get_eval_ops(self, features, logits, labels, metrics=None): @@ -418,7 +418,7 @@ def _softmax_cross_entropy_loss(logits, target): "Instead got %s." % target.dtype) # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. if len(target.get_shape()) == 2: - target = array_ops.squeeze(target, squeeze_dims=[1]) + target = array_ops.squeeze(target, axis=[1]) loss_vec = nn.sparse_softmax_cross_entropy_with_logits( labels=target, logits=logits) return loss_vec diff --git a/tensorflow/contrib/layers/python/layers/utils_test.py b/tensorflow/contrib/layers/python/layers/utils_test.py index 3409860add..34f63f5d86 100644 --- a/tensorflow/contrib/layers/python/layers/utils_test.py +++ b/tensorflow/contrib/layers/python/layers/utils_test.py @@ -42,12 +42,12 @@ class ConstantValueTest(test.TestCase): c = constant_op.constant(v) value = utils.constant_value(c) self.assertEqual(value, v) - with self.test_session(): + with self.cached_session(): self.assertEqual(c.eval(), v) def test_variable(self): for v in [True, False, 1, 0, 1.0]: - with ops.Graph().as_default() as g, self.test_session(g) as sess: + with ops.Graph().as_default() as g, self.session(g) as sess: x = variables.Variable(v) value = utils.constant_value(x) self.assertEqual(value, None) @@ -60,7 +60,7 @@ class ConstantValueTest(test.TestCase): x = array_ops.identity(p) value = utils.constant_value(p) self.assertEqual(value, None) - with self.test_session(): + with self.cached_session(): self.assertEqual(x.eval(feed_dict={p: v}), v) @@ -80,7 +80,7 @@ class StaticCondTest(test.TestCase): expected = lambda v: b'fn1' if v else b'fn2' for v in [True, False, 1, 0]: o = utils.static_cond(v, fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(), expected(v)) def test_variable(self): @@ -89,7 +89,7 @@ class StaticCondTest(test.TestCase): expected = lambda v: b'fn1' if v else b'fn2' for v in [True, False, 1, 0]: o = utils.static_cond(v, fn1, fn2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) self.assertEqual(o.eval(), expected(v)) @@ -99,7 +99,7 @@ class StaticCondTest(test.TestCase): expected = lambda v: -1 if v else -2 for v in [True, False, 1, 0]: o = utils.static_cond(v, fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(), expected(v)) @@ -119,7 +119,7 @@ class SmartCondStaticTest(test.TestCase): expected = lambda v: b'fn1' if v else b'fn2' for v in [True, False, 1, 0]: o = utils.smart_cond(constant_op.constant(v), fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(), expected(v)) def test_variable(self): @@ -128,7 +128,7 @@ class SmartCondStaticTest(test.TestCase): expected = lambda v: b'fn1' if v else b'fn2' for v in [True, False, 1, 0]: o = utils.smart_cond(constant_op.constant(v), fn1, fn2) - with self.test_session() as sess: + with self.cached_session() as sess: 
sess.run(variables.global_variables_initializer()) self.assertEqual(o.eval(), expected(v)) @@ -138,7 +138,7 @@ class SmartCondStaticTest(test.TestCase): expected = lambda v: -1 if v else -2 for v in [True, False, 1, 0]: o = utils.smart_cond(constant_op.constant(v), fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(), expected(v)) @@ -151,7 +151,7 @@ class SmartCondDynamicTest(test.TestCase): p = array_ops.placeholder(dtypes.bool, []) for v in [True, False, 1, 0]: o = utils.smart_cond(p, fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(feed_dict={p: v}), expected(v)) def test_constant(self): @@ -161,7 +161,7 @@ class SmartCondDynamicTest(test.TestCase): p = array_ops.placeholder(dtypes.bool, []) for v in [True, False, 1, 0]: o = utils.smart_cond(p, fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(feed_dict={p: v}), expected(v)) def test_variable(self): @@ -171,7 +171,7 @@ class SmartCondDynamicTest(test.TestCase): p = array_ops.placeholder(dtypes.bool, []) for v in [True, False, 1, 0]: o = utils.smart_cond(p, fn1, fn2) - with self.test_session() as sess: + with self.cached_session() as sess: sess.run(variables.global_variables_initializer()) self.assertEqual(o.eval(feed_dict={p: v}), expected(v)) @@ -182,7 +182,7 @@ class SmartCondDynamicTest(test.TestCase): p = array_ops.placeholder(dtypes.bool, []) for v in [True, False, 1, 0]: o = utils.smart_cond(p, fn1, fn2) - with self.test_session(): + with self.cached_session(): self.assertEqual(o.eval(feed_dict={p: v}), expected(v)) @@ -294,7 +294,6 @@ class NPositiveIntegersTest(test.TestCase): self.assertEqual(utils.n_positive_integers(2, 2), (2, 2)) self.assertEqual(utils.n_positive_integers(2, (2, 3)), (2, 3)) self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) - self.assertEqual(utils.n_positive_integers(3, (2, 3, 1)), (2, 3, 1)) self.assertEqual( utils.n_positive_integers(3, tensor_shape.TensorShape([2, 3, 1])), (2, 3, 1)) diff --git a/tensorflow/contrib/layers/python/ops/sparse_ops_test.py b/tensorflow/contrib/layers/python/ops/sparse_ops_test.py index d50750001e..b6c2cab64a 100644 --- a/tensorflow/contrib/layers/python/ops/sparse_ops_test.py +++ b/tensorflow/contrib/layers/python/ops/sparse_ops_test.py @@ -42,7 +42,7 @@ def _assert_sparse_tensor_value(test_case, expected, actual): class DenseToSparseTensorTest(test.TestCase): def test_dense_to_sparse_tensor_1d(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([1, 0, 2, 0]) result = sess.run(st) self.assertEqual(result.indices.dtype, np.int64) @@ -53,7 +53,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def test_dense_to_sparse_tensor_1d_float(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([1.5, 0.0, 2.3, 0.0]) result = sess.run(st) self.assertEqual(result.indices.dtype, np.int64) @@ -64,7 +64,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def test_dense_to_sparse_tensor_1d_bool(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([True, False, True, False]) result = sess.run(st) self.assertEqual(result.indices.dtype, np.int64) @@ -75,7 +75,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def 
test_dense_to_sparse_tensor_1d_str(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([b'qwe', b'', b'ewq', b'']) result = sess.run(st) self.assertEqual(result.indices.dtype, np.int64) @@ -86,7 +86,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def test_dense_to_sparse_tensor_1d_str_special_ignore(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor( [b'qwe', b'', b'ewq', b''], ignore_value=b'qwe') result = sess.run(st) @@ -98,7 +98,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def test_dense_to_sparse_tensor_2d(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([[1, 2, 0, 0], [3, 4, 5, 0]]) result = sess.run(st) self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]], @@ -107,7 +107,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([2, 4], result.dense_shape) def test_dense_to_sparse_tensor_3d(self): - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor([[[1, 2, 0, 0], [3, 4, 5, 0]], [[7, 8, 0, 0], [9, 0, 0, 0]]]) result = sess.run(st) @@ -117,7 +117,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([2, 2, 4], result.dense_shape) def test_dense_to_sparse_tensor_unknown_1d_shape(self): - with self.test_session() as sess: + with self.cached_session() as sess: tensor = array_ops.placeholder(shape=[None], dtype=dtypes.int32) st = sparse_ops.dense_to_sparse_tensor(tensor) result = sess.run(st, feed_dict={tensor: [0, 100, 0, 3]}) @@ -126,7 +126,7 @@ class DenseToSparseTensorTest(test.TestCase): self.assertAllEqual([4], result.dense_shape) def test_dense_to_sparse_tensor_unknown_3d_shape(self): - with self.test_session() as sess: + with self.cached_session() as sess: tensor = array_ops.placeholder( shape=[None, None, None], dtype=dtypes.int32) st = sparse_ops.dense_to_sparse_tensor(tensor) @@ -142,7 +142,7 @@ class DenseToSparseTensorTest(test.TestCase): def test_dense_to_sparse_unknown_rank(self): ph = array_ops.placeholder(dtype=dtypes.int32) - with self.test_session() as sess: + with self.cached_session() as sess: st = sparse_ops.dense_to_sparse_tensor(ph) result = sess.run(st, feed_dict={ph: [[1, 2, 0, 0], [3, 4, 5, 0]]}) self.assertAllEqual([[0, 0], [0, 1], [1, 0], [1, 1], [1, 2]], @@ -155,7 +155,7 @@ class SparseRowEnvelopeTest(test.TestCase): def test_sparse_row_envelope(self): expected_sparse_row_envelope = [1, 0, 3] - with self.test_session() as sess: + with self.cached_session() as sess: sparse_input = sparse_tensor.SparseTensor( indices=[[0, 0], [2, 0], [2, 1], [2, 2]], values=[0, 1, 2, 3], @@ -167,7 +167,7 @@ class SparseRowEnvelopeTest(test.TestCase): def test_sparse_row_envelope_unsorted_indices(self): expected_sparse_row_envelope = [1, 0, 3] - with self.test_session() as sess: + with self.cached_session() as sess: sparse_input = sparse_tensor.SparseTensor( indices=[[2, 0], [2, 2], [2, 1], [0, 0]], values=[0, 1, 2, 3], @@ -179,7 +179,7 @@ class SparseRowEnvelopeTest(test.TestCase): def test_sparse_row_envelope_empty_in_the_end(self): expected_sparse_row_envelope = [1, 0, 3, 0, 0] - with self.test_session() as sess: + with self.cached_session() as sess: sparse_input = sparse_tensor.SparseTensor( indices=[[0, 0], [2, 0], [2, 1], [2, 2]], values=[0, 1, 2, 3], @@ -191,7 +191,7 @@ 
class SparseRowEnvelopeTest(test.TestCase):
 def test_sparse_row_envelope_empty_3d(self):
 expected_sparse_row_envelope = [1, 0, 3, 0, 0]
- with self.test_session() as sess:
+ with self.cached_session() as sess:
 sparse_input = sparse_tensor.SparseTensor(
 indices=[[0, 0, 0], [0, 2, 0], [0, 2, 1], [0, 2, 2]],
 values=[0, 1, 2, 3],
@@ -207,7 +207,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 def test_indicators_to_sparse_ids_1d(self):
 indicators = (0, 0, 1, 0)
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0,),),
 values=(2,),
@@ -220,7 +220,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 (1, 0, 0, 1),
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0), (1, 0), (1, 1)),
 values=(2, 0, 3),
@@ -235,7 +235,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 ((1, 0, 0, 1, 1), (0, 0, 1, 0, 0)),
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=(
 (0, 0, 0),
@@ -255,7 +255,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(
 indicators, dtype=dtypes.int16)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0), (1, 0), (1, 1)),
 values=np.array((2, 0, 3), dtype=np.int16),
@@ -269,7 +269,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(
 indicators, ignore_value=-1)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
 values=(2, 0, 3, 2),
@@ -282,7 +282,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 (('B', '', '', 'C'), ('', '', 'D', '')),
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
 values=(2, 0, 3, 2),
@@ -296,7 +296,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 )
 sparse_ids = sparse_ops.indicators_to_sparse_ids(
 indicators, ignore_value='x')
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
 values=(2, 0, 3, 2),
@@ -311,7 +311,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 indicators = array_ops.placeholder(
 dtype=dtypes.int32, shape=(None, None, None))
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
 values=(2, 0, 3, 2),
@@ -325,7 +325,7 @@ class IndicatorToSparseIdsTest(test.TestCase):
 )
 indicators = array_ops.placeholder(dtype=dtypes.int32)
 sparse_ids = sparse_ops.indicators_to_sparse_ids(indicators)
- with self.test_session():
+ with self.cached_session():
 _assert_sparse_tensor_value(self, sparse_tensor.SparseTensorValue(
 indices=((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)),
 values=(2, 0, 3, 2),