69 files changed, 646 insertions, 735 deletions
diff --git a/configure.py b/configure.py
index 1f205861f1..26da09bd94 100644
--- a/configure.py
+++ b/configure.py
@@ -883,28 +883,27 @@ def set_computecpp_toolkit_path(environ_cp):
   write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH',
                               computecpp_toolkit_path)
 
-
 def set_trisycl_include_dir(environ_cp):
-  """Set TRISYCL_INCLUDE_DIR."""
+  """Set TRISYCL_INCLUDE_DIR."""
   ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
                              'include directory. (Use --config=sycl_trisycl '
                              'when building with Bazel) '
-                             '[Default is %s]: ') % (
-                                 _DEFAULT_TRISYCL_INCLUDE_DIR)
+                             '[Default is %s]: '
+                             ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
   while True:
     trisycl_include_dir = get_from_env_or_user_or_default(
-        environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
-        _DEFAULT_TRISYCL_INCLUDE_DIR)
+      environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir,
+      _DEFAULT_TRISYCL_INCLUDE_DIR)
     if os.path.exists(trisycl_include_dir):
       break
 
-    print('Invalid triSYCL include directory, %s cannot be found' %
-          (trisycl_include_dir))
+    print('Invalid triSYCL include directory, %s cannot be found'
+          % (trisycl_include_dir))
 
   # Set TRISYCL_INCLUDE_DIR
   environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
-  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-
+  write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
+                              trisycl_include_dir)
 
 def set_mpi_home(environ_cp):
   """Set MPI_HOME."""
diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 6c385af3b3..b795afd5b8 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -119,7 +119,7 @@ def tf_library(name, graph, config,
             out_nodes_file,
         ] + freeze_saver_srcs,
         outs=[freeze_file],
-        cmd=("$(location @org_tensorflow//tensorflow/python/tools:freeze_graph)" +
+        cmd=("$(location //tensorflow/python/tools:freeze_graph)" +
              freeze_args),
         tools=["@org_tensorflow//tensorflow/python/tools:freeze_graph"],
         tags=tags,
@@ -152,7 +152,7 @@ def tf_library(name, graph, config,
            " --target_triple=" + target_llvm_triple() +
            " --out_header=$(@D)/" + header_file +
            " --out_object=$(@D)/" + object_file +
-           " " + flags),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -189,7 +189,7 @@ def tf_library(name, graph, config,
            " --cpp_class=" + cpp_class +
            " --target_triple=" + target_llvm_triple() +
            " --out_session_module=$(@D)/" + session_module_pb +
-           " " + flags),
+           flags),
       tools=[tfcompile_tool],
       visibility=visibility,
       testonly=testonly,
@@ -256,7 +256,7 @@ def tf_library(name, graph, config,
         ],
         outs=[test_file],
         cmd=("sed " + sed_replace +
-             " $(location @org_tensorflow//tensorflow/compiler/aot:test.cc) " +
+             " $(location //tensorflow/compiler/aot:test.cc) " +
              "> $(OUTS)"),
         tags=tags,
     )
diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py
index 00a9c9a65b..a773b5a947 100644
--- a/tensorflow/compiler/tests/fused_batchnorm_test.py
+++ b/tensorflow/compiler/tests/fused_batchnorm_test.py
@@ -76,8 +76,7 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(
-          np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y_ref, mean_ref, var_ref = self._reference_training(
           x_val, scale_val, offset_val, epsilon, data_format)
@@ -113,8 +112,7 @@ class FusedBatchNormTest(XLATestCase):
       # To avoid constant folding
       t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x")
       scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale")
-      offset = array_ops.placeholder(
-          np.float32, shape=scale_shape, name="offset")
+      offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset")
       epsilon = 0.001
       y, mean, var = nn.fused_batch_norm(
           t_val,
diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt
index aba356d616..25ada5ba27 100644
--- a/tensorflow/contrib/android/cmake/CMakeLists.txt
+++ b/tensorflow/contrib/android/cmake/CMakeLists.txt
@@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \
                      -std=c++11 -fno-rtti -fno-exceptions \
                      -O2 -Wno-narrowing -fomit-frame-pointer \
-                     -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \
+                     -mfpu=neon -mfloat-abi=softfp -fPIE \
                      -ftemplate-depth=900 \
                      -DGOOGLE_PROTOBUF_NO_RTTI \
                      -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER")
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
index 73747db31c..7f7697357c 100644
--- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py
@@ -41,7 +41,6 @@ def try_import(name):  # pylint: disable=invalid-name
     tf_logging.warning("Could not import %s: %s" % (name, str(e)))
   return module
 
-
 stats = try_import("scipy.stats")
 
 
@@ -63,9 +62,9 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected, scale_shape.eval())
       loc = array_ops.zeros(loc_shape)
       scale = array_ops.ones(scale_shape)
-      self.assertAllEqual(expected,
-                          array_ops.shape(
-                              cauchy_lib.Cauchy(loc, scale).sample()).eval())
+      self.assertAllEqual(
+          expected,
+          array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval())
 
   def _testParamStaticShapes(self, sample_shape, expected):
     param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape)
@@ -93,7 +92,8 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -115,15 +115,16 @@ class CauchyTest(test.TestCase):
     with self.test_session():
       batch_size = 6
       loc = constant_op.constant([[3.0, -3.0]] * batch_size)
-      scale = constant_op.constant(
-          [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size)
+      scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] *
+                                   batch_size)
       x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       log_pdf = cauchy.log_prob(x)
       log_pdf_values = log_pdf.eval()
       self.assertEqual(log_pdf.shape, (6, 2))
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          log_pdf.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           log_pdf.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, log_pdf.shape)
@@ -247,7 +248,8 @@ class CauchyTest(test.TestCase):
       cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale)
 
       entropy = cauchy.entropy()
-      self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape)
+      self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
+                          entropy.shape)
       self.assertAllEqual(cauchy.batch_shape_tensor().eval(),
                           entropy.eval().shape)
       self.assertAllEqual(cauchy.batch_shape, entropy.shape)
@@ -255,7 +257,7 @@ class CauchyTest(test.TestCase):
 
       if not stats:
         return
-      expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3))
+      expected_entropy = stats.cauchy(loc, scale).entropy()
       self.assertAllClose(expected_entropy, entropy.eval())
 
   def testCauchyMode(self):
@@ -366,8 +368,8 @@ class CauchyTest(test.TestCase):
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (
-          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
 
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
@@ -383,18 +385,18 @@ class CauchyTest(test.TestCase):
       samples = cauchy.sample(n)
       sample_values = samples.eval()
       self.assertEqual(samples.shape, (100000, batch_size, 2))
-      self.assertAllClose(
-          np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1)
-      self.assertAllClose(
-          np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1)
+      self.assertAllClose(np.median(sample_values[:, 0, 0]),
+                          loc_v[0], atol=1e-1)
+      self.assertAllClose(np.median(sample_values[:, 0, 1]),
+                          loc_v[1], atol=1e-1)
 
       expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate(
           tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval()))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
-      expected_shape = (
-          tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape))
+      expected_shape = (tensor_shape.TensorShape(
+          [n.eval()]).concatenate(cauchy.batch_shape))
       self.assertAllEqual(expected_shape, samples.shape)
       self.assertAllEqual(expected_shape, sample_values.shape)
 
@@ -426,12 +428,9 @@ class CauchyTest(test.TestCase):
       self.assertEqual(cauchy.event_shape, ())
       self.assertAllEqual(cauchy.event_shape_tensor().eval(), [])
       self.assertAllEqual(
-          sess.run(
-              cauchy.batch_shape_tensor(),
-              feed_dict={
-                  loc: 5.0,
-                  scale: [1.0, 2.0]
-              }), [2])
+          sess.run(cauchy.batch_shape_tensor(),
+                   feed_dict={loc: 5.0,
+                              scale: [1.0, 2.0]}), [2])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py
index 8d59c1abfb..a17bb091f6 100644
--- a/tensorflow/contrib/distributions/python/ops/cauchy.py
+++ b/tensorflow/contrib/distributions/python/ops/cauchy.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops.distributions import distribution
 
+
 __all__ = [
     "Cauchy",
 ]
@@ -96,7 +97,7 @@ class Cauchy(distribution.Distribution):
                validate_args=False,
                allow_nan_stats=True,
                name="Cauchy"):
-    """Construct Cauchy distributions.
+    """Construct Cauchy distributions with location `loc` and scale `scale`.
 
     The parameters `loc` and `scale` must be shaped in a way that supports
     broadcasting (e.g. `loc + scale` is a valid operation).
@@ -120,8 +121,8 @@ class Cauchy(distribution.Distribution):
     """
     parameters = locals()
     with ops.name_scope(name, values=[loc, scale]):
-      with ops.control_dependencies([check_ops.assert_positive(scale)]
-                                    if validate_args else []):
+      with ops.control_dependencies([check_ops.assert_positive(scale)] if
+                                    validate_args else []):
         self._loc = array_ops.identity(loc, name="loc")
         self._scale = array_ops.identity(scale, name="scale")
         check_ops.assert_same_float_dtype([self._loc, self._scale])
@@ -137,8 +138,8 @@ class Cauchy(distribution.Distribution):
   @staticmethod
   def _param_shapes(sample_shape):
     return dict(
-        zip(("loc", "scale"),
-            ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2)))
+        zip(("loc", "scale"), ([ops.convert_to_tensor(
+            sample_shape, dtype=dtypes.int32)] * 2)))
 
   @property
   def loc(self):
@@ -152,10 +153,13 @@ class Cauchy(distribution.Distribution):
 
   def _batch_shape_tensor(self):
     return array_ops.broadcast_dynamic_shape(
-        array_ops.shape(self.loc), array_ops.shape(self.scale))
+        array_ops.shape(self.loc),
+        array_ops.shape(self.scale))
 
   def _batch_shape(self):
-    return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape)
+    return array_ops.broadcast_static_shape(
+        self.loc.shape,
+        self.scale.shape)
 
   def _event_shape_tensor(self):
     return constant_op.constant([], dtype=dtypes.int32)
diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py
index f1debc8590..9378fe8799 100644
--- a/tensorflow/contrib/layers/python/layers/layers.py
+++ b/tensorflow/contrib/layers/python/layers/layers.py
@@ -309,6 +309,7 @@ def _fused_batch_norm(inputs,
         new_shape = [-1, channels, 1, 1]
       inputs = array_ops.reshape(inputs, new_shape)
     inputs_shape = inputs.get_shape()
+    dtype = inputs.dtype.base_dtype
     if data_format == DATA_FORMAT_NHWC:
       params_shape = inputs_shape[-1:]
     else:
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 27bd3172d6..5aa2253516 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1779,8 +1779,7 @@ class BatchNormTest(test.TestCase):
       dtype = dtypes.float32
     height, width = 3, 3
     with self.test_session():
-      images = np.random.uniform(size=(5, height, width, 3)).astype(
-          dtype.as_numpy_dtype)
+      images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype)
       output = _layers.batch_norm(images, fused=fused)
       expected_name = ('BatchNorm/FusedBatchNorm' if fused else
                        'BatchNorm/batchnorm')
@@ -2666,18 +2665,18 @@ class BatchNormTest(test.TestCase):
     # Test case for 11673
     with self.test_session() as sess:
       a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10))
-      _layers.batch_norm(
-          a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
       a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10))
-      _layers.batch_norm(
-          a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True)
+      b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW',
+                                zero_debias_moving_mean=True)
       sess.run(variables_lib.global_variables_initializer())
 
   def testVariablesAreFloat32(self):
     height, width = 3, 3
     with self.test_session():
-      images = random_ops.random_uniform(
-          (5, height, width, 3), seed=1, dtype=dtypes.float16)
+      images = random_ops.random_uniform((5, height, width, 3),
+                                         seed=1, dtype=dtypes.float16)
       _layers.batch_norm(images, scale=True)
       beta = variables.get_variables_by_name('beta')[0]
       gamma = variables.get_variables_by_name('gamma')[0]
@@ -2692,13 +2691,17 @@ class BatchNormTest(test.TestCase):
     channels = shape[1]
     images = np.arange(np.product(shape), dtype=dtype).reshape(shape)
     beta = init_ops.constant_initializer(
-        np.arange(2, channels + 2, dtype=np.float32))
+        np.arange(
+            2, channels + 2, dtype=np.float32))
     gamma = init_ops.constant_initializer(
-        np.arange(10, channels + 10, dtype=np.float32) * 2.0)
+        np.arange(
+            10, channels + 10, dtype=np.float32) * 2.0)
     mean = init_ops.constant_initializer(
-        np.arange(3, channels + 3, dtype=np.float32) * 5.0)
+        np.arange(
+            3, channels + 3, dtype=np.float32) * 5.0)
     variance = init_ops.constant_initializer(
-        np.arange(1, channels + 1, dtype=np.float32) * 4.0)
+        np.arange(
+            1, channels + 1, dtype=np.float32) * 4.0)
     output = _layers.batch_norm(
         images,
         fused=True,
@@ -2723,6 +2726,7 @@ class BatchNormTest(test.TestCase):
       res_16 = self._runFusedBatchNorm(shape, np.float16)
       self.assertAllClose(res_32, res_16, rtol=1e-3)
 
+
   def testAdjustmentCreated(self):
     # Tests that the adjustment is appropriately passed to and used by the core
     # BN layer.
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
index 86fad4c553..db18ebf05d 100644
--- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
+++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py
@@ -28,6 +28,7 @@ import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import tf_logging as logging
@@ -368,11 +369,10 @@ class DataFeeder(object):
     if x_is_dict:
       num_samples = list(self._x.values())[0].shape[0]
     elif tensor_util.is_tensor(self._x):
-      num_samples = self._x.shape[
-          0].value  # shape will be a Dimension, extract an int
+      num_samples = self._x.shape[0].value  # shape will be a Dimension, extract an int
     else:
       num_samples = self._x.shape[0]
-
+      
     if self._shuffle:
       self.indices = self.random_state.permutation(num_samples)
     else:
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index 7526f3ae0d..86d8484391 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -251,9 +251,8 @@ class SdcaModel(object):
 
       result_dense = 0.0
       for i in range(len(dense_variables)):
-        result_dense += math_ops.matmul(dense_features[i],
-                                        array_ops.expand_dims(
-                                            dense_variables[i], -1))
+        result_dense += math_ops.matmul(
+            dense_features[i], array_ops.expand_dims(dense_variables[i], -1))
 
     # Reshaping to allow shape inference at graph construction time.
     return array_ops.reshape(result_dense, [-1]) + result_sparse
diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py
index 5bca82ded0..b122818221 100644
--- a/tensorflow/contrib/lite/testing/generate_examples.py
+++ b/tensorflow/contrib/lite/testing/generate_examples.py
@@ -40,7 +40,6 @@ from six import StringIO
 # TODO(aselle): Disable GPU for now
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 
-# pylint: disable=g-import-not-at-top
 import tensorflow as tf
 from google.protobuf import text_format
 # TODO(aselle): switch to TensorFlow's resource_loader
@@ -384,7 +383,7 @@ def make_zip_of_tests(zip_path,
         report["toco_log"] = ""
         tf.reset_default_graph()
 
-        with tf.device("/cpu:0"):
+        with tf.device('/cpu:0'):
           try:
             inputs, outputs = make_graph(param_dict_real)
           except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError,
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index 04643a6058..4c60c99342 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -34,18 +34,12 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
-    'PowerSignOptimizer',
-    'AddSignOptimizer'
+    'PowerSignOptimizer', 'AddSignOptimizer',
     'DelayCompensatedGradientDescentOptimizer',
-    'DropStaleGradientOptimizer',
-    'ExternalOptimizerInterface',
-    'LazyAdamOptimizer',
-    'NadamOptimizer',
-    'MovingAverageOptimizer',
-    'ScipyOptimizerInterface',
-    'VariableClippingOptimizer',
-    'MultitaskOptimizerWrapper',
-    'clip_gradients_by_global_norm',
+    'DropStaleGradientOptimizer', 'ExternalOptimizerInterface',
+    'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer',
+    'ScipyOptimizerInterface', 'VariableClippingOptimizer',
+    'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
index cb6c77a86f..c26037935d 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""An optimizer wrapper for stateful optimizers with multitask loss."""
+
+"""An optimizer wrapper that ensures correct behaviour
+of stateful optimizers with multitask loss."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -28,27 +30,26 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import optimizer
 
-__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm']
-
+__all__ = ["MultitaskOptimizerWrapper",
+           "clip_gradients_by_global_norm"]
 
 def _is_all_zeros(grad):
   all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0)
   return all_zeros
 
-
 def _get_wrapper(fn, opt):
-
   def wrapper(self, grad, *args, **kwargs):  # pylint: disable=unused-argument
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(all_zeros, control_flow_ops.no_op,
-                                 lambda: fn(grad, *args, **kwargs))
-
+    return control_flow_ops.cond(
+        all_zeros,
+        control_flow_ops.no_op,
+        lambda: fn(grad, *args, **kwargs))
   wrapper = types.MethodType(wrapper, opt)
   return wrapper
 
-
 class MultitaskOptimizerWrapper(object):
-  """Optimizer wrapper making all-zero gradients harmless.
+  """Optimizer wrapper that ensures that
+  all-zero gradients don't affect the optimizer state.
 
   This might be useful when a multi-task loss is used,
   and some components of the loss might be
@@ -87,20 +88,20 @@ class MultitaskOptimizerWrapper(object):
     gradvars_clipped, global_step=batch)
   ```
   """
-
   def __init__(self, opt):
-    """Constructor.
-
+    """
     Args:
-      opt: an instance of a class that implements tf.train.Optimizer.
+    opt: an instance of a class that implements tf.train.Optimizer.
     """
     if not isinstance(opt, optimizer.Optimizer):
       raise TypeError(
-          'Supplied optimizer must be an instance of tf.train.Optimizer')
+          "Supplied optimizer must be an instance of tf.train.Optimizer")
     self._opt = opt
-    overridden_methods = ('_apply_dense', '_resource_apply_dense',
-                          '_apply_sparse', '_resource_apply_sparse')
-    for name in overridden_methods:
+    overriden_methods = ('_apply_dense',
+                         '_resource_apply_dense',
+                         '_apply_sparse',
+                         '_resource_apply_sparse')
+    for name in overriden_methods:
       fn = getattr(self._opt, name)
       wrapper = _get_wrapper(fn, self._opt)
       setattr(self._opt, name, wrapper)
@@ -111,30 +112,27 @@ class MultitaskOptimizerWrapper(object):
 
 def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.):
   """Clips gradients of a multitask loss by their global norm.
-
   Ignores all-zero tensors when computing the global norm.
 
   Args:
-    gradients_variables: a list of pairs (gradient, variable).
-    clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
+  gradients_variables: a list of pairs (gradient, variable).
+  clip_norm: a float Tensor, the global norm to clip on. Default is 20.0.
 
   Returns:
-    list: A list of pairs of the same type as gradients_variables,.
-    fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
+  list: A list of pairs of the same type as gradients_variables.
+  fixed_global_norm: A 0-D (scalar) Tensor representing the global norm.
   """
   gradients, variables = six.moves.zip(*gradients_variables)
-
   def _replace_nonexisting_grad(grad):
     if grad is None:
       return grad
     all_zeros = _is_all_zeros(grad)
-    return control_flow_ops.cond(
-        all_zeros,
-        lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)),
-        lambda: grad)
-
+    return control_flow_ops.cond(all_zeros,
+                                 lambda: array_ops.zeros(
+                                     [], dtype=dtypes.as_dtype(grad.dtype)),
+                                 lambda: grad)
   nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients]
   fixed_global_norm = clip_ops.global_norm(nonzero_gradients)
-  gradients, _ = clip_ops.clip_by_global_norm(
-      gradients, clip_norm, use_norm=fixed_global_norm)
+  gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm,
+                                              use_norm=fixed_global_norm)
   return list(six.moves.zip(gradients, variables)), fixed_global_norm
diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
index 618d8eb18d..b06213f715 100644
--- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
+++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py
@@ -18,9 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-import six
-
 from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -28,11 +25,13 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum
 
+import numpy as np
+import six
 
 class MultitaskOptimizerWrapperTest(test.TestCase):
-  """Tests for the multitask optimizer wrapper.
   """
-
+  Tests for the multitask optimizer wrapper.
+  """
   def testWrapper(self):
     with self.test_session():
       var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32)
@@ -40,10 +39,12 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32)
       grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32)
       grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32)
-      mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9)
+      mom_opt_impl = momentum.MomentumOptimizer(
+          learning_rate=2.0, momentum=0.9)
       mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper(
           mom_opt_impl)
-      mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1]))
+      mom_update = mom_opt.apply_gradients(
+          zip([grads0, grads1], [var0, var1]))
       mom_update_partial = mom_opt.apply_gradients(
           zip([grads_allzero, grads1], [var0, var1]))
       mom_update_no_action = mom_opt.apply_gradients(
@@ -62,13 +63,14 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 1: normal momentum update.
       self.evaluate(mom_update)
       # Check that the momentum accumulators have been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
-      self.assertAllCloseAccordingToType(
-          np.array([0.01, 0.01]), self.evaluate(slot1))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.01, 0.01]),
+                                         self.evaluate(slot1))
       # Check that the parameters have been updated.
       self.assertAllCloseAccordingToType(
-          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0))
+          np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]),
+          self.evaluate(var0))
       self.assertAllCloseAccordingToType(
           np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]),
           self.evaluate(var1))
@@ -76,8 +78,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 2: momentum update that changes only slot1 but not slot0.
       self.evaluate(mom_update_partial)
       # Check that only the relevant momentum accumulator has been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -85,8 +87,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       # Step 3: momentum update that does not change anything.
       self.evaluate(mom_update_no_action)
       # Check that the momentum accumulators have *NOT* been updated.
-      self.assertAllCloseAccordingToType(
-          np.array([0.1, 0.1]), self.evaluate(slot0))
+      self.assertAllCloseAccordingToType(np.array([0.1, 0.1]),
+                                         self.evaluate(slot0))
       self.assertAllCloseAccordingToType(
           np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]),
           self.evaluate(slot1))
@@ -103,9 +105,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
       grads3 = None
       varlist = [var0, var1, var2, var3]
       gradients = [grads0, grads1, grads2, grads3]
-      clipped_gradvars, global_norm = (
-          multitask_optimizer_wrapper.clip_gradients_by_global_norm(
-              six.moves.zip(gradients, varlist), clip_norm=1.0))
+      clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm(
+          six.moves.zip(gradients, varlist), clip_norm=1.0)
       clipped_grads = list(six.moves.zip(*clipped_gradvars))[0]
       reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0])))
       self.assertAllCloseAccordingToType(
@@ -114,6 +115,5 @@ class MultitaskOptimizerWrapperTest(test.TestCase):
           self.evaluate(clipped_grads[2]), np.array([0., 0.]))
       self.assertEqual(clipped_grads[3], None)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index f130a2187c..16b6d145e3 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -24,7 +24,6 @@ import numpy as np
 
 from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.rnn.python.ops import core_rnn_cell
-from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -39,6 +38,9 @@ from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
+from tensorflow.python.framework import test_util
+from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell
+
 
 
 # pylint: enable=protected-access
@@ -372,20 +374,19 @@ class RNNCellTest(test.TestCase):
         h = array_ops.zeros([batch_size, num_proj])
         state = rnn_cell_impl.LSTMStateTuple(c, h)
         cell = contrib_rnn_cell.LayerNormLSTMCell(
-            num_units=num_units,
-            num_proj=num_proj,
-            forget_bias=1.0,
-            layer_norm=True,
-            norm_gain=1.0,
-            norm_shift=0.0)
+          num_units=num_units,
+          num_proj=num_proj,
+          forget_bias=1.0,
+          layer_norm=True,
+          norm_gain=1.0,
+          norm_shift=0.0)
         g, out_m = cell(x, state)
         sess.run([variables_lib.global_variables_initializer()])
-        res = sess.run(
-            [g, out_m], {
-                x.name: np.ones((batch_size, input_size)),
-                c.name: 0.1 * np.ones((batch_size, num_units)),
-                h.name: 0.1 * np.ones((batch_size, num_proj))
-            })
+        res = sess.run([g, out_m], {
+          x.name: np.ones((batch_size, input_size)),
+          c.name: 0.1 * np.ones((batch_size, num_units)),
+          h.name: 0.1 * np.ones((batch_size, num_proj))
+        })
         self.assertEqual(len(res), 2)
         # The numbers in results were not calculated, this is mostly just a
         # smoke test.
@@ -395,9 +396,9 @@ class RNNCellTest(test.TestCase):
         # Different inputs so different outputs and states
         for i in range(1, batch_size):
           self.assertTrue(
-              float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
+            float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6)
           self.assertTrue(
-              float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
+            float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6)
 
   def testOutputProjectionWrapper(self):
     with self.test_session() as sess:
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
index 46823fa364..b4a5f2d7eb 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py
@@ -996,19 +996,26 @@ class RNNCellTest(test.TestCase):
         output, state = cell(x, hidden)
 
         sess.run([variables.global_variables_initializer()])
-        res = sess.run(
-            [output, state], {
-                hidden[0].name:
-                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
-                        1.
-                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]],
-                                 [[[2.], [2.]], [[2.], [2.]]]]]),
-                x.name:
-                    np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[
-                        1.
-                    ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]],
-                                                                [[2.], [2.]]]]])
-            })
+        res = sess.run([output, state], {
+            hidden[0].name:
+                np.array([[[[[1.],[1.]], 
+                            [[1.],[1.]]],
+                           [[[1.],[1.]],
+                            [[1.],[1.]]]], 
+                          [[[[2.],[2.]],
+                            [[2.],[2.]]],
+                           [[[2.],[2.]],
+                            [[2.],[2.]]]]]),
+            x.name:
+                np.array([[[[[1.],[1.]],
+                            [[1.],[1.]]],
+                           [[[1.],[1.]],
+                            [[1.],[1.]]]],
+                          [[[[2.],[2.]],
+                            [[2.],[2.]]],
+                           [[[2.],[2.]],
+                           [[2.],[2.]]]]])
+        })
         # This is a smoke test, making sure expected values are unchanged.
         self.assertEqual(len(res), 2)
         self.assertAllClose(res[0], res[1].h)
@@ -1269,8 +1276,10 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         self.assertAllClose(res[2].c, expected_c1, 1e-5)
         self.assertAllClose(res[2].h, expected_h1, 1e-5)
 
+
   def testBasicLSTMCellWithStateTupleLayerNorm(self):
-    """The results of LSTMCell and LayerNormBasicLSTMCell should be the same."""
+    """The results of LSTMCell and LayerNormBasicLSTMCell
+    should be the same."""
     with self.test_session() as sess:
       with variable_scope.variable_scope(
           "root", initializer=init_ops.constant_initializer(0.5)):
@@ -1281,21 +1290,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase):
         c1 = array_ops.zeros([1, 2])
         h1 = array_ops.zeros([1, 2])
         state1 = rnn_cell_impl.LSTMStateTuple(c1, h1)
-        cell = rnn_cell_impl.MultiRNNCell([
-            contrib_rnn_cell.LayerNormLSTMCell(
-                2, layer_norm=True, norm_gain=1.0, norm_shift=0.0)
-            for _ in range(2)
-        ])
+        cell = rnn_cell_impl.MultiRNNCell(
+          [contrib_rnn_cell.LayerNormLSTMCell(
+              2,
+              layer_norm=True,
+              norm_gain=1.0,
+              norm_shift=0.0) for _ in range(2)])
         h, (s0, s1) = cell(x, (state0, state1))
         sess.run([variables.global_variables_initializer()])
-        res = sess.run(
-            [h, s0, s1], {
-                x.name: np.array([[1., 1.]]),
-                c0.name: 0.1 * np.asarray([[0, 1]]),
-                h0.name: 0.1 * np.asarray([[2, 3]]),
-                c1.name: 0.1 * np.asarray([[4, 5]]),
-                h1.name: 0.1 * np.asarray([[6, 7]]),
-            })
+        res = sess.run([h, s0, s1], {
+          x.name: np.array([[1., 1.]]),
+          c0.name: 0.1 * np.asarray([[0, 1]]),
+          h0.name: 0.1 * np.asarray([[2, 3]]),
+          c1.name: 0.1 * np.asarray([[4, 5]]),
+          h1.name: 0.1 * np.asarray([[6, 7]]),
+        })
 
         expected_h = np.array([[-0.38079708, 0.38079708]])
         expected_h0 = np.array([[-0.38079708, 0.38079708]])
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index 0698d40438..5e85c125df 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope as vs
+from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
@@ -114,7 +115,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
 
   The class uses optional peep-hole connections, and an optional projection
   layer.
-
+  
   Layer normalization implementation is based on:
 
     https://arxiv.org/abs/1607.06450.
@@ -123,24 +124,15 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
   Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
 
   and is applied before the internal nonlinearities.
-
+  
   """
 
-  def __init__(self,
-               num_units,
-               use_peepholes=False,
-               initializer=None,
-               num_proj=None,
-               proj_clip=None,
-               num_unit_shards=1,
-               num_proj_shards=1,
-               forget_bias=1.0,
-               state_is_tuple=True,
-               activation=math_ops.tanh,
-               reuse=None,
-               layer_norm=False,
-               norm_gain=1.0,
-               norm_shift=0.0):
+  def __init__(self, num_units, use_peepholes=False,
+               initializer=None, num_proj=None, proj_clip=None,
+               num_unit_shards=1, num_proj_shards=1,
+               forget_bias=1.0, state_is_tuple=True,
+               activation=math_ops.tanh, reuse=None,
+               layer_norm=False, norm_gain=1.0, norm_shift=0.0):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -172,6 +164,8 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
         `layer_norm` has been set to `False`, this argument will be ignored.
       norm_shift: float, The layer normalization shift initial value. If
         `layer_norm` has been set to `False`, this argument will be ignored.
+        
+        
     """
     super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
     if not state_is_tuple:
@@ -2055,8 +2049,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell):
     if self._skip_connection:
       self._total_output_channels += self._input_shape[-1]
 
-    state_size = tensor_shape.TensorShape(
-        self._input_shape[:-1] + [self._output_channels])
+    state_size = tensor_shape.TensorShape(self._input_shape[:-1] 
+                                          + [self._output_channels])
     self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size)
     self._output_size = tensor_shape.TensorShape(self._input_shape[:-1]
                                                  + [self._total_output_channels])
@@ -2116,8 +2110,11 @@ class Conv3DLSTMCell(ConvLSTMCell):
     """Construct Conv3DLSTM. See `ConvLSTMCell` for more details."""
     super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs)
 
-
-def _conv(args, filter_size, num_features, bias, bias_start=0.0):
+def _conv(args, 
+          filter_size,
+          num_features,
+          bias,
+          bias_start=0.0):
   """convolution:
   Args:
     args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, 
@@ -2394,19 +2391,12 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
   """
 
-  def __init__(self,
-               num_units,
-               use_peepholes=False,
-               cell_clip=None,
-               initializer=None,
-               num_proj=None,
-               proj_clip=None,
+  def __init__(self, num_units,
+               use_peepholes=False, cell_clip=None,
+               initializer=None, num_proj=None, proj_clip=None,
                forget_bias=1.0,
-               activation=None,
-               layer_norm=False,
-               norm_gain=1.0,
-               norm_shift=0.0,
-               reuse=None):
+               activation=None, layer_norm=False,
+               norm_gain=1.0, norm_shift=0.0, reuse=None):
     """Initialize the parameters for an LSTM cell.
 
     Args:
@@ -2467,6 +2457,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
   def output_size(self):
     return self._output_size
 
+
   def _linear(self,
               args,
               output_size,
@@ -2516,9 +2507,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     scope = vs.get_variable_scope()
     with vs.variable_scope(scope) as outer_scope:
       weights = vs.get_variable(
-          "kernel", [total_arg_size, output_size],
-          dtype=dtype,
-          initializer=kernel_initializer)
+        "kernel", [total_arg_size, output_size],
+        dtype=dtype,
+        initializer=kernel_initializer)
       if len(args) == 1:
         res = math_ops.matmul(args[0], weights)
       else:
@@ -2530,7 +2521,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         if bias_initializer is None:
           bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype)
         biases = vs.get_variable(
-            "bias", [output_size], dtype=dtype, initializer=bias_initializer)
+          "bias", [output_size],
+          dtype=dtype,
+          initializer=bias_initializer)
 
     if not layer_norm:
       res = nn_ops.bias_add(res, biases)
@@ -2561,6 +2554,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
       ValueError: If input size cannot be inferred from inputs via
         static shape inference.
     """
+    num_proj = self._num_units if self._num_proj is None else self._num_proj
     sigmoid = math_ops.sigmoid
 
     (c_prev, m_prev) = state
@@ -2573,14 +2567,10 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
     with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
 
       # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-      lstm_matrix = self._linear(
-          [inputs, m_prev],
-          4 * self._num_units,
-          bias=True,
-          bias_initializer=None,
-          layer_norm=self._layer_norm)
+      lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True,
+                            bias_initializer=None, layer_norm=self._layer_norm)
       i, j, f, o = array_ops.split(
-          value=lstm_matrix, num_or_size_splits=4, axis=1)
+        value=lstm_matrix, num_or_size_splits=4, axis=1)
 
       if self._layer_norm:
         i = _norm(self._norm_gain, self._norm_shift, i, "input")
@@ -2590,22 +2580,20 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
 
       # Diagonal connections
       if self._use_peepholes:
-        with vs.variable_scope(unit_scope):
+        with vs.variable_scope(unit_scope) as projection_scope:
           w_f_diag = vs.get_variable(
-              "w_f_diag", shape=[self._num_units], dtype=dtype)
+            "w_f_diag", shape=[self._num_units], dtype=dtype)
           w_i_diag = vs.get_variable(
-              "w_i_diag", shape=[self._num_units], dtype=dtype)
+            "w_i_diag", shape=[self._num_units], dtype=dtype)
           w_o_diag = vs.get_variable(
-              "w_o_diag", shape=[self._num_units], dtype=dtype)
+            "w_o_diag", shape=[self._num_units], dtype=dtype)
 
       if self._use_peepholes:
-        c = (
-            sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
-            sigmoid(i + w_i_diag * c_prev) * self._activation(j))
+        c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev +
+             sigmoid(i + w_i_diag * c_prev) * self._activation(j))
       else:
-        c = (
-            sigmoid(f + self._forget_bias) * c_prev +
-            sigmoid(i) * self._activation(j))
+        c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) *
+             self._activation(j))
 
       if self._layer_norm:
         c = _norm(self._norm_gain, self._norm_shift, c, "state")
@@ -2620,7 +2608,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):
         m = sigmoid(o) * self._activation(c)
 
       if self._num_proj is not None:
-        with vs.variable_scope("projection"):
+        with vs.variable_scope("projection") as proj_scope:
           m = self._linear(m, self._num_proj, bias=False)
 
         if self._proj_clip is not None:
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index e87ef41388..c3b180d9f4 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -192,8 +192,7 @@ class _BaseAttentionMechanism(AttentionMechanism):
       raise TypeError("probability_fn must be callable, saw type: %s" %
                       type(probability_fn).__name__)
     if score_mask_value is None:
-      score_mask_value = dtypes.as_dtype(
-          self._memory_layer.dtype).as_numpy_dtype(-np.inf)
+      score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf)
     self._probability_fn = lambda score, prev: (  # pylint:disable=g-long-lambda
         probability_fn(
             _maybe_mask_score(score, memory_sequence_length, score_mask_value),
@@ -1146,9 +1145,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
             % (len(attention_layer_sizes), len(attention_mechanisms)))
       self._attention_layers = tuple(
           layers_core.Dense(
-              attention_layer_size,
-              name="attention_layer",
-              use_bias=False,
+              attention_layer_size, name="attention_layer", use_bias=False,
               dtype=attention_mechanisms[i].dtype)
           for i, attention_layer_size in enumerate(attention_layer_sizes))
       self._attention_layer_size = sum(attention_layer_sizes)
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc
index ac8d994502..331943a3ef 100644
--- a/tensorflow/contrib/verbs/rdma.cc
+++ b/tensorflow/contrib/verbs/rdma.cc
@@ -16,8 +16,8 @@ limitations under the License.
 #ifdef TENSORFLOW_USE_VERBS
 
 #include "tensorflow/contrib/verbs/rdma.h"
-#include <fcntl.h>
 #include <cstdlib>
+#include <fcntl.h>
 #include "tensorflow/contrib/verbs/verbs_util.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
@@ -137,7 +137,7 @@ ibv_device* set_device() {
   if (!env_p_rdma_device.empty()) {
     for (device_index = 0; device_index < dev_num; device_index++) {
       if (!env_p_rdma_device.compare(
-              ibv_get_device_name(dev_list[device_index]))) {
+               ibv_get_device_name(dev_list[device_index]))) {
         CHECK(get_dev_active_port_count(dev_list[device_index]) != 0)
             << "Device " << ibv_get_device_name(dev_list[device_index])
             << " has no active ports";
@@ -147,7 +147,7 @@ ibv_device* set_device() {
     // check validity of input device
     CHECK(false) << "The device " << env_p_rdma_device << " wasn't found";
   } else {
-    // set default device
+  // set default device
     str_port_num = get_env_var("RDMA_DEVICE_PORT");
     CHECK(str_port_num.empty())
         << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user";
@@ -177,7 +177,7 @@ ibv_device* set_device() {
 // Returns:
 //   port to use
 uint8_t set_port(ibv_context* context) {
-  uint8_t port_num = 0;  // 0 is illegal port number
+  uint8_t port_num = 0; //0 is illegal port number
   string str_port_num;
   ibv_device_attr device_att;
   ibv_port_attr port_attr;
@@ -199,7 +199,9 @@ uint8_t set_port(ibv_context* context) {
     // check if port id active
     CHECK(port_attr.state == IBV_PORT_ACTIVE)
         << "Selected RDMA_DEVICE_PORT is not active";
-  } else {  // set default port
+  }
+  // set default port
+  else {
     for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) {
       rc = ibv_query_port(context, port_index, &port_attr);
       CHECK(!rc) << "Failed to query the port" << port_index;
@@ -267,7 +269,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num,
 // Function to set GID index.
 // If the port link is IB, no GID index should be selected.
 // If Ethernet but RDMA_GID_INDEX not set gid index that supports
-//   RoCE V2 will be chosen(fails if more than one IP is configured)
+//   RoCE V2 will be chosen(fails if more than one IP is configured)
 // Args:
 //   context - device context
 //   port_num - port number
@@ -300,7 +302,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
     }
   }
   switch (port_attr.link_layer) {
-    case (IBV_LINK_LAYER_ETHERNET):
+    case(IBV_LINK_LAYER_ETHERNET) :
       gid_str = get_env_var("RDMA_GID_INDEX");
       if (!gid_str.empty()) {
         gid_index = stoi(gid_str);
@@ -311,7 +313,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) {
             << "More than one IP is available, please specify GID_INDEX";
       }
       break;
-    case (IBV_LINK_LAYER_INFINIBAND):  // no need in GID index
+    case(IBV_LINK_LAYER_INFINIBAND) :  // no need in GID index
       break;
     default:
       LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and "
@@ -372,8 +374,7 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) {
         break;
       default:
         CHECK(0) << "Error: MTU input value must be one of the following: 256, "
-                    "512, 1024, 2048, 4096. MTU "
-                 << mtu << " is invalid\n";
+                    "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n";
         break;
     }
     CHECK(mtu < port_attr.active_mtu)
@@ -452,9 +453,9 @@ void RdmaAdapter::Process_CQ() {
     CHECK_GE(ne, 0);
     for (int i = 0; i < ne; ++i) {
       CHECK(wc_[i].status == IBV_WC_SUCCESS)
-          << "Failed status \n"
-          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
-          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;
+          << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " "
+          << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " "
+          << wc_[i].vendor_err;
       if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
         RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id);
         // put back a recv wr.
@@ -610,7 +611,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name,
   // create message and ack buffers, then initialize the tables.
   {
     const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer",
-                                   "tx_ack_buffer", "rx_ack_buffer"};
+                                   "tx_ack_buffer",     "rx_ack_buffer"};
     tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]);
     rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]);
     tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]);
@@ -671,7 +672,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) {
 void RdmaChannel::Recv() {
   struct ibv_recv_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   struct ibv_recv_wr* bad_wr;
   CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv";
 }
@@ -825,11 +826,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class;
 
     int r;
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
-                                  IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
-                                  IBV_QP_MAX_DEST_RD_ATOMIC |
-                                  IBV_QP_MIN_RNR_TIMER)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV |
+                                              IBV_QP_PATH_MTU |
+                                              IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
+                                              IBV_QP_MAX_DEST_RD_ATOMIC |
+                                              IBV_QP_MIN_RNR_TIMER)))
         << "QP to Ready to Receive " << r;
 
     memset(&attr, 0, sizeof(ibv_qp_attr));
@@ -840,10 +841,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) {
     attr.rnr_retry = 7; /* infinite */
     attr.max_rd_atomic = 1;
 
-    CHECK(!(r = ibv_modify_qp(qp_, &attr,
-                              IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
-                                  IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
-                                  IBV_QP_MAX_QP_RD_ATOMIC)))
+    CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT |
+                                              IBV_QP_RETRY_CNT |
+                                              IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN |
+                                              IBV_QP_MAX_QP_RD_ATOMIC)))
         << "QP to Ready to Send " << r;
 
     connected_ = true;
@@ -930,7 +931,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) {
 
   struct ibv_send_wr wr;
   memset(&wr, 0, sizeof(wr));
-  wr.wr_id = (uint64_t)this;
+  wr.wr_id = (uint64_t) this;
   wr.sg_list = &list;
   wr.num_sge = 1;
   wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
@@ -1025,9 +1026,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
     TensorProto proto;
     if (src_dev->tensorflow_gpu_device_info() &&
         (!send_args.alloc_attrs.on_host())) {
-      CHECK(send_args.device_context)
-          << "send dev name: " << src_dev->name()
-          << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
+      CHECK(send_args.device_context) << "send dev name: " << src_dev->name()
+                                      << " gpu_info: "
+                                      << src_dev->tensorflow_gpu_device_info();
 
       if (can_memcpy) {
         AllocatorAttributes host_alloc_attrs;
@@ -1053,8 +1054,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback(
         // aync instead
         GPUUtil::SetProtoFromGPU(
             in, src_dev, send_args.device_context, &proto, is_dead,
-            [this, proto, buffer_size, key, in, step_id, key_with_step_id,
-             is_dead, send_args, recv_args](const Status& s) mutable {
+	    [this, proto, buffer_size, key, in, step_id, key_with_step_id,
+            is_dead, send_args, recv_args](const Status& s) mutable {
               CHECK(s.ok()) << "copy proto from gpu sync";
               auto tensor_bytes = proto.ByteSize();
               buffer_size += tensor_bytes;
diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
deleted file mode 100644
index cd7ec6e551..0000000000
--- a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-op {
-  graph_op_name: "UniqueV2"
-  in_arg {
-    name: "x"
-    description: <<END
-A `Tensor`.
-END
-  }
-  in_arg {
-    name: "axis"
-    description: <<END
-A `Tensor` of type `int64` (default: 0). The axis of the Tensor to
-find the unique elements.
-END
-  }
-  out_arg {
-    name: "y"
-    description: <<END
-A `Tensor`. Unique elements along the `axis` of `Tensor` x.
-END
-  }
-  out_arg {
-    name: "idx"
-    description: <<END
-A 1-D Tensor. Has the same type as x that contains the index of each
-value of x in the output y.
-END
-  }
-  summary: "Finds unique elements in a 1-D tensor."
-  description: <<END
-This operation returns a tensor `y` containing all of the unique elements of `x`
-sorted in the same order that they occur in `x`. This operation also returns a
-tensor `idx` the same size as `x` that contains the index of each value of `x`
-in the unique output `y`. In other words:
-
-`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]`
-
-For example:
-
-```
-# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
-y, idx = unique(x)
-y ==> [1, 2, 4, 7, 8]
-idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
-```
-END
-}
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
index 77a96d1e03..0a3355cdbc 100644
--- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt
@@ -26,8 +26,6 @@ need not be sorted and need not cover all values in the full
 range of valid values.
 
 If the sum is empty for a given segment ID `i`, `output[i] = 0`.
-If the given segment ID `i` is negative, the value is dropped and will not be
-added to the sum of the segment.
 
 `num_segments` should equal the number of distinct segment IDs.
 
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 223dd12f8f..d0dba6e1f0 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -455,7 +455,7 @@ class Graph {
   // the corresponding NodeDef to reflect the change.
   // REQUIRES: The control edge must exist.
   void RemoveControlEdge(const Edge* e);
-
+  
   // Updates the input to a node.  The existing edge to `dst` is removed and an
   // edge from `new_src` to `dst` is created. The NodeDef associated with `dst`
   // is also updated.
diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index e2ce0ba046..2aa1b31e15 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -118,9 +118,11 @@ class GraphTest : public ::testing::Test {
     LOG(FATAL) << name;
   }
 
-  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) {
-    for (const Edge* e : dst->in_edges()) {
-      if (e->IsControlEdge() && e->src() == src &&
+  bool ControlEdgeExistsInGraphOrNodeDef(const Node* src,
+                                         const Node* dst) {
+    for (const Edge *e : dst->in_edges()) {
+      if (e->IsControlEdge() &&
+          e->src() == src &&
           e->src_output() == Graph::kControlSlot &&
           e->dst_input() == Graph::kControlSlot) {
         return true;
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index b4a5a3c796..f1cb9a1860 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1720,7 +1720,6 @@ tf_cuda_cc_tests(
         ":data_flow",
         ":ops_testutil",
         ":ops_util",
-        "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc
index 890fa3121b..766d63e3be 100644
--- a/tensorflow/core/kernels/bincount_op.cc
+++ b/tensorflow/core/kernels/bincount_op.cc
@@ -97,9 +97,8 @@ class BincountOp : public OpKernel {
     const Tensor& weights_t = ctx->input(2);
 
     int32 size = size_tensor.scalar<int32>()();
-    OP_REQUIRES(
-        ctx, size >= 0,
-        errors::InvalidArgument("size (", size, ") must be non-negative"));
+    OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument(
+                                    "size (", size, ") must be non-negative"));
 
     const auto arr = arr_t.flat<int32>();
     const auto weights = weights_t.flat<T>();
diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h
index cd3d560cd1..0f8dd2b82a 100644
--- a/tensorflow/core/kernels/bincount_op.h
+++ b/tensorflow/core/kernels/bincount_op.h
@@ -16,11 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_BINCOUNT_OP_H_
 #define TENSORFLOW_BINCOUNT_OP_H_
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 namespace tensorflow {
 
diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
index 6074b3e1f6..ae9e26ffdf 100644
--- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc
@@ -17,12 +17,12 @@ limitations under the License.
 
 #define EIGEN_USE_GPU
 
+#include "tensorflow/core/kernels/bincount_op.h"
 #include "external/cub_archive/cub/device/device_histogram.cuh"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/kernels/bincount_op.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/cuda_kernel_helper.h"
@@ -93,8 +93,8 @@ struct BincountFunctor<GPUDevice, T> {
         /* num_samples */ num_samples,
         /* stream */ stream);
     if (err != cudaSuccess) {
-      return errors::Internal(
-          "Could not launch HistogramEven: ", cudaGetErrorString(err), ".");
+      return errors::Internal("Could not launch HistogramEven: ",
+                              cudaGetErrorString(err), ".");
     }
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc
index cb04b40637..14becc87a7 100644
--- a/tensorflow/core/kernels/bincount_op_test.cc
+++ b/tensorflow/core/kernels/bincount_op_test.cc
@@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) {
   Tensor arr(DT_INT32, TensorShape({arr_size}));
   arr.flat<int32>() = arr.flat<int32>().setRandom().abs();
 
-  Tensor size(DT_INT32, TensorShape({static_cast<int32>(1)}));
-  size.flat<int32>()(0) = static_cast<int32>(nbins);
+  Tensor size(DT_INT32, TensorShape({(int32)1}));
+  size.flat<int32>()(0) = (int32)nbins;
 
   Tensor weights(DT_INT32, TensorShape({0}));
 
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
index 325dee793b..aafbbe41b4 100644
--- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -77,10 +77,10 @@ struct BucketizeFunctor<GPUDevice, T> {
     TF_RETURN_IF_ERROR(boundaries_array.Finalize());
 
     CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
-    BucketizeCustomKernel<T>
-        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-            input.size(), input.data(), boundaries_vector.size(),
-            boundaries_array.data(), output.data());
+    BucketizeCustomKernel<
+        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+        input.size(), input.data(), boundaries_vector.size(),
+        boundaries_array.data(), output.data());
 
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index c2d24d1f12..f819fccbfb 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1101,27 +1101,29 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
   bool cudnn_use_autotune_;
 };
 
+
+
 #define REGISTER_GPU_KERNEL(T)                                                \
   REGISTER_KERNEL_BUILDER(                                                    \
       Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"),  \
       Conv3DBackpropInputOp<GPUDevice, T>);                                   \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2")                       \
-                              .Device(DEVICE_GPU)                             \
-                              .TypeConstraint<T>("T")                         \
-                              .HostMemory("input_sizes"),                     \
-                          Conv3DBackpropInputOp<GPUDevice, T>);               \
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("input_sizes"),                       \
+                        Conv3DBackpropInputOp<GPUDevice, T>);                 \
   REGISTER_KERNEL_BUILDER(                                                    \
-      Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
-      Conv3DBackpropFilterOp<GPUDevice, T>);                                  \
+    Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"),   \
+    Conv3DBackpropFilterOp<GPUDevice, T>);                                    \
   REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2")                      \
-                              .Device(DEVICE_GPU)                             \
-                              .TypeConstraint<T>("T")                         \
-                              .HostMemory("filter_sizes"),                    \
-                          Conv3DBackpropFilterOp<GPUDevice, T>);
+                            .Device(DEVICE_GPU)                               \
+                            .TypeConstraint<T>("T")                           \
+                            .HostMemory("filter_sizes"),                      \
+                        Conv3DBackpropFilterOp<GPUDevice, T>);
 TF_CALL_half(REGISTER_GPU_KERNEL);
 TF_CALL_float(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
-
+     
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc
index a7673afd0b..8d44208aa7 100644
--- a/tensorflow/core/kernels/cwise_op_asinh.cc
+++ b/tensorflow/core/kernels/cwise_op_asinh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double);
diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc
index 7b688db4c5..bbc69e45aa 100644
--- a/tensorflow/core/kernels/cwise_op_atanh.cc
+++ b/tensorflow/core/kernels/cwise_op_atanh.cc
@@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double,
 
 #ifdef TENSORFLOW_USE_SYCL
 REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double);
-#endif  // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
 
 #if GOOGLE_CUDA
 REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double);
diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
index 9347978d51..53d65a22d1 100644
--- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc
@@ -231,8 +231,7 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args,
       }
       // Pad to vector-register width (if needed).
       for (int64 d = 0; d < pad_size; ++d) {
-        buffer[buf_base + vectorized_size + scalar_size + d] =
-            static_cast<T>(0);
+        buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0);
       }
     }
   }
@@ -511,8 +510,7 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice,
-                                                          Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>;
 
@@ -887,8 +885,7 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args,
 
 #if GOOGLE_CUDA
 
-extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice,
-                                                           Eigen::half>;
+extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>;
 extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>;
 
diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc
index 30ecd0c2ba..2759ecb2f1 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.cc
+++ b/tensorflow/core/kernels/depthwise_conv_op.cc
@@ -427,11 +427,6 @@ TF_CALL_double(REGISTER_CPU_KERNEL);
 #endif
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Eigen::half>("T"),
-                        DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
-
 REGISTER_KERNEL_BUILDER(
     Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
     DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>);
diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h
index 097a9f5bfa..11aed5b415 100644
--- a/tensorflow/core/kernels/depthwise_conv_op.h
+++ b/tensorflow/core/kernels/depthwise_conv_op.h
@@ -158,8 +158,7 @@ struct DepthwiseFilterPadOp {
       }
       // Pad the remainder of output to vector-register boundary.
       for (int64 j = 0; j < pad_size; ++j) {
-        padded_filter[output_base + vectorized_size + scalar_size + j] =
-            static_cast<T>(0);
+        padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0);
       }
     }
   }
diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc
index d8bdb700e6..157ce106ce 100644
--- a/tensorflow/core/kernels/maxpooling_op.cc
+++ b/tensorflow/core/kernels/maxpooling_op.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "tensorflow/core/kernels/maxpooling_op.h"
 
 #include <vector>
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
@@ -38,6 +37,7 @@ limitations under the License.
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
 #include "tensorflow/core/util/use_cudnn.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #if GOOGLE_CUDA
 #include "tensorflow/core/kernels/maxpooling_op_gpu.h"
@@ -359,8 +359,7 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
 
     use_dnn_ = CanUseCudnn();
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -889,8 +888,7 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
                 errors::Unimplemented(
                     "Pooling is not yet supported on the batch dimension."));
 
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1054,8 +1052,7 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel {
                     "Pooling is not yet supported on the batch dimension."));
     use_dnn_ = CanUseCudnn();
 
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
@@ -1140,8 +1137,7 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel {
     }
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
     use_dnn_ = CanUseCudnn();
-    TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false,
-                                   &propagate_nans_));
+    ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_);
   }
 
   void Compute(OpKernelContext* context) override {
diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
index f8daaca4c9..d96b844383 100644
--- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc
@@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()(
   if (propagate_nans) {
     MaxPoolForwardNHWC<true>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>(
-            output_size, bottom_data, height, width, channels, pooled_height,
-            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-            top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
   } else {
     MaxPoolForwardNHWC<false>
         <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock,
-           kThreadsPerBlock, 0, d.stream()>>>(
-            output_size, bottom_data, height, width, channels, pooled_height,
-            pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
-            top_data, mask);
+           kThreadsPerBlock, 0, d.stream()>>>
+        (output_size, bottom_data, height, width, channels, pooled_height,
+         pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l,
+         top_data, mask);
   }
   return d.ok();
 }
diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h
index c4d5a45d3c..0a5be4fec9 100644
--- a/tensorflow/core/kernels/mkl_tfconv_op.h
+++ b/tensorflow/core/kernels/mkl_tfconv_op.h
@@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel {
       // Allocate output tensor.
       TensorShape output_shape = input_shape.GetTfShape();
       Tensor* output_tensor = NULL;
-      OP_REQUIRES_OK(context, context->allocate_output(
-                                  input_number, output_shape, &output_tensor));
+      OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                                  output_shape, &output_tensor));
       CHECK_NOTNULL(output_tensor);
 
       // Do we need to reorder Mkl layout into TensorFlow layout?
@@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel {
         // If not, just forward input tensor to output tensor.
         CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
       }
-    } catch (mkldnn::error& e) {
+    } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + std::string(e.message) + ", in file " +
-                         std::string(__FILE__) + ":" + std::to_string(__LINE__);
-      OP_REQUIRES_OK(
-          context,
-          errors::Aborted("Operation received an exception:", error_msg));
+                       ", message: " + std::string(e.message) +
+                       ", in file " + std::string(__FILE__) + ":" +
+                       std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+        errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 #else
@@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel {
 
     // Allocate output tensor.
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape,
-                                                     &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(input_number,
+                              output_shape, &output_tensor));
 
     dnnLayout_t output_layout =
         static_cast<dnnLayout_t>(input_shape.GetTfLayout());
diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h
index 93ef512778..d3d1b56c9d 100644
--- a/tensorflow/core/kernels/ops_util.h
+++ b/tensorflow/core/kernels/ops_util.h
@@ -98,19 +98,6 @@ gtl::InlinedVector<T, 8> ComputeStride(const TensorShape& shape) {
   return strides;
 }
 
-// Helper to compute 'strides' given an Eigen TensorDimensions
-template <typename T, typename EigenDimensions>
-gtl::InlinedVector<T, 8> ComputeEigenStrides(const EigenDimensions& shape) {
-  const int ndims = shape.rank();
-  gtl::InlinedVector<T, 8> strides(ndims);
-  T stride = 1;
-  for (int i = ndims - 1; i >= 0; --i) {
-    strides[i] = stride;
-    stride *= static_cast<T>(shape[i]);
-  }
-  return strides;
-}
-
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_OPS_UTIL_H_
diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc
index cda6d7d8f9..f8b0285c50 100644
--- a/tensorflow/core/platform/posix/error.cc
+++ b/tensorflow/core/platform/posix/error.cc
@@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) {
     case ENETUNREACH:   // Network unreachable
     case ENOLCK:        // No locks available
     case ENOLINK:       // Link has been severed
-#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \
-      defined(__HAIKU__))
+#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \
+	|| defined(__HAIKU__))
     case ENONET:  // Machine is not on the network
 #endif
       code = error::UNAVAILABLE;
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 614ee00b01..09f69a95c1 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #ifdef TF_USE_SNAPPY
 #include "snappy.h"
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
-    defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
 #include <thread>
 #endif
 
@@ -62,8 +62,8 @@ int NumSchedulableCPUs() {
   }
   perror("sched_getaffinity");
 #endif
-#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
-    defined(__HAIKU__)
+#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \
+	|| defined(__HAIKU__)
   unsigned int count = std::thread::hardware_concurrency();
   if (count > 0) return static_cast<int>(count);
 #endif
diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h
index cf11f419a4..8fa0dfbed9 100644
--- a/tensorflow/core/util/cuda_kernel_helper.h
+++ b/tensorflow/core/util/cuda_kernel_helper.h
@@ -752,12 +752,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value,
   return __shfl_down_sync(mask, value, delta, width);
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown(
-    unsigned mask, Eigen::half value, int delta, int width = warpSize) {
-  return Eigen::half(
-      __shfl_down_sync(mask, static_cast<uint16>(value), delta, width));
-}
-
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
@@ -780,12 +774,6 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value,
   return __shfl_xor_sync(mask, value, laneMask, width);
 }
 
-__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor(
-    unsigned mask, Eigen::half value, int laneMask, int width = warpSize) {
-  return Eigen::half(
-      __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width));
-}
-
 // Variant of the (undocumented) version from the CUDA SDK, but using unsigned
 // instead of float for lo and hi (which is incorrect with ftz, for example).
 // A bug has been filed with NVIDIA and will be fixed in the next CUDA release.
diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h
index 148c7851bd..118ff0d0d6 100644
--- a/tensorflow/core/util/mkl_util.h
+++ b/tensorflow/core/util/mkl_util.h
@@ -24,25 +24,25 @@ limitations under the License.
 #include "mkl_dnn_types.h"
 #include "mkl_service.h"
 #include "mkl_trans.h"
-#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
-#include "tensorflow/core/graph/mkl_graph_util.h"
+#include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/util/padding.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/graph/mkl_graph_util.h"
 
 #ifdef INTEL_MKL_DNN
 #include "mkldnn.hpp"
 
-using mkldnn::engine;
 using mkldnn::memory;
-using mkldnn::padding_kind;
-using mkldnn::primitive;
 using mkldnn::reorder;
+using mkldnn::primitive;
+using mkldnn::padding_kind;
+using mkldnn::engine;
 #endif
 
 // The file contains a number of utility classes and functions used by MKL
@@ -56,14 +56,8 @@ namespace tensorflow {
 // Tensorflow tensor.
 
 typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims;
-typedef enum {
-  Dim_N = 0,
-  Dim_C = 1,
-  Dim_H = 2,
-  Dim_W = 3,
-  Dim_O = 0,
-  Dim_I = 1
-} MklDnnDims;
+typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3,
+               Dim_O = 0, Dim_I = 1 } MklDnnDims;
 
 class MklShape {
  public:
@@ -242,7 +236,8 @@ class MklShape {
   (IS_MKL_TENSOR_OFFSET + sizeof(size_t))  // Location of dimension_
 // Location of sizes. Note dim is not used here, left here
 // to make macros consistent.
-#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t))
+#define SIZES_OFFSET(dims) \
+  (DIMS_OFFSET + sizeof(size_t))
 #define STRIDES_OFFSET(dims) \
   (SIZES_OFFSET(dims) + dims * sizeof(size_t))  // Location of strides
 #define MKL_LAYOUT_OFFSET(dims) \
@@ -337,7 +332,7 @@ class MklDnnShape {
     /// Number of dimensions in Tensorflow format
     size_t dimension_ = 0;
     /// Required by MKLDNN for conversions
-    mkldnn_dims_t sizes_;  // Required by MKL for conversions
+    mkldnn_dims_t sizes_;    // Required by MKL for conversions
     memory::format tf_data_format_ = memory::format::format_undef;
     memory::data_type T_ = memory::data_type::data_undef;
     // MKL layout
@@ -350,13 +345,15 @@ class MklDnnShape {
   typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t;
 #define INVALID_DIM_SIZE -1
 
+
  public:
   MklDnnShape() {
-    for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-         ++i) {
+    for (size_t i = 0; i < sizeof(data_.sizes_) /
+                           sizeof(data_.sizes_[0]); ++i) {
       data_.sizes_[i] = -1;
     }
-    for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) {
+    for (size_t i = 0; i < sizeof(data_.map_) /
+                           sizeof(data_.map_[0]); ++i) {
       data_.map_[i] = -1;
     }
   }
@@ -372,26 +369,26 @@ class MklDnnShape {
   inline void SetDimensions(const size_t dimension) {
     data_.dimension_ = dimension;
   }
-  inline size_t GetDimension(char dimension) const {
+  inline size_t GetDimension(char dimension)const {
     int index = GetMklDnnTensorDimIndex(dimension);
     CHECK(index >= 0 && index < this->GetDimension())
         << "Invalid index from the dimension: " << index << ", " << dimension;
     return this->DimSize(index);
   }
 
-  inline int32 GetMklDnnTensorDimIndex(char dimension) const {
+  inline int32 GetMklDnnTensorDimIndex(char dimension)const {
     switch (dimension) {
-      case 'N':
-        return MklDnnDims::Dim_N;
-      case 'C':
-        return MklDnnDims::Dim_C;
-      case 'H':
-        return MklDnnDims::Dim_H;
-      case 'W':
-        return MklDnnDims::Dim_W;
-      default:
-        LOG(FATAL) << "Invalid dimension: " << dimension;
-        return -1;  // Avoid compiler warning about missing return value
+  case 'N':
+    return MklDnnDims::Dim_N;
+  case 'C':
+    return MklDnnDims::Dim_C;
+  case 'H':
+    return MklDnnDims::Dim_H;
+  case 'W':
+    return MklDnnDims::Dim_W;
+  default:
+    LOG(FATAL) << "Invalid dimension: " << dimension;
+    return -1;  // Avoid compiler warning about missing return value
     }
   }
 
@@ -406,9 +403,9 @@ class MklDnnShape {
     memory::dims retVal;
     if (data_.is_mkl_tensor_) {
       int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]);
-      for (size_t i = 0; i < dimensions; i++) {
+      for (size_t i = 0 ; i < dimensions; i++) {
         if (data_.sizes_[i] != INVALID_DIM_SIZE)
-          retVal.push_back(data_.sizes_[i]);
+        retVal.push_back(data_.sizes_[i]);
       }
     } else {
       CHECK_EQ(data_.is_mkl_tensor_, true);
@@ -417,7 +414,7 @@ class MklDnnShape {
   }
 
   inline int64 DimSize(int index) const {
-    CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0]));
+    CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0]));
     return data_.sizes_[index];
   }
 
@@ -454,7 +451,7 @@ class MklDnnShape {
   /// We don't create primitive_descriptor for TensorFlow layout now.
   /// We use lazy evaluation and create it only when needed.
   inline void SetTfLayout(size_t dims, const memory::dims& sizes,
-                          memory::format format) {
+                   memory::format format) {
     CHECK_EQ(dims, sizes.size());
     data_.dimension_ = dims;
     for (size_t ii = 0; ii < dims; ii++) {
@@ -500,7 +497,9 @@ class MklDnnShape {
     SetTfDimOrder(dimension, data_format);
   }
 
-  inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; }
+  inline const mkldnn_dim_t* GetTfToMklDimMap() const {
+    return &data_.map_[0];
+  }
   inline size_t TfDimIdx(int index) const { return data_.map_[index]; }
   inline int64 TfDimSize(int index) const {
     return data_.sizes_[TfDimIdx(index)];
@@ -554,7 +553,9 @@ class MklDnnShape {
 
   /// Size of buffer to hold the serialized object, the size is computed by
   /// following above mentioned order
-  inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); }
+  inline size_t GetSerializeBufferSize() const {
+    return sizeof(MklShapeData);
+  }
 
   void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const {
     CHECK(buf_size >= GetSerializeBufferSize())
@@ -565,12 +566,12 @@ class MklDnnShape {
   void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) {
     // Make sure buffer holds at least is_mkl_tensor_.
     CHECK(buf_size >= sizeof(data_.is_mkl_tensor_))
-        << "Buffer size is too small in DeSerializeMklDnnShape";
+      << "Buffer size is too small in DeSerializeMklDnnShape";
 
     const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf);
     if (is_mkl_tensor) {  // If it is an MKL Tensor then read the rest
       CHECK(buf_size >= GetSerializeBufferSize())
-          << "Buffer size is too small in DeSerializeMklDnnShape";
+        << "Buffer size is too small in DeSerializeMklDnnShape";
       data_ = *reinterpret_cast<const MklShapeData*>(buf);
     }
   }
@@ -659,7 +660,8 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) {
 }
 
 #ifdef INTEL_MKL_DNN
-inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) {
+inline void GetMklShape(OpKernelContext* ctext, int n,
+                        MklDnnShape* mklshape) {
   mklshape->DeSerializeMklDnnShape(
       ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs()))
           .flat<uint8>()
@@ -698,7 +700,8 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name,
 /// Get shape of input tensor pointed by 'input_idx' in TensorShape format.
 /// If the input tensor is in MKL layout, then obtains TensorShape from
 /// MklShape.
-inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) {
+inline TensorShape GetTfShape(OpKernelContext* context,
+                              size_t input_idx) {
   // Sanity check.
   CHECK_NOTNULL(context);
   CHECK_LT(input_idx, context->num_inputs());
@@ -818,7 +821,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
 
 template <typename T>
 inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out,
-                           TensorShape tf_shape) {
+                              TensorShape tf_shape) {
   OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(),
                                                  tf_shape, tensor_out));
 }
@@ -1096,8 +1099,7 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) {
 ///
 /// @input None
 /// @return memory::data_type corresponding to type T
-template <typename T>
-static memory::data_type MklDnnType();
+template<typename T> static memory::data_type MklDnnType();
 
 /// Instantiation for float type. Add similar instantiations for other
 /// type if needed.
@@ -1112,11 +1114,10 @@ memory::data_type MklDnnType<float>() {
 /// @return: memory::format corresponding to TensorFlow data format;
 ///          Fails with an error if invalid data format.
 inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
-  if (format == FORMAT_NHWC)
-    return memory::format::nhwc;
-  else if (format == FORMAT_NCHW)
-    return memory::format::nchw;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == FORMAT_NHWC) return memory::format::nhwc;
+  else if (format == FORMAT_NCHW) return memory::format::nchw;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
   // Return to get rid of compiler warning
   return memory::format::format_undef;
 }
@@ -1127,11 +1128,10 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) {
 /// @return: Tensorflow data format corresponding to memory::format
 ///          Fails with an error if invalid data format.
 inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) {
-  if (format == memory::format::nhwc)
-    return FORMAT_NHWC;
-  else if (format == memory::format::nchw)
-    return FORMAT_NCHW;
-  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format"));
+  if (format == memory::format::nhwc) return FORMAT_NHWC;
+  else if (format == memory::format::nchw) return FORMAT_NCHW;
+  TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT,
+                     "Unsupported data format"));
 }
 
 /// Map TensorShape object into memory::dims required by MKL-DNN
@@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) {
 /// @input TensorShape object in shape
 /// @return memory::dims in MKL-DNN required NCHW format
 inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape,
-                                              TensorFormat format) {
+                                            TensorFormat format) {
   // Check validity of format.
   CHECK_NE(TFDataFormatToMklDnnDataFormat(format),
            memory::format::format_undef);
@@ -1237,23 +1237,21 @@ class MklDnnData {
   const engine* cpu_engine_;
 
  public:
-  explicit MklDnnData(const engine* e)
-      : user_memory_(nullptr),
-        reorder_memory_(nullptr),
-        op_md_(nullptr),
-        cpu_engine_(e) {}
+  explicit MklDnnData(const engine* e) : user_memory_(nullptr),
+                                         reorder_memory_(nullptr),
+                                         op_md_(nullptr), cpu_engine_(e) {}
 
   ~MklDnnData() {
     cpu_engine_ = nullptr;  // We don't own this.
-    delete (user_memory_);
-    delete (reorder_memory_);
-    delete (op_md_);
+    delete(user_memory_);
+    delete(reorder_memory_);
+    delete(op_md_);
   }
 
   inline void* GetTensorBuffer(const Tensor* tensor) const {
     CHECK_NOTNULL(tensor);
-    return const_cast<void*>(
-        static_cast<const void*>(tensor->flat<T>().data()));
+    return const_cast<void*>(static_cast<const void*>(
+              tensor->flat<T>().data()));
   }
 
   /// Set user memory primitive using specified dimensions, memory format and
@@ -1285,7 +1283,7 @@ class MklDnnData {
   /// @return: memory::desc object corresponding to blocked memory format
   ///          for given dimensions and strides.
   static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim,
-                                                  const memory::dims& strides) {
+      const memory::dims& strides) {
     CHECK_EQ(dim.size(), strides.size());
 
     // We have to construct memory descriptor in a C style. This is not at all
@@ -1354,7 +1352,7 @@ class MklDnnData {
     CHECK_NOTNULL(cpu_engine_);
     // TODO(nhasabni): can we remove dynamic memory allocation?
     if (data_buffer) {
-      user_memory_ = new memory(pd, data_buffer);
+     user_memory_ = new memory(pd, data_buffer);
     } else {
       user_memory_ = new memory(pd);
     }
diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc
index 8b73eadb40..6aef3d86e9 100644
--- a/tensorflow/core/util/mkl_util_test.cc
+++ b/tensorflow/core/util/mkl_util_test.cc
@@ -54,6 +54,7 @@ TEST(MklUtilTest, MklDnnTfShape) {
   EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape);
 }
 
+
 TEST(MklUtilTest, MklDnnBlockedFormatTest) {
   // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension
   // first (case 1) and then it being outermost dimension (case 2).
diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
index 313c09e1e4..92cc3bd60e 100644
--- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
+++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java
@@ -84,10 +84,11 @@ public class ShapeTest {
     assertEquals(Shape.scalar(), Shape.scalar());
     assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3));
 
-    assertNotEquals(Shape.make(1, 2), null);
-    assertNotEquals(Shape.make(1, 2), new Object());
+    assertNotEquals(Shape.make(1,2), null);
+    assertNotEquals(Shape.make(1,2), new Object());
     assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4));
 
+
     assertNotEquals(Shape.unknown(), Shape.unknown());
     assertNotEquals(Shape.make(-1), Shape.make(-1));
     assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3));
@@ -102,3 +103,4 @@ public class ShapeTest {
     assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode());
   }
 }
+
diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py
index 750af20e8a..3512f66284 100644
--- a/tensorflow/python/estimator/inputs/numpy_io.py
+++ b/tensorflow/python/estimator/inputs/numpy_io.py
@@ -117,11 +117,11 @@ def numpy_input_fn(x,
         raise ValueError('y cannot be empty dict, use None instead.')
 
       ordered_dict_y = collections.OrderedDict(
-          sorted(y.items(), key=lambda t: t[0]))
+        sorted(y.items(), key=lambda t: t[0]))
       target_keys = list(ordered_dict_y.keys())
 
       duplicate_keys = set(feature_keys).intersection(set(target_keys))
-      if duplicate_keys:
+      if len(duplicate_keys):
         raise ValueError('{} duplicate keys are found in both x and y: '
                          '{}'.format(len(duplicate_keys), duplicate_keys))
 
@@ -131,14 +131,16 @@ def numpy_input_fn(x,
       ordered_dict_data[target_keys] = y
 
     if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1:
-      shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys}
+      shape_dict_of_x = {k: ordered_dict_data[k].shape
+                         for k in feature_keys}
 
       if target_keys is None:
         shape_of_y = None
       elif isinstance(target_keys, string_types):
         shape_of_y = y.shape
       else:
-        shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys}
+        shape_of_y = {k: ordered_dict_data[k].shape
+                      for k in target_keys}
 
       raise ValueError('Length of tensors in x and y is mismatched. All '
                        'elements in x and y must have the same length.\n'
@@ -153,12 +155,11 @@ def numpy_input_fn(x,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
 
-    batch = (
-        queue.dequeue_many(batch_size)
-        if num_epochs is None else queue.dequeue_up_to(batch_size))
+    batch = (queue.dequeue_many(batch_size) if num_epochs is None
+                else queue.dequeue_up_to(batch_size))
 
     # Remove the first `Tensor` in `batch`, which is the row number.
-    if batch:
+    if len(batch) > 0:
       batch.pop(0)
 
     features = dict(zip(feature_keys, batch[:len(feature_keys)]))
diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py
index 1374e3f7e1..65eae7a7dc 100644
--- a/tensorflow/python/estimator/inputs/numpy_io_test.py
+++ b/tensorflow/python/estimator/inputs/numpy_io_test.py
@@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase):
 
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
-          x, y, batch_size=2, shuffle=False, num_epochs=1)
+        x, y, batch_size=2, shuffle=False, num_epochs=1)
       features_tensor, targets_tensor = input_fn()
 
       coord = coordinator.Coordinator()
@@ -362,10 +362,13 @@ class NumpyIoTest(test.TestCase):
     a = np.arange(4) * 1.0
     b = np.arange(32, 36)
     x = {'a': a, 'b': b}
-    y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b}
+    y = {'y1': np.arange(-32, -28),
+         'a': a,
+         'y2': np.arange(32, 28, -1),
+         'b': b}
     with self.test_session():
       with self.assertRaisesRegexp(
-          ValueError, '2 duplicate keys are found in both x and y'):
+              ValueError, '2 duplicate keys are found in both x and y'):
         failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
         failing_input_fn()
 
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4c026590c2..1610214d54 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -987,9 +987,10 @@ class TensorFlowTestCase(googletest.TestCase):
       msg: An optional string message to append to the failure message.
     """
     # f1 == f2 is needed here as we might have: f1, f2 = inf, inf
-    self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err,
-                    "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
-                                           if msg is not None else ""))
+    self.assertTrue(
+        f1 == f2 or math.fabs(f1 - f2) <= err,
+        "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
+                               if msg is not None else ""))
 
   def assertArrayNear(self, farray1, farray2, err):
     """Asserts that two float arrays are near each other.
diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 1bf2b70c1b..76b80e60ea 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     arr = np.random.rand(*arr_shape)
     mask = make_mask(arr_shape[:ndims_mask])
     if axis is not None:
-      mask = make_mask(arr_shape[axis:ndims_mask + axis])
+      mask = make_mask(arr_shape[axis:ndims_mask+axis])
     if axis is None or axis == 0:
       masked_arr = arr[mask]
     elif axis == 1:
-      masked_arr = arr[:, mask]
+      masked_arr = arr[:,mask]
     elif axis == 2:
-      masked_arr = arr[:, :, mask]
-    with self.test_session():
+      masked_arr = arr[:,:,mask]
+    with self.test_session() as sess:
       masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
       # Leading dimension size of masked_tensor is always unknown until runtime
       # since we don't how many elements will be kept.
       leading = 1 if axis is None else axis + 1
       self.assertAllEqual(masked_tensor.get_shape()[leading:],
-                          masked_arr.shape[leading:])
+          masked_arr.shape[leading:])
 
       self.assertAllClose(masked_arr, masked_tensor.eval())
 
@@ -1078,7 +1078,6 @@ class PadTest(test_util.TensorFlowTestCase):
                            [0, 0, 4, 5, 6, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0]])
 
-
 class InvertPermutationTest(test_util.TensorFlowTestCase):
 
   def testInvertPermutation(self):
diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py
index 2767df127e..79285476b4 100644
--- a/tensorflow/python/kernel_tests/bincount_op_test.py
+++ b/tensorflow/python/kernel_tests/bincount_op_test.py
@@ -25,7 +25,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
-
 class BincountTest(test_util.TensorFlowTestCase):
 
   def test_empty(self):
@@ -73,7 +72,8 @@ class BincountTest(test_util.TensorFlowTestCase):
         else:
           weights = np.random.random(num_samples)
         self.assertAllClose(
-            math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights))
+            math_ops.bincount(arr, weights).eval(),
+            np.bincount(arr, weights))
 
   def test_random_without_weights(self):
     num_samples = 10000
@@ -83,7 +83,8 @@ class BincountTest(test_util.TensorFlowTestCase):
         arr = np.random.randint(0, 1000, num_samples)
         weights = np.ones(num_samples).astype(dtype)
         self.assertAllClose(
-            math_ops.bincount(arr, None).eval(), np.bincount(arr, weights))
+            math_ops.bincount(arr, None).eval(),
+            np.bincount(arr, weights))
 
   def test_zero_weights(self):
     with self.test_session(use_gpu=True):
diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py
index 68817cc256..6cbdd4cbb3 100644
--- a/tensorflow/python/kernel_tests/constant_op_test.py
+++ b/tensorflow/python/kernel_tests/constant_op_test.py
@@ -439,10 +439,11 @@ class ZerosLikeTest(test.TestCase):
 
   def testZerosLikeCPU(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
-        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
-        dtypes_lib.complex128, dtypes_lib.string
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128,
+        dtypes_lib.string
     ]:
       self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False)
       self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False)
@@ -573,10 +574,10 @@ class OnesLikeTest(test.TestCase):
 
   def testOnesLike(self):
     for dtype in [
-        dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8,
-        dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32,
-        dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64,
-        dtypes_lib.complex128
+        dtypes_lib.float32, dtypes_lib.float64,
+        dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16,
+        dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool,
+        dtypes_lib.complex64, dtypes_lib.complex128
     ]:
       numpy_dtype = dtype.as_numpy_dtype
       with self.test_session():
diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py
index d92797a7d3..a7e23ead1c 100644
--- a/tensorflow/python/kernel_tests/conv1d_test.py
+++ b/tensorflow/python/kernel_tests/conv1d_test.py
@@ -52,6 +52,7 @@ class Conv1DTest(test.TestCase):
           self.assertEqual(len(output), 2)
           self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4])
 
+
   def testConv1DTranspose(self):
     with self.test_session():
       stride = 2
@@ -92,6 +93,5 @@ class Conv1DTest(test.TestCase):
 
     self.assertAllClose(cache_values, value)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
index ec8ac74163..116681fc4c 100644
--- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py
@@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase):
       total_size_2 *= s
 
     # Initializes the input tensor with array containing numbers from 0 to 1.
-    # We keep the input tensor values fairly small to avoid overflowing float16
-    # during the conv3d.
+    # We keep the input tensor values fairly small to avoid overflowing a float16
+    # tensor during the conv3d.
     x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)]
     x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)]
     with self.test_session(use_gpu=use_gpu):
@@ -115,13 +115,15 @@ class Conv3DTest(test.TestCase):
           if value.dtype == np.float16:
             tol = 1e-3
 
-          self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol)
+          self.assertAllClose(expected, value.flatten(), atol=tol,
+                              rtol=tol)
 
   def testConv3D1x1x1Filter(self):
     expected_output = [
-        0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259,
-        0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926,
-        1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593
+        0.18518519,  0.22222222,  0.25925926,  0.40740741,  0.5       ,
+        0.59259259,  0.62962963,  0.77777778,  0.92592593,  0.85185185,
+        1.05555556,  1.25925926,  1.07407407,  1.33333333,  1.59259259,
+        1.2962963 ,  1.61111111,  1.92592593
     ]
 
     # These are equivalent to the Conv2D1x1 case.
@@ -147,10 +149,10 @@ class Conv3DTest(test.TestCase):
   # Expected values computed using scipy's correlate function.
   def testConv3D2x2x2Filter(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148,
-        6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148,
-        9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222,
-        10.70023148
+        3.77199074,   3.85069444,   3.92939815,   4.2650463 ,   4.35763889,
+        4.45023148,   6.73032407,   6.89236111,   7.05439815,   7.22337963,
+        7.39930556,   7.57523148,   9.68865741,   9.93402778,  10.17939815,
+        10.18171296,  10.44097222,  10.70023148
     ]
     # expected_shape = [1, 3, 1, 2, 5]
     self._VerifyValues(
@@ -162,17 +164,19 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStrides(self):
     expected_output = [
-        0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095,
-        0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095,
-        0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095,
-        0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095,
-        1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095,
-        1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095,
-        2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381,
-        2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095,
-        1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095,
-        3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095,
-        3.45238095, 3.46488095, 1.98452381
+        0.06071429,  0.08988095,  0.10238095,  0.11488095,  0.12738095,
+        0.13988095,  0.08452381,  0.26071429,  0.35238095,  0.36488095,
+        0.37738095,  0.38988095,  0.40238095,  0.23452381,  0.46071429,
+        0.61488095,  0.62738095,  0.63988095,  0.65238095,  0.66488095,
+        0.38452381,  1.12738095,  1.48988095,  1.50238095,  1.51488095,
+        1.52738095,  1.53988095,  0.88452381,  1.32738095,  1.75238095,
+        1.76488095,  1.77738095,  1.78988095,  1.80238095,  1.03452381,
+        1.52738095,  2.01488095,  2.02738095,  2.03988095,  2.05238095,
+        2.06488095,  1.18452381,  2.19404762,  2.88988095,  2.90238095,
+        2.91488095,  2.92738095,  2.93988095,  1.68452381,  2.39404762,
+        3.15238095,  3.16488095,  3.17738095,  3.18988095,  3.20238095,
+        1.83452381,  2.59404762,  3.41488095,  3.42738095,  3.43988095,
+        3.45238095,  3.46488095,  1.98452381
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 5, 8, 7, 1],
@@ -183,7 +187,8 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815
+        3.77199074,  3.85069444,  3.92939815,  9.68865741,  9.93402778,
+        10.17939815
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -194,12 +199,14 @@ class Conv3DTest(test.TestCase):
 
   def testConv3DStride3(self):
     expected_output = [
-        1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016,
-        1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016,
-        1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159,
-        4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016,
-        4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016,
-        4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159
+        1.51140873,  1.57167659,  1.63194444,  1.56349206,  1.62673611,
+        1.68998016,  1.6155754 ,  1.68179563,  1.74801587,  1.9280754 ,
+        2.01215278,  2.09623016,  1.98015873,  2.0672123 ,  2.15426587,
+        2.03224206,  2.12227183,  2.21230159,  4.4280754 ,  4.65500992,
+        4.88194444,  4.48015873,  4.71006944,  4.93998016,  4.53224206,
+        4.76512897,  4.99801587,  4.84474206,  5.09548611,  5.34623016,
+        4.8968254 ,  5.15054563,  5.40426587,  4.94890873,  5.20560516,
+        5.46230159
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 6, 7, 8, 2],
@@ -210,8 +217,9 @@ class Conv3DTest(test.TestCase):
 
   def testConv3D2x2x2FilterStride2Same(self):
     expected_output = [
-        3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074,
-        9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074
+        3.77199074,   3.85069444,   3.92939815,   2.0162037 ,   2.06597222,
+        2.11574074,   9.68865741,   9.93402778,  10.17939815,   4.59953704,
+        4.73263889,   4.86574074
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 4, 2, 3, 3],
@@ -222,8 +230,8 @@ class Conv3DTest(test.TestCase):
 
   def testKernelSmallerThanStride(self):
     expected_output = [
-        0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778,
-        0.92592593, 1.
+        0.03703704,  0.11111111,  0.25925926,  0.33333333,  0.7037037 ,
+        0.77777778,  0.92592593,  1.
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 3, 3, 3, 1],
@@ -239,11 +247,12 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122,
-        0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551,
-        2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714,
-        1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082,
-        0.3691691, 0.37244898, 0.125
+        0.54081633,  0.58017493,  0.28061224,  0.81632653,  0.85568513,
+        0.40306122,  0.41873178,  0.4340379 ,  0.19642857,  2.46938776,
+        2.50874636,  1.1377551 ,  2.74489796,  2.78425656,  1.26020408,
+        1.16873178,  1.1840379 ,  0.51785714,  1.09511662,  1.10604956,
+        0.44642857,  1.17164723,  1.18258017,  0.47704082,  0.3691691 ,
+        0.37244898,  0.125
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -253,8 +262,8 @@ class Conv3DTest(test.TestCase):
         expected=expected_output)
 
     expected_output = [
-        0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898,
-        2.784257
+        0.540816,  0.580175,  0.816327,  0.855685,  2.469388,  2.508746,
+        2.744898,  2.784257
     ]
     self._VerifyValues(
         tensor_in_sizes=[1, 7, 7, 7, 1],
@@ -269,7 +278,7 @@ class Conv3DTest(test.TestCase):
         filter_in_sizes=[2, 1, 2, 1, 2],
         stride=1,
         padding="VALID",
-        expected=[1.5625, 1.875])
+        expected=[1.5625,  1.875])
 
   def _ConstructAndTestGradientForConfig(
       self, batch, input_shape, filter_shape, in_depth, out_depth, stride,
@@ -309,6 +318,7 @@ class Conv3DTest(test.TestCase):
     input_data = [x * 1.0 / input_size for x in range(0, input_size)]
     filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)]
 
+
     for data_type in self._DtypesToTest(use_gpu=use_gpu):
       # TODO(mjanusz): Modify gradient_checker to also provide max relative
       # error and synchronize the tolerance levels between the tests for forward
@@ -320,11 +330,12 @@ class Conv3DTest(test.TestCase):
       elif data_type == dtypes.float16:
         tolerance = 1e-3
 
+
       with self.test_session(use_gpu=use_gpu):
         orig_input_tensor = constant_op.constant(
-            input_data, shape=input_shape, dtype=data_type, name="input")
+          input_data, shape=input_shape, dtype=data_type, name="input")
         filter_tensor = constant_op.constant(
-            filter_data, shape=filter_shape, dtype=data_type, name="filter")
+          filter_data, shape=filter_shape, dtype=data_type, name="filter")
 
         if data_format == "NCDHW":
           input_tensor = test_util.NHWCToNCHW(orig_input_tensor)
@@ -334,23 +345,25 @@ class Conv3DTest(test.TestCase):
           new_strides = strides
 
         conv = nn_ops.conv3d(
-            input_tensor,
-            filter_tensor,
-            new_strides,
-            padding,
-            data_format=data_format,
-            name="conv")
+          input_tensor, filter_tensor, new_strides, padding,
+          data_format=data_format, name="conv")
 
         if data_format == "NCDHW":
           conv = test_util.NCHWToNHWC(conv)
 
+        
         if test_input:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(
-              orig_input_tensor, input_shape, conv, output_shape)
+          jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor,
+                                                               input_shape,
+                                                               conv,
+                                                               output_shape)
         else:
-          jacob_t, jacob_n = gradient_checker.compute_gradient(
-              filter_tensor, filter_shape, conv, output_shape)
-
+          jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor,
+                                                               filter_shape,
+                                                               conv,
+                                                               output_shape)
+        
+        
         if data_type != dtypes.float16:
           reference_jacob_t = jacob_t
           err = np.fabs(jacob_t - jacob_n).max()
@@ -362,6 +375,7 @@ class Conv3DTest(test.TestCase):
       print("conv3d gradient error = ", err)
       self.assertLess(err, tolerance)
 
+
   def ConstructAndTestGradient(self, **kwargs):
     for data_format, use_gpu in GetTestConfigs():
       self._ConstructAndTestGradientForConfig(data_format=data_format,
diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py
index 6be8997cab..150e2ff7f2 100644
--- a/tensorflow/python/kernel_tests/pooling_ops_test.py
+++ b/tensorflow/python/kernel_tests/pooling_ops_test.py
@@ -18,8 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import os
 import numpy as np
+import os
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -1442,6 +1442,7 @@ class PoolingTest(test.TestCase):
           use_gpu=True,
           v2=v2)
 
+
     # Propagate the diff in cases of NaNs
     os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1"
     expected_input_backprop_cudnn = expected_input_backprop_tf_cpu
diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py
index 223a4b2c87..8e54d10f32 100644
--- a/tensorflow/python/kernel_tests/reader_ops_test.py
+++ b/tensorflow/python/kernel_tests/reader_ops_test.py
@@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase):
     with self.test_session() as sess:
       reader1 = io_ops.LMDBReader(name="test_read_from_same_file1")
       reader2 = io_ops.LMDBReader(name="test_read_from_same_file2")
-      filename_queue = input_lib.string_input_producer(
-          [self.db_path], num_epochs=None)
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
       key1, value1 = reader1.read(filename_queue)
       key2, value2 = reader2.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
-      for _ in range(3):
-        for _ in range(10):
+      for i in range(3):
+        for j in range(10):
           k1, v1, k2, v2 = sess.run([key1, value1, key2, value2])
           self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2))
           self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2))
@@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase):
   def testReadFromFileRepeatedly(self):
     with self.test_session() as sess:
       reader = io_ops.LMDBReader(name="test_read_from_file_repeated")
-      filename_queue = input_lib.string_input_producer(
-          [self.db_path], num_epochs=None)
+      filename_queue = input_lib.string_input_producer([self.db_path],
+                                                       num_epochs=None)
       key, value = reader.read(filename_queue)
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(sess, coord=coord)
       # Iterate over the lmdb 3 times.
-      for _ in range(3):
+      for i in range(3):
         # Go over all 10 records each time.
         for j in range(10):
           k, v = sess.run([key, value])
@@ -1071,6 +1071,5 @@ class LMDBReaderTest(test.TestCase):
       coord.request_stop()
       coord.join(threads)
 
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
index 99f9f09690..3a02f24902 100644
--- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
+++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py
@@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper):
           # Replace np_ans[8] with 0 for the value
           np_ans[8:] = 0
           # Replace 8 with -1 in indices
-          np.place(indices, indices == 8, [-1])
+          np.place(indices, indices==8, [-1])
           s = math_ops.unsorted_segment_sum(
               data=tf_x, segment_ids=indices, num_segments=num_segments)
           tf_ans = s.eval()
diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py
index 6390b7c518..04758ce45a 100644
--- a/tensorflow/python/kernel_tests/unique_op_test.py
+++ b/tensorflow/python/kernel_tests/unique_op_test.py
@@ -87,7 +87,6 @@ class UniqueTest(test.TestCase):
     for i in range(len(x)):
       self.assertEqual(x[i], tf_y[tf_idx[i]])
 
-
 class UniqueWithCountsTest(test.TestCase):
 
   def testInt32(self):
diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py
index 83237b8733..4d5fb97845 100644
--- a/tensorflow/python/layers/normalization.py
+++ b/tensorflow/python/layers/normalization.py
@@ -267,34 +267,34 @@ class BatchNormalization(base.Layer):
           self.axis[idx] = x + 1      # Account for added dimension
 
     if self.scale:
-      self.gamma = self.add_variable(
-          name='gamma',
-          shape=param_shape,
-          dtype=param_dtype,
-          initializer=self.gamma_initializer,
-          regularizer=self.gamma_regularizer,
-          constraint=self.gamma_constraint,
-          trainable=True)
+      self.gamma = self.add_variable(name='gamma',
+                                     shape=param_shape,
+                                     dtype=param_dtype,
+                                     initializer=self.gamma_initializer,
+                                     regularizer=self.gamma_regularizer,
+                                     constraint=self.gamma_constraint,
+                                     trainable=True)
     else:
       self.gamma = None
       if self.fused:
-        self._gamma_const = array_ops.constant(
-            1.0, dtype=param_dtype, shape=param_shape)
+        self._gamma_const = array_ops.constant(1.0,
+                                               dtype=param_dtype,
+                                               shape=param_shape)
 
     if self.center:
-      self.beta = self.add_variable(
-          name='beta',
-          shape=param_shape,
-          dtype=param_dtype,
-          initializer=self.beta_initializer,
-          regularizer=self.beta_regularizer,
-          constraint=self.beta_constraint,
-          trainable=True)
+      self.beta = self.add_variable(name='beta',
+                                    shape=param_shape,
+                                    dtype=param_dtype,
+                                    initializer=self.beta_initializer,
+                                    regularizer=self.beta_regularizer,
+                                    constraint=self.beta_constraint,
+                                    trainable=True)
     else:
       self.beta = None
       if self.fused:
-        self._beta_const = array_ops.constant(
-            0.0, dtype=param_dtype, shape=param_shape)
+        self._beta_const = array_ops.constant(0.0,
+                                              dtype=param_dtype,
+                                              shape=param_shape)
 
     # Disable variable partitioning when creating the moving mean and variance
     try:
@@ -327,12 +327,11 @@ class BatchNormalization(base.Layer):
         # stack to be cleared. The nested ones use a `lambda` to set the desired
         # device and ignore any devices that may be set by the custom getter.
         def _renorm_variable(name, shape):
-          var = self.add_variable(
-              name=name,
-              shape=shape,
-              dtype=param_dtype,
-              initializer=init_ops.zeros_initializer(),
-              trainable=False)
+          var = self.add_variable(name=name,
+                                  shape=shape,
+                                  dtype=param_dtype,
+                                  initializer=init_ops.zeros_initializer(),
+                                  trainable=False)
           return var
 
         with ops.device(None):
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 7c91c3284e..b2876c58c2 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -101,13 +101,15 @@ class BNTest(test.TestCase):
       loss_val = sess.run(loss, feed_dict={image: image_val})
       return loss_val
 
-  def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu,
+  def _trainEvalSequence(self,
+                         dtype,
+                         train1_use_gpu,
+                         train2_use_gpu,
                          infer_use_gpu):
     batch, height, width, input_channels = 2, 4, 5, 3
     shape = [batch, height, width, input_channels]
     checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' %
-                              (dtype, train1_use_gpu, train2_use_gpu,
-                               infer_use_gpu))
+        (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu))
 
     self._train(
         checkpoint,
@@ -128,27 +130,30 @@ class BNTest(test.TestCase):
         dtype=dtype)
 
     np.random.seed(0)
-    image_val = np.random.rand(batch, height, width, input_channels).astype(
-        dtype.as_numpy_dtype)
-    loss_val = self._infer(
-        checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True)
+    image_val = np.random.rand(batch,
+                               height,
+                               width,
+                               input_channels).astype(dtype.as_numpy_dtype)
+    loss_val = self._infer(checkpoint, image_val, shape,
+                           use_gpu=infer_use_gpu, is_fused=True)
 
     return train_vars, loss_val
 
   def testHalfPrecision(self):
-    ref_vars, ref_loss = self._trainEvalSequence(
-        dtype=dtypes.float32,
-        train1_use_gpu=True,
-        train2_use_gpu=True,
-        infer_use_gpu=True)
-
+    ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32,
+                                                 train1_use_gpu=True,
+                                                 train2_use_gpu=True,
+                                                 infer_use_gpu=True)
+ 
     self.assertEqual(len(ref_vars), 5)
 
     for train1_use_gpu in [True, False]:
       for train2_use_gpu in [True, False]:
         for infer_use_gpu in [True, False]:
-          test_vars, test_loss = self._trainEvalSequence(
-              dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu)
+          test_vars, test_loss = self._trainEvalSequence(dtypes.float16,
+                                                         train1_use_gpu,
+                                                         train2_use_gpu,
+                                                         infer_use_gpu)
           self.assertEqual(len(test_vars), 5)
           for test_var, ref_var in zip(test_vars, ref_vars):
             self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3)
@@ -276,8 +281,9 @@ class BNTest(test.TestCase):
   def testCreateFusedBNFloat16(self):
     # Call layer.
     bn = normalization_layers.BatchNormalization(axis=1, fused=True)
-    inputs = random_ops.random_uniform(
-        (5, 4, 3, 3), seed=1, dtype=dtypes.float16)
+    inputs = random_ops.random_uniform((5, 4, 3, 3),
+                                       seed=1,
+                                       dtype=dtypes.float16)
     training = array_ops.placeholder(dtype='bool')
     outputs = bn.apply(inputs, training=training)
 
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 38eff54c69..43238757c7 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -1194,19 +1194,18 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None):
           "Number of mask dimensions must be specified, even if some dimensions"
           " are None.  E.g. shape=[None] is ok, but shape=None is not.")
     axis = 0 if axis is None else axis
-    shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask)
+    shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask)
 
-    leading_size = gen_math_ops._prod(
-        shape(tensor)[axis:axis + ndims_mask], [0])
+    leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0])
     tensor = reshape(tensor,
-                     concat([
-                         shape(tensor)[:axis], [leading_size],
-                         shape(tensor)[axis + ndims_mask:]
-                     ], 0))
-    first_dim = shape_tensor[axis:axis + ndims_mask].num_elements()
+                     concat([shape(tensor)[:axis],
+                             [leading_size],
+                             shape(tensor)[axis+ndims_mask:]], 0))
+    first_dim = shape_tensor[axis:axis+ndims_mask].num_elements()
     tensor.set_shape(
-        tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim])
-        .concatenate(shape_tensor[axis + ndims_mask:]))
+        tensor_shape.as_shape(shape_tensor[:axis])
+        .concatenate([first_dim])
+        .concatenate(shape_tensor[axis+ndims_mask:]))
 
     mask = reshape(mask, [-1])
     return _apply_mask_1d(tensor, mask, axis)
diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py
index 04762565c2..d49fac59ca 100644
--- a/tensorflow/python/ops/distributions/multinomial.py
+++ b/tensorflow/python/ops/distributions/multinomial.py
@@ -23,10 +23,10 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops.distributions import distribution
 from tensorflow.python.ops.distributions import util as distribution_util
 
@@ -243,26 +243,25 @@ class Multinomial(distribution.Distribution):
         n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits
 
     # flatten the total_count and logits
-    flat_logits = array_ops.reshape(logits, [-1, k])  # [B1B2...Bm, k]
-    flat_ndraws = n * array_ops.reshape(n_draws, [-1])  # [B1B2...Bm]
+    flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k]
+    flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm]
 
     # computes each total_count and logits situation by map_fn
     def _sample_single(args):
-      logits, n_draw = args[0], args[1]  # [K], []
-      x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw,
-                                 seed)  # [1, n*n_draw]
-      x = array_ops.reshape(x, shape=[n, -1])  # [n, n_draw]
-      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2)  # [n, k]
+      logits, n_draw = args[0], args[1] # [K], []
+      x = random_ops.multinomial(logits[array_ops.newaxis, ...],
+                                 n_draw, seed) # [1, n*n_draw]
+      x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw]
+      x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k]
       return x
-
-    x = functional_ops.map_fn(
-        _sample_single, [flat_logits, flat_ndraws],
-        dtype=self.dtype)  # [B1B2...Bm, n, k]
+    x = functional_ops.map_fn(_sample_single,
+                              [flat_logits, flat_ndraws],
+                              dtype=self.dtype) # [B1B2...Bm, n, k]
 
     # reshape the results to proper shape
     x = array_ops.transpose(x, perm=[1, 0, 2])
     final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0)
-    x = array_ops.reshape(x, final_shape)  # [n, B1, B2,..., Bm, k]
+    x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k]
     return x
 
   @distribution_util.AppendDocstring(_multinomial_sample_note)
diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index b9c89d62d5..7c23321ca5 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -1119,8 +1119,9 @@ def rgb_to_grayscale(images, name=None):
     # https://en.wikipedia.org/wiki/Luma_%28video%29
     rgb_weights = [0.2989, 0.5870, 0.1140]
     rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
-    gray_float = math_ops.reduce_sum(
-        flt_image * rgb_weights, rank_1, keepdims=True)
+    gray_float = math_ops.reduce_sum(flt_image * rgb_weights,
+                                     rank_1,
+                                     keepdims=True)
     gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
     return convert_image_dtype(gray_float, orig_dtype, name=name)
 
diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py
index be9beee633..14a039ffd0 100644
--- a/tensorflow/python/ops/linalg_ops.py
+++ b/tensorflow/python/ops/linalg_ops.py
@@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.gen_linalg_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import compat
-from tensorflow.python.util import deprecation
+from tensorflow.python.util.deprecation import deprecated_args
 
 # Names below are lower_case.
 # pylint: disable=invalid-name
@@ -439,13 +439,9 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None):
 
 
 # pylint: disable=redefined-builtin
-@deprecation.deprecated_args(
-    None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims')
-def norm(tensor,
-         ord='euclidean',
-         axis=None,
-         keepdims=None,
-         name=None,
+@deprecated_args(None, "keep_dims is deprecated, use keepdims instead",
+                 "keep_dims")
+def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None,
          keep_dims=None):
   r"""Computes the norm of vectors, matrices, and tensors.
 
@@ -482,7 +478,6 @@ def norm(tensor,
     keepdims: If True, the axis indicated in `axis` are kept with size 1.
       Otherwise, the dimensions in `axis` are removed from the output shape.
     name: The name of the op.
-    keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
     output: A `Tensor` of the same type as tensor, containing the vector or
@@ -505,8 +500,11 @@ def norm(tensor,
      higher order tensors.
   @end_compatibility
   """
-  keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims,
-                                                    'keep_dims', keep_dims)
+
+  if keep_dims is not None:
+    if keepdims is not None:
+      raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'")
+    keepdims = keep_dims
   if keepdims is None:
     keepdims = False
 
@@ -557,8 +555,8 @@ def norm(tensor,
       else:
         # General p-norms (positive p only)
         result = math_ops.pow(
-            math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True),
-            1.0 / ord)
+            math_ops.reduce_sum(
+                math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord)
     if not keepdims:
       result = array_ops.squeeze(result, axis)
     return result
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index e04121ee31..d30f6b92ad 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -792,10 +792,9 @@ def mean_cosine_distance(labels, predictions, dim, weights=None,
   predictions, labels, weights = _remove_squeezable_dimensions(
       predictions=predictions, labels=labels, weights=weights)
   radial_diffs = math_ops.multiply(predictions, labels)
-  radial_diffs = math_ops.reduce_sum(
-      radial_diffs, reduction_indices=[
-          dim,
-      ], keepdims=True)
+  radial_diffs = math_ops.reduce_sum(radial_diffs,
+                                     reduction_indices=[dim,],
+                                     keepdims=True)
   mean_distance, update_op = mean(radial_diffs, weights,
                                   None,
                                   None,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 654eb1c118..da037a7983 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -333,7 +333,6 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None):
     epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the
       divisor if `norm < sqrt(epsilon)`.
     name: A name for this operation (optional).
-    dim: Deprecated alias for axis.
 
   Returns:
     A `Tensor` with the same shape as `x`.
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index ec7b9372ca..61fa462988 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -23,6 +23,7 @@ import numbers
 import numpy as np
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import ops
@@ -37,10 +38,11 @@ from tensorflow.python.ops import random_ops
 # pylint: disable=wildcard-import
 from tensorflow.python.ops.gen_nn_ops import *
 # pylint: enable=wildcard-import
+from tensorflow.python.util.deprecation import deprecated_args
+from tensorflow.python.util.deprecation import deprecated_argument_lookup
 
 from tensorflow.python.util import deprecation
 
-
 # Aliases for some automatically-generated names.
 local_response_normalization = gen_nn_ops.lrn
 
@@ -1646,7 +1648,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   return output
 
 
-@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def softmax(logits, axis=None, name=None, dim=None):
   """Computes softmax activations.
 
@@ -1660,7 +1662,6 @@ def softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
-    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type and shape as `logits`.
@@ -1669,13 +1670,13 @@ def softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._softmax, axis, name)
 
 
-@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim")
+@deprecated_args(None, "dim is deprecated, use axis instead", "dim")
 def log_softmax(logits, axis=None, name=None, dim=None):
   """Computes log softmax activations.
 
@@ -1689,7 +1690,6 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     axis: The dimension softmax would be performed on. The default is -1 which
       indicates the last dimension.
     name: A name for the operation (optional).
-    dim: Deprecated alias for `axis`.
 
   Returns:
     A `Tensor`. Has the same type as `logits`. Same shape as `logits`.
@@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None):
     InvalidArgumentError: if `logits` is empty or `axis` is beyond the last
       dimension of `logits`.
   """
-  axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim)
+  axis = deprecated_argument_lookup("axis", axis, "dim", dim)
   if axis is None:
     axis = -1
   return _softmax(logits, gen_nn_ops._log_softmax, axis, name)
@@ -2316,14 +2316,13 @@ def conv1d(value, filters, stride, padding,
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
-def conv1d_transpose(
-    value,
-    filter,  # pylint: disable=redefined-builtin
-    output_shape,
-    stride,
-    padding="SAME",
-    data_format="NWC",
-    name=None):
+def conv1d_transpose(value,
+                     filter,
+                     output_shape,
+                     stride,
+                     padding="SAME",
+                     data_format="NWC",
+                     name=None):
   """The transpose of `conv1d`.
 
   This operation is sometimes called "deconvolution" after [Deconvolutional
@@ -2358,8 +2357,8 @@ def conv1d_transpose(
                       [value, filter, output_shape]) as name:
     output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape")
     if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)):
-      raise ValueError("output_shape must have shape (3,), got {}".format(
-          output_shape_.get_shape()))
+      raise ValueError("output_shape must have shape (3,), got {}"
+                       .format(output_shape_.get_shape()))
 
     # The format could be either NWC or NCW, map to NHWC or NCHW
     if data_format is None or data_format == "NWC":
@@ -2381,8 +2380,7 @@ def conv1d_transpose(
       if not filter.get_shape()[1].is_compatible_with(output_shape[axis]):
         raise ValueError(
             "output_shape does not match filter's output channels, "
-            "{} != {}".format(output_shape[axis],
-                              filter.get_shape()[1]))
+            "{} != {}".format(output_shape[axis], filter.get_shape()[1]))
 
     if padding != "VALID" and padding != "SAME":
       raise ValueError("padding must be either VALID or SAME:"
@@ -2390,26 +2388,25 @@ def conv1d_transpose(
 
     # Reshape the input tensor to [batch, 1, in_width, in_channels]
     if data_format_2d == "NHWC":
-      output_shape_ = array_ops.concat(
-          [output_shape_[:1], [1], output_shape_[1:]], axis=0)
+      output_shape_ = array_ops.concat([output_shape_[:1], [1],
+                                        output_shape_[1:]], axis=0)
       spatial_start_dim = 1
       strides = [1, 1, stride, 1]
     else:
-      output_shape_ = array_ops.concat(
-          [output_shape_[:2], [1], output_shape_[2:]], axis=0)
+      output_shape_ = array_ops.concat([output_shape_[:2], [1],
+                                        output_shape_[2:]], axis=0)
       spatial_start_dim = 2
       strides = [1, 1, 1, stride]
     value = array_ops.expand_dims(value, spatial_start_dim)
     filter = array_ops.expand_dims(filter, 0)
 
-    result = gen_nn_ops.conv2d_backprop_input(
-        input_sizes=output_shape_,
-        filter=filter,
-        out_backprop=value,
-        strides=strides,
-        padding=padding,
-        data_format=data_format_2d,
-        name=name)
+    result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_,
+                                              filter=filter,
+                                              out_backprop=value,
+                                              strides=strides,
+                                              padding=padding,
+                                              data_format=data_format_2d,
+                                              name=name)
     return array_ops.squeeze(result, [spatial_start_dim])
 
 
diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc
index 44144a0613..43d2d3cd48 100644
--- a/tensorflow/stream_executor/dnn.cc
+++ b/tensorflow/stream_executor/dnn.cc
@@ -470,7 +470,6 @@ string ConvolutionDescriptor::ToShortString() const {
 PoolingDescriptor::PoolingDescriptor(int ndims)
     : mode_(dnn::PoolingMode::kMaximum),
       ndims_(ndims),
-      propagate_nans_(false),
       window_(ndims, 0),
       padding_(ndims, 0),
       strides_(ndims, 1) {}
diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl
index 87a70d8f95..b470772fbf 100644
--- a/third_party/sycl/crosstool/trisycl.tpl
+++ b/third_party/sycl/crosstool/trisycl.tpl
@@ -11,12 +11,10 @@ CPU_C_COMPILER = ('%{host_c_compiler}')
 CURRENT_DIR = os.path.dirname(sys.argv[0])
 TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include'
 
-
 def main():
   compiler_flags = []
 
-  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable',
-                  '-Wignored-attributes', '-fno-exceptions')
+  remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions')
   # remove -fsamotoze-coverage from string with g++
   if 'g++' in CPU_CXX_COMPILER:
     remove_flags += ('-fsanitize-coverage',)
@@ -24,62 +22,52 @@ def main():
   else:
     compiler_flags += ['-fopenmp=libomp']
 
-  compiler_flags += [
-      flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)
-  ]
+  compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)]
+
 
   output_file_index = compiler_flags.index('-o') + 1
   output_file_name = compiler_flags[output_file_index]
 
-  if (output_file_index == 1):
+  if(output_file_index == 1):
     # we are linking
-    return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined'])
+    return call([CPU_CXX_COMPILER] + compiler_flags +
+                ['-Wl,--no-undefined'])
 
   # find what we compile
   compiling_cpp = 0
-  if ('-c' in compiler_flags):
-    compiled_file_index = compiler_flags.index('-c') + 1
-    compiled_file_name = compiler_flags[compiled_file_index]
-    if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C',
-                                     '.cxx'))):
-      compiling_cpp = 1
-
-  debug_flags = [
-      '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL',
-      '-lpthread', '-lboost_log', '-g', '-rdynamic'
-  ]
+  if('-c' in compiler_flags):
+      compiled_file_index = compiler_flags.index('-c') + 1
+      compiled_file_name = compiler_flags[compiled_file_index]
+      if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP',
+                                      '.C', '.cxx'))):
+        compiling_cpp = 1;
+
+  debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic']
 
   opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3']
 
-  compiler_flags = compiler_flags + [
-      '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH',
-      '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL'
-  ] + opt_flags
+  compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1',
+                                     '-DEIGEN_HAS_C99_MATH',
+                                     '-DEIGEN_MAX_ALIGN_BYTES=16',
+                                     '-DTENSORFLOW_USE_SYCL'] + opt_flags
 
-  if (compiling_cpp == 1):
+  if(compiling_cpp == 1):
     # create a blacklist of folders that will be skipped when compiling
     # with triSYCL
-    skip_extensions = ['.cu.cc']
-    skip_folders = [
-        'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard',
-        'third_party', 'external', 'hexagon'
-    ]
+    skip_extensions = [".cu.cc"]
+    skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"]
     skip_folders = [(folder + '/') for folder in skip_folders]
     # if compiling external project skip triSYCL
-    if any(
-        compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(
-            _folder in output_file_name for _folder in skip_folders):
+    if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders):
       return call([CPU_CXX_COMPILER] + compiler_flags)
 
-    host_compiler_flags = [
-        '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR
-    ] + compiler_flags
+    host_compiler_flags = ['-xc++', '-Wno-unused-variable',
+                           '-I', TRISYCL_INCLUDE_DIR] + compiler_flags
     x = call([CPU_CXX_COMPILER] + host_compiler_flags)
     return x
   else:
     # compile for C
     return call([CPU_C_COMPILER] + compiler_flags)
 
-
 if __name__ == '__main__':
   sys.exit(main())
diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl
index 5b9d0eb383..a0c9e4e43a 100644
--- a/third_party/sycl/sycl_configure.bzl
+++ b/third_party/sycl/sycl_configure.bzl
@@ -67,6 +67,7 @@ def find_computecpp_root(repository_ctx):
 
 def find_trisycl_include_dir(repository_ctx):
   """Find triSYCL include directory. """
+  sycl_name = ""
   if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ:
     sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip()
     if sycl_name.startswith("/"):