diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-11-21 23:55:59 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-11-22 00:01:14 -0800 |
commit | d0a3b2d3983b970b750329088013dc5cb67d96f9 (patch) | |
tree | 17cc584c4568e4e64a4bdd6bbee0be0b9d96f62c | |
parent | c6d603f02e1a98f871912cda6716cdcbed6b439e (diff) |
Merged commit includes the following changes:
176617057 by yifeif:
Internal change.
--
176615737 by yifeif:
Fix internal tests.
--
PiperOrigin-RevId: 176617057
69 files changed, 733 insertions, 644 deletions
diff --git a/configure.py b/configure.py index 26da09bd94..1f205861f1 100644 --- a/configure.py +++ b/configure.py @@ -883,27 +883,28 @@ def set_computecpp_toolkit_path(environ_cp): write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', computecpp_toolkit_path) + def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR""" + """Set TRISYCL_INCLUDE_DIR.""" ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' 'include directory. (Use --config=sycl_trisycl ' 'when building with Bazel) ' - '[Default is %s]: ' - ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) while True: trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) if os.path.exists(trisycl_include_dir): break - print('Invalid triSYCL include directory, %s cannot be found' - % (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) # Set TRISYCL_INCLUDE_DIR environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', - trisycl_include_dir) + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) + def set_mpi_home(environ_cp): """Set MPI_HOME.""" diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index 1e22b760b8..6c385af3b3 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -152,7 +152,7 @@ def tf_library(name, graph, config, " --target_triple=" + target_llvm_triple() + " --out_header=$(@D)/" + header_file + " --out_object=$(@D)/" + object_file + - flags), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, @@ -189,7 +189,7 @@ def tf_library(name, graph, config, " --cpp_class=" + cpp_class + " --target_triple=" + target_llvm_triple() + " --out_session_module=$(@D)/" + session_module_pb + - flags), + " " + flags), tools=[tfcompile_tool], visibility=visibility, testonly=testonly, diff --git a/tensorflow/compiler/tests/fused_batchnorm_test.py b/tensorflow/compiler/tests/fused_batchnorm_test.py index a773b5a947..00a9c9a65b 100644 --- a/tensorflow/compiler/tests/fused_batchnorm_test.py +++ b/tensorflow/compiler/tests/fused_batchnorm_test.py @@ -76,7 +76,8 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y_ref, mean_ref, var_ref = self._reference_training( x_val, scale_val, offset_val, epsilon, data_format) @@ -112,7 +113,8 @@ class FusedBatchNormTest(XLATestCase): # To avoid constant folding t_val = array_ops.placeholder(np.float32, shape=x_shape, name="x") scale = array_ops.placeholder(np.float32, shape=scale_shape, name="scale") - offset = array_ops.placeholder(np.float32, shape=scale_shape, name="offset") + offset = array_ops.placeholder( + np.float32, shape=scale_shape, name="offset") epsilon = 0.001 y, mean, var = nn.fused_batch_norm( t_val, diff --git a/tensorflow/contrib/android/cmake/CMakeLists.txt b/tensorflow/contrib/android/cmake/CMakeLists.txt index 25ada5ba27..aba356d616 100644 --- a/tensorflow/contrib/android/cmake/CMakeLists.txt +++ b/tensorflow/contrib/android/cmake/CMakeLists.txt @@ -37,7 +37,7 @@ set_target_properties(lib_tf PROPERTIES IMPORTED_LOCATION set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIS_SLIM_BUILD \ -std=c++11 -fno-rtti -fno-exceptions \ -O2 -Wno-narrowing -fomit-frame-pointer \ - -mfpu=neon -mfloat-abi=softfp -fPIE \ + -mfpu=neon -mfloat-abi=softfp -fPIE -fPIC \ -ftemplate-depth=900 \ -DGOOGLE_PROTOBUF_NO_RTTI \ -DGOOGLE_PROTOBUF_NO_STATIC_INITIALIZER") diff --git a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py index 7f7697357c..73747db31c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/cauchy_test.py @@ -41,6 +41,7 @@ def try_import(name): # pylint: disable=invalid-name tf_logging.warning("Could not import %s: %s" % (name, str(e))) return module + stats = try_import("scipy.stats") @@ -62,9 +63,9 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected, scale_shape.eval()) loc = array_ops.zeros(loc_shape) scale = array_ops.ones(scale_shape) - self.assertAllEqual( - expected, - array_ops.shape(cauchy_lib.Cauchy(loc, scale).sample()).eval()) + self.assertAllEqual(expected, + array_ops.shape( + cauchy_lib.Cauchy(loc, scale).sample()).eval()) def _testParamStaticShapes(self, sample_shape, expected): param_shapes = cauchy_lib.Cauchy.param_static_shapes(sample_shape) @@ -92,8 +93,7 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -115,16 +115,15 @@ class CauchyTest(test.TestCase): with self.test_session(): batch_size = 6 loc = constant_op.constant([[3.0, -3.0]] * batch_size) - scale = constant_op.constant([[np.sqrt(10.0), np.sqrt(15.0)]] * - batch_size) + scale = constant_op.constant( + [[np.sqrt(10.0), np.sqrt(15.0)]] * batch_size) x = np.array([[-2.5, 2.5, 4.0, 0.0, -1.0, 2.0]], dtype=np.float32).T cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) log_pdf = cauchy.log_prob(x) log_pdf_values = log_pdf.eval() self.assertEqual(log_pdf.shape, (6, 2)) - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - log_pdf.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), log_pdf.eval().shape) self.assertAllEqual(cauchy.batch_shape, log_pdf.shape) @@ -248,8 +247,7 @@ class CauchyTest(test.TestCase): cauchy = cauchy_lib.Cauchy(loc=loc, scale=scale) entropy = cauchy.entropy() - self.assertAllEqual(cauchy.batch_shape_tensor().eval(), - entropy.shape) + self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.shape) self.assertAllEqual(cauchy.batch_shape_tensor().eval(), entropy.eval().shape) self.assertAllEqual(cauchy.batch_shape, entropy.shape) @@ -257,7 +255,7 @@ class CauchyTest(test.TestCase): if not stats: return - expected_entropy = stats.cauchy(loc, scale).entropy() + expected_entropy = stats.cauchy(loc, scale[0]).entropy().reshape((1, 3)) self.assertAllClose(expected_entropy, entropy.eval()) def testCauchyMode(self): @@ -368,8 +366,8 @@ class CauchyTest(test.TestCase): self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -385,18 +383,18 @@ class CauchyTest(test.TestCase): samples = cauchy.sample(n) sample_values = samples.eval() self.assertEqual(samples.shape, (100000, batch_size, 2)) - self.assertAllClose(np.median(sample_values[:, 0, 0]), - loc_v[0], atol=1e-1) - self.assertAllClose(np.median(sample_values[:, 0, 1]), - loc_v[1], atol=1e-1) + self.assertAllClose( + np.median(sample_values[:, 0, 0]), loc_v[0], atol=1e-1) + self.assertAllClose( + np.median(sample_values[:, 0, 1]), loc_v[1], atol=1e-1) expected_shape = tensor_shape.TensorShape([n.eval()]).concatenate( tensor_shape.TensorShape(cauchy.batch_shape_tensor().eval())) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) - expected_shape = (tensor_shape.TensorShape( - [n.eval()]).concatenate(cauchy.batch_shape)) + expected_shape = ( + tensor_shape.TensorShape([n.eval()]).concatenate(cauchy.batch_shape)) self.assertAllEqual(expected_shape, samples.shape) self.assertAllEqual(expected_shape, sample_values.shape) @@ -428,9 +426,12 @@ class CauchyTest(test.TestCase): self.assertEqual(cauchy.event_shape, ()) self.assertAllEqual(cauchy.event_shape_tensor().eval(), []) self.assertAllEqual( - sess.run(cauchy.batch_shape_tensor(), - feed_dict={loc: 5.0, - scale: [1.0, 2.0]}), [2]) + sess.run( + cauchy.batch_shape_tensor(), + feed_dict={ + loc: 5.0, + scale: [1.0, 2.0] + }), [2]) if __name__ == "__main__": diff --git a/tensorflow/contrib/distributions/python/ops/cauchy.py b/tensorflow/contrib/distributions/python/ops/cauchy.py index a17bb091f6..8d59c1abfb 100644 --- a/tensorflow/contrib/distributions/python/ops/cauchy.py +++ b/tensorflow/contrib/distributions/python/ops/cauchy.py @@ -30,7 +30,6 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops.distributions import distribution - __all__ = [ "Cauchy", ] @@ -97,7 +96,7 @@ class Cauchy(distribution.Distribution): validate_args=False, allow_nan_stats=True, name="Cauchy"): - """Construct Cauchy distributions with loc and and scale `loc` and `scale`. + """Construct Cauchy distributions. The parameters `loc` and `scale` must be shaped in a way that supports broadcasting (e.g. `loc + scale` is a valid operation). @@ -121,8 +120,8 @@ class Cauchy(distribution.Distribution): """ parameters = locals() with ops.name_scope(name, values=[loc, scale]): - with ops.control_dependencies([check_ops.assert_positive(scale)] if - validate_args else []): + with ops.control_dependencies([check_ops.assert_positive(scale)] + if validate_args else []): self._loc = array_ops.identity(loc, name="loc") self._scale = array_ops.identity(scale, name="scale") check_ops.assert_same_float_dtype([self._loc, self._scale]) @@ -138,8 +137,8 @@ class Cauchy(distribution.Distribution): @staticmethod def _param_shapes(sample_shape): return dict( - zip(("loc", "scale"), ([ops.convert_to_tensor( - sample_shape, dtype=dtypes.int32)] * 2))) + zip(("loc", "scale"), + ([ops.convert_to_tensor(sample_shape, dtype=dtypes.int32)] * 2))) @property def loc(self): @@ -153,13 +152,10 @@ class Cauchy(distribution.Distribution): def _batch_shape_tensor(self): return array_ops.broadcast_dynamic_shape( - array_ops.shape(self.loc), - array_ops.shape(self.scale)) + array_ops.shape(self.loc), array_ops.shape(self.scale)) def _batch_shape(self): - return array_ops.broadcast_static_shape( - self.loc.shape, - self.scale.shape) + return array_ops.broadcast_static_shape(self.loc.shape, self.scale.shape) def _event_shape_tensor(self): return constant_op.constant([], dtype=dtypes.int32) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 9378fe8799..f1debc8590 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -309,7 +309,6 @@ def _fused_batch_norm(inputs, new_shape = [-1, channels, 1, 1] inputs = array_ops.reshape(inputs, new_shape) inputs_shape = inputs.get_shape() - dtype = inputs.dtype.base_dtype if data_format == DATA_FORMAT_NHWC: params_shape = inputs_shape[-1:] else: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 5aa2253516..27bd3172d6 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1779,7 +1779,8 @@ class BatchNormTest(test.TestCase): dtype = dtypes.float32 height, width = 3, 3 with self.test_session(): - images = np.random.uniform(size=(5, height, width, 3)).astype(dtype.as_numpy_dtype) + images = np.random.uniform(size=(5, height, width, 3)).astype( + dtype.as_numpy_dtype) output = _layers.batch_norm(images, fused=fused) expected_name = ('BatchNorm/FusedBatchNorm' if fused else 'BatchNorm/batchnorm') @@ -2665,18 +2666,18 @@ class BatchNormTest(test.TestCase): # Test case for 11673 with self.test_session() as sess: a_32 = array_ops.placeholder(dtypes.float32, shape=(10, 10, 10, 10)) - b_32 = _layers.batch_norm(a_32, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + _layers.batch_norm( + a_32, center=False, data_format='NCHW', zero_debias_moving_mean=True) a_16 = array_ops.placeholder(dtypes.float16, shape=(10, 10, 10, 10)) - b_16 = _layers.batch_norm(a_16, center=False, data_format='NCHW', - zero_debias_moving_mean=True) + _layers.batch_norm( + a_16, center=False, data_format='NCHW', zero_debias_moving_mean=True) sess.run(variables_lib.global_variables_initializer()) def testVariablesAreFloat32(self): height, width = 3, 3 with self.test_session(): - images = random_ops.random_uniform((5, height, width, 3), - seed=1, dtype=dtypes.float16) + images = random_ops.random_uniform( + (5, height, width, 3), seed=1, dtype=dtypes.float16) _layers.batch_norm(images, scale=True) beta = variables.get_variables_by_name('beta')[0] gamma = variables.get_variables_by_name('gamma')[0] @@ -2691,17 +2692,13 @@ class BatchNormTest(test.TestCase): channels = shape[1] images = np.arange(np.product(shape), dtype=dtype).reshape(shape) beta = init_ops.constant_initializer( - np.arange( - 2, channels + 2, dtype=np.float32)) + np.arange(2, channels + 2, dtype=np.float32)) gamma = init_ops.constant_initializer( - np.arange( - 10, channels + 10, dtype=np.float32) * 2.0) + np.arange(10, channels + 10, dtype=np.float32) * 2.0) mean = init_ops.constant_initializer( - np.arange( - 3, channels + 3, dtype=np.float32) * 5.0) + np.arange(3, channels + 3, dtype=np.float32) * 5.0) variance = init_ops.constant_initializer( - np.arange( - 1, channels + 1, dtype=np.float32) * 4.0) + np.arange(1, channels + 1, dtype=np.float32) * 4.0) output = _layers.batch_norm( images, fused=True, @@ -2726,7 +2723,6 @@ class BatchNormTest(test.TestCase): res_16 = self._runFusedBatchNorm(shape, np.float16) self.assertAllClose(res_32, res_16, rtol=1e-3) - def testAdjustmentCreated(self): # Tests that the adjustment is appropriately passed to and used by the core # BN layer. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index db18ebf05d..86fad4c553 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,7 +28,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -369,10 +368,11 @@ class DataFeeder(object): if x_is_dict: num_samples = list(self._x.values())[0].shape[0] elif tensor_util.is_tensor(self._x): - num_samples = self._x.shape[0].value # shape will be a Dimension, extract an int + num_samples = self._x.shape[ + 0].value # shape will be a Dimension, extract an int else: num_samples = self._x.shape[0] - + if self._shuffle: self.indices = self.random_state.permutation(num_samples) else: diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index 86d8484391..7526f3ae0d 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -251,8 +251,9 @@ class SdcaModel(object): result_dense = 0.0 for i in range(len(dense_variables)): - result_dense += math_ops.matmul( - dense_features[i], array_ops.expand_dims(dense_variables[i], -1)) + result_dense += math_ops.matmul(dense_features[i], + array_ops.expand_dims( + dense_variables[i], -1)) # Reshaping to allow shape inference at graph construction time. return array_ops.reshape(result_dense, [-1]) + result_sparse diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index b122818221..5bca82ded0 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -40,6 +40,7 @@ from six import StringIO # TODO(aselle): Disable GPU for now os.environ["CUDA_VISIBLE_DEVICES"] = "-1" +# pylint: disable=g-import-not-at-top import tensorflow as tf from google.protobuf import text_format # TODO(aselle): switch to TensorFlow's resource_loader @@ -383,7 +384,7 @@ def make_zip_of_tests(zip_path, report["toco_log"] = "" tf.reset_default_graph() - with tf.device('/cpu:0'): + with tf.device("/cpu:0"): try: inputs, outputs = make_graph(param_dict_real) except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 4c60c99342..04643a6058 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -34,12 +34,18 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'PowerSignOptimizer', 'AddSignOptimizer' + 'PowerSignOptimizer', + 'AddSignOptimizer' 'DelayCompensatedGradientDescentOptimizer', - 'DropStaleGradientOptimizer', 'ExternalOptimizerInterface', - 'LazyAdamOptimizer', 'NadamOptimizer', 'MovingAverageOptimizer', - 'ScipyOptimizerInterface', 'VariableClippingOptimizer', - 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', + 'DropStaleGradientOptimizer', + 'ExternalOptimizerInterface', + 'LazyAdamOptimizer', + 'NadamOptimizer', + 'MovingAverageOptimizer', + 'ScipyOptimizerInterface', + 'VariableClippingOptimizer', + 'MultitaskOptimizerWrapper', + 'clip_gradients_by_global_norm', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py index c26037935d..cb6c77a86f 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper.py @@ -12,9 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - -"""An optimizer wrapper that ensures correct behaviour -of stateful optimizers with multitask loss.""" +"""An optimizer wrapper for stateful optimizers with multitask loss.""" from __future__ import absolute_import from __future__ import division @@ -30,26 +28,27 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.training import optimizer -__all__ = ["MultitaskOptimizerWrapper", - "clip_gradients_by_global_norm"] +__all__ = ['MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm'] + def _is_all_zeros(grad): all_zeros = math_ops.equal(math_ops.count_nonzero(grad), 0) return all_zeros + def _get_wrapper(fn, opt): + def wrapper(self, grad, *args, **kwargs): # pylint: disable=unused-argument all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond( - all_zeros, - control_flow_ops.no_op, - lambda: fn(grad, *args, **kwargs)) + return control_flow_ops.cond(all_zeros, control_flow_ops.no_op, + lambda: fn(grad, *args, **kwargs)) + wrapper = types.MethodType(wrapper, opt) return wrapper + class MultitaskOptimizerWrapper(object): - """Optimizer wrapper that ensures that - all-zero gradients don't affect the optimizer state. + """Optimizer wrapper making all-zero gradients harmless. This might be useful when a multi-task loss is used, and some components of the loss might be @@ -88,20 +87,20 @@ class MultitaskOptimizerWrapper(object): gradvars_clipped, global_step=batch) ``` """ + def __init__(self, opt): - """ + """Constructor. + Args: - opt: an instance of a class that implements tf.train.Optimizer. + opt: an instance of a class that implements tf.train.Optimizer. """ if not isinstance(opt, optimizer.Optimizer): raise TypeError( - "Supplied optimizer must be an instance of tf.train.Optimizer") + 'Supplied optimizer must be an instance of tf.train.Optimizer') self._opt = opt - overriden_methods = ('_apply_dense', - '_resource_apply_dense', - '_apply_sparse', - '_resource_apply_sparse') - for name in overriden_methods: + overridden_methods = ('_apply_dense', '_resource_apply_dense', + '_apply_sparse', '_resource_apply_sparse') + for name in overridden_methods: fn = getattr(self._opt, name) wrapper = _get_wrapper(fn, self._opt) setattr(self._opt, name, wrapper) @@ -112,27 +111,30 @@ class MultitaskOptimizerWrapper(object): def clip_gradients_by_global_norm(gradients_variables, clip_norm=20.): """Clips gradients of a multitask loss by their global norm. + Ignores all-zero tensors when computing the global norm. Args: - gradients_variables: a list of pairs (gradient, variable). - clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. + gradients_variables: a list of pairs (gradient, variable). + clip_norm: a float Tensor, the global norm to clip on. Default is 20.0. Returns: - list: A list of pairs of the same type as gradients_variables,. - fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. + list: A list of pairs of the same type as gradients_variables,. + fixed_global_norm: A 0-D (scalar) Tensor representing the global norm. """ gradients, variables = six.moves.zip(*gradients_variables) + def _replace_nonexisting_grad(grad): if grad is None: return grad all_zeros = _is_all_zeros(grad) - return control_flow_ops.cond(all_zeros, - lambda: array_ops.zeros( - [], dtype=dtypes.as_dtype(grad.dtype)), - lambda: grad) + return control_flow_ops.cond( + all_zeros, + lambda: array_ops.zeros([], dtype=dtypes.as_dtype(grad.dtype)), + lambda: grad) + nonzero_gradients = [_replace_nonexisting_grad(g) for g in gradients] fixed_global_norm = clip_ops.global_norm(nonzero_gradients) - gradients, _ = clip_ops.clip_by_global_norm(gradients, clip_norm, - use_norm=fixed_global_norm) + gradients, _ = clip_ops.clip_by_global_norm( + gradients, clip_norm, use_norm=fixed_global_norm) return list(six.moves.zip(gradients, variables)), fixed_global_norm diff --git a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py index b06213f715..618d8eb18d 100644 --- a/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py +++ b/tensorflow/contrib/opt/python/training/multitask_optimizer_wrapper_test.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np +import six + from tensorflow.contrib.opt.python.training import multitask_optimizer_wrapper from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -25,13 +28,11 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import momentum -import numpy as np -import six class MultitaskOptimizerWrapperTest(test.TestCase): + """Tests for the multitask optimizer wrapper. """ - Tests for the multitask optimizer wrapper. - """ + def testWrapper(self): with self.test_session(): var0 = variables.Variable([1.0, 2.0], dtype=dtypes.float32) @@ -39,12 +40,10 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads0 = constant_op.constant([0.1, 0.1], dtype=dtypes.float32) grads1 = constant_op.constant([0.01, 0.01], dtype=dtypes.float32) grads_allzero = constant_op.constant([0.0, 0.0], dtype=dtypes.float32) - mom_opt_impl = momentum.MomentumOptimizer( - learning_rate=2.0, momentum=0.9) + mom_opt_impl = momentum.MomentumOptimizer(learning_rate=2.0, momentum=0.9) mom_opt = multitask_optimizer_wrapper.MultitaskOptimizerWrapper( mom_opt_impl) - mom_update = mom_opt.apply_gradients( - zip([grads0, grads1], [var0, var1])) + mom_update = mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) mom_update_partial = mom_opt.apply_gradients( zip([grads_allzero, grads1], [var0, var1])) mom_update_no_action = mom_opt.apply_gradients( @@ -63,14 +62,13 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 1: normal momentum update. self.evaluate(mom_update) # Check that the momentum accumulators have been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) - self.assertAllCloseAccordingToType(np.array([0.01, 0.01]), - self.evaluate(slot1)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.01, 0.01]), self.evaluate(slot1)) # Check that the parameters have been updated. self.assertAllCloseAccordingToType( - np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), - self.evaluate(var0)) + np.array([1.0 - (0.1 * 2.0), 2.0 - (0.1 * 2.0)]), self.evaluate(var0)) self.assertAllCloseAccordingToType( np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), self.evaluate(var1)) @@ -78,8 +76,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 2: momentum update that changes only slot1 but not slot0. self.evaluate(mom_update_partial) # Check that only the relevant momentum accumulator has been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -87,8 +85,8 @@ class MultitaskOptimizerWrapperTest(test.TestCase): # Step 3: momentum update that does not change anything. self.evaluate(mom_update_no_action) # Check that the momentum accumulators have *NOT* been updated. - self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), - self.evaluate(slot0)) + self.assertAllCloseAccordingToType( + np.array([0.1, 0.1]), self.evaluate(slot0)) self.assertAllCloseAccordingToType( np.array([(0.9 * 0.01 + 0.01), (0.9 * 0.01 + 0.01)]), self.evaluate(slot1)) @@ -105,8 +103,9 @@ class MultitaskOptimizerWrapperTest(test.TestCase): grads3 = None varlist = [var0, var1, var2, var3] gradients = [grads0, grads1, grads2, grads3] - clipped_gradvars, global_norm = multitask_optimizer_wrapper.clip_gradients_by_global_norm( - six.moves.zip(gradients, varlist), clip_norm=1.0) + clipped_gradvars, global_norm = ( + multitask_optimizer_wrapper.clip_gradients_by_global_norm( + six.moves.zip(gradients, varlist), clip_norm=1.0)) clipped_grads = list(six.moves.zip(*clipped_gradvars))[0] reference_global_norm = np.sqrt(np.sum(np.square([10.0, 15.0, 0.0, 5.0]))) self.assertAllCloseAccordingToType( @@ -115,5 +114,6 @@ class MultitaskOptimizerWrapperTest(test.TestCase): self.evaluate(clipped_grads[2]), np.array([0., 0.])) self.assertEqual(clipped_grads[3], None) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 16b6d145e3..f130a2187c 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.rnn.python.ops import core_rnn_cell +from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell from tensorflow.core.protobuf import config_pb2 from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -38,9 +39,6 @@ from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test -from tensorflow.python.framework import test_util -from tensorflow.contrib.rnn.python.ops import rnn_cell as contrib_rnn_cell - # pylint: enable=protected-access @@ -374,19 +372,20 @@ class RNNCellTest(test.TestCase): h = array_ops.zeros([batch_size, num_proj]) state = rnn_cell_impl.LSTMStateTuple(c, h) cell = contrib_rnn_cell.LayerNormLSTMCell( - num_units=num_units, - num_proj=num_proj, - forget_bias=1.0, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) + num_units=num_units, + num_proj=num_proj, + forget_bias=1.0, + layer_norm=True, + norm_gain=1.0, + norm_shift=0.0) g, out_m = cell(x, state) sess.run([variables_lib.global_variables_initializer()]) - res = sess.run([g, out_m], { - x.name: np.ones((batch_size, input_size)), - c.name: 0.1 * np.ones((batch_size, num_units)), - h.name: 0.1 * np.ones((batch_size, num_proj)) - }) + res = sess.run( + [g, out_m], { + x.name: np.ones((batch_size, input_size)), + c.name: 0.1 * np.ones((batch_size, num_units)), + h.name: 0.1 * np.ones((batch_size, num_proj)) + }) self.assertEqual(len(res), 2) # The numbers in results were not calculated, this is mostly just a # smoke test. @@ -396,9 +395,9 @@ class RNNCellTest(test.TestCase): # Different inputs so different outputs and states for i in range(1, batch_size): self.assertTrue( - float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) + float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) < 1e-6) self.assertTrue( - float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) + float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) < 1e-6) def testOutputProjectionWrapper(self): with self.test_session() as sess: diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index b4a5f2d7eb..46823fa364 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -996,26 +996,19 @@ class RNNCellTest(test.TestCase): output, state = cell(x, hidden) sess.run([variables.global_variables_initializer()]) - res = sess.run([output, state], { - hidden[0].name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]), - x.name: - np.array([[[[[1.],[1.]], - [[1.],[1.]]], - [[[1.],[1.]], - [[1.],[1.]]]], - [[[[2.],[2.]], - [[2.],[2.]]], - [[[2.],[2.]], - [[2.],[2.]]]]]) - }) + res = sess.run( + [output, state], { + hidden[0].name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], + [[[2.], [2.]], [[2.], [2.]]]]]), + x.name: + np.array([[[[[1.], [1.]], [[1.], [1.]]], [[[1.], [1.]], [[ + 1. + ], [1.]]]], [[[[2.], [2.]], [[2.], [2.]]], [[[2.], [2.]], + [[2.], [2.]]]]]) + }) # This is a smoke test, making sure expected values are unchanged. self.assertEqual(len(res), 2) self.assertAllClose(res[0], res[1].h) @@ -1276,10 +1269,8 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(res[2].c, expected_c1, 1e-5) self.assertAllClose(res[2].h, expected_h1, 1e-5) - def testBasicLSTMCellWithStateTupleLayerNorm(self): - """The results of LSTMCell and LayerNormBasicLSTMCell - should be same. """ + """The results of LSTMCell and LayerNormBasicLSTMCell should be the same.""" with self.test_session() as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): @@ -1290,21 +1281,21 @@ class LayerNormBasicLSTMCellTest(test.TestCase): c1 = array_ops.zeros([1, 2]) h1 = array_ops.zeros([1, 2]) state1 = rnn_cell_impl.LSTMStateTuple(c1, h1) - cell = rnn_cell_impl.MultiRNNCell( - [contrib_rnn_cell.LayerNormLSTMCell( - 2, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0) for _ in range(2)]) + cell = rnn_cell_impl.MultiRNNCell([ + contrib_rnn_cell.LayerNormLSTMCell( + 2, layer_norm=True, norm_gain=1.0, norm_shift=0.0) + for _ in range(2) + ]) h, (s0, s1) = cell(x, (state0, state1)) sess.run([variables.global_variables_initializer()]) - res = sess.run([h, s0, s1], { - x.name: np.array([[1., 1.]]), - c0.name: 0.1 * np.asarray([[0, 1]]), - h0.name: 0.1 * np.asarray([[2, 3]]), - c1.name: 0.1 * np.asarray([[4, 5]]), - h1.name: 0.1 * np.asarray([[6, 7]]), - }) + res = sess.run( + [h, s0, s1], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + c1.name: 0.1 * np.asarray([[4, 5]]), + h1.name: 0.1 * np.asarray([[6, 7]]), + }) expected_h = np.array([[-0.38079708, 0.38079708]]) expected_h0 = np.array([[-0.38079708, 0.38079708]]) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 5e85c125df..0698d40438 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import rnn_cell_impl from tensorflow.python.ops import variable_scope as vs -from tensorflow.python.ops import partitioned_variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest @@ -115,7 +114,7 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): The class uses optional peep-hole connections, and an optional projection layer. - + Layer normalization implementation is based on: https://arxiv.org/abs/1607.06450. @@ -124,15 +123,24 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton and is applied before the internal nonlinearities. - + """ - def __init__(self, num_units, use_peepholes=False, - initializer=None, num_proj=None, proj_clip=None, - num_unit_shards=1, num_proj_shards=1, - forget_bias=1.0, state_is_tuple=True, - activation=math_ops.tanh, reuse=None, - layer_norm=False, norm_gain=1.0, norm_shift=0.0): + def __init__(self, + num_units, + use_peepholes=False, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=1, + num_proj_shards=1, + forget_bias=1.0, + state_is_tuple=True, + activation=math_ops.tanh, + reuse=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0): """Initialize the parameters for an LSTM cell. Args: @@ -164,8 +172,6 @@ class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell): `layer_norm` has been set to `False`, this argument will be ignored. norm_shift: float, The layer normalization shift initial value. If `layer_norm` has been set to `False`, this argument will be ignored. - - """ super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse) if not state_is_tuple: @@ -2049,8 +2055,8 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell): if self._skip_connection: self._total_output_channels += self._input_shape[-1] - state_size = tensor_shape.TensorShape(self._input_shape[:-1] - + [self._output_channels]) + state_size = tensor_shape.TensorShape( + self._input_shape[:-1] + [self._output_channels]) self._state_size = rnn_cell_impl.LSTMStateTuple(state_size, state_size) self._output_size = tensor_shape.TensorShape(self._input_shape[:-1] + [self._total_output_channels]) @@ -2110,11 +2116,8 @@ class Conv3DLSTMCell(ConvLSTMCell): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) -def _conv(args, - filter_size, - num_features, - bias, - bias_start=0.0): + +def _conv(args, filter_size, num_features, bias, bias_start=0.0): """convolution: Args: args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, @@ -2391,12 +2394,19 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): """ - def __init__(self, num_units, - use_peepholes=False, cell_clip=None, - initializer=None, num_proj=None, proj_clip=None, + def __init__(self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, forget_bias=1.0, - activation=None, layer_norm=False, - norm_gain=1.0, norm_shift=0.0, reuse=None): + activation=None, + layer_norm=False, + norm_gain=1.0, + norm_shift=0.0, + reuse=None): """Initialize the parameters for an LSTM cell. Args: @@ -2457,7 +2467,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): def output_size(self): return self._output_size - def _linear(self, args, output_size, @@ -2507,9 +2516,9 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): scope = vs.get_variable_scope() with vs.variable_scope(scope) as outer_scope: weights = vs.get_variable( - "kernel", [total_arg_size, output_size], - dtype=dtype, - initializer=kernel_initializer) + "kernel", [total_arg_size, output_size], + dtype=dtype, + initializer=kernel_initializer) if len(args) == 1: res = math_ops.matmul(args[0], weights) else: @@ -2521,9 +2530,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): if bias_initializer is None: bias_initializer = init_ops.constant_initializer(0.0, dtype=dtype) biases = vs.get_variable( - "bias", [output_size], - dtype=dtype, - initializer=bias_initializer) + "bias", [output_size], dtype=dtype, initializer=bias_initializer) if not layer_norm: res = nn_ops.bias_add(res, biases) @@ -2554,7 +2561,6 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): ValueError: If input size cannot be inferred from inputs via static shape inference. """ - num_proj = self._num_units if self._num_proj is None else self._num_proj sigmoid = math_ops.sigmoid (c_prev, m_prev) = state @@ -2567,10 +2573,14 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): with vs.variable_scope(scope, initializer=self._initializer) as unit_scope: # i = input_gate, j = new_input, f = forget_gate, o = output_gate - lstm_matrix = self._linear([inputs, m_prev], 4 * self._num_units, bias=True, - bias_initializer=None, layer_norm=self._layer_norm) + lstm_matrix = self._linear( + [inputs, m_prev], + 4 * self._num_units, + bias=True, + bias_initializer=None, + layer_norm=self._layer_norm) i, j, f, o = array_ops.split( - value=lstm_matrix, num_or_size_splits=4, axis=1) + value=lstm_matrix, num_or_size_splits=4, axis=1) if self._layer_norm: i = _norm(self._norm_gain, self._norm_shift, i, "input") @@ -2580,20 +2590,22 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): # Diagonal connections if self._use_peepholes: - with vs.variable_scope(unit_scope) as projection_scope: + with vs.variable_scope(unit_scope): w_f_diag = vs.get_variable( - "w_f_diag", shape=[self._num_units], dtype=dtype) + "w_f_diag", shape=[self._num_units], dtype=dtype) w_i_diag = vs.get_variable( - "w_i_diag", shape=[self._num_units], dtype=dtype) + "w_i_diag", shape=[self._num_units], dtype=dtype) w_o_diag = vs.get_variable( - "w_o_diag", shape=[self._num_units], dtype=dtype) + "w_o_diag", shape=[self._num_units], dtype=dtype) if self._use_peepholes: - c = (sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + - sigmoid(i + w_i_diag * c_prev) * self._activation(j)) + c = ( + sigmoid(f + self._forget_bias + w_f_diag * c_prev) * c_prev + + sigmoid(i + w_i_diag * c_prev) * self._activation(j)) else: - c = (sigmoid(f + self._forget_bias) * c_prev + sigmoid(i) * - self._activation(j)) + c = ( + sigmoid(f + self._forget_bias) * c_prev + + sigmoid(i) * self._activation(j)) if self._layer_norm: c = _norm(self._norm_gain, self._norm_shift, c, "state") @@ -2608,7 +2620,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): m = sigmoid(o) * self._activation(c) if self._num_proj is not None: - with vs.variable_scope("projection") as proj_scope: + with vs.variable_scope("projection"): m = self._linear(m, self._num_proj, bias=False) if self._proj_clip is not None: diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index c3b180d9f4..e87ef41388 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -192,7 +192,8 @@ class _BaseAttentionMechanism(AttentionMechanism): raise TypeError("probability_fn must be callable, saw type: %s" % type(probability_fn).__name__) if score_mask_value is None: - score_mask_value = dtypes.as_dtype(self._memory_layer.dtype).as_numpy_dtype(-np.inf) + score_mask_value = dtypes.as_dtype( + self._memory_layer.dtype).as_numpy_dtype(-np.inf) self._probability_fn = lambda score, prev: ( # pylint:disable=g-long-lambda probability_fn( _maybe_mask_score(score, memory_sequence_length, score_mask_value), @@ -1145,7 +1146,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): % (len(attention_layer_sizes), len(attention_mechanisms))) self._attention_layers = tuple( layers_core.Dense( - attention_layer_size, name="attention_layer", use_bias=False, + attention_layer_size, + name="attention_layer", + use_bias=False, dtype=attention_mechanisms[i].dtype) for i, attention_layer_size in enumerate(attention_layer_sizes)) self._attention_layer_size = sum(attention_layer_sizes) diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 331943a3ef..ac8d994502 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -16,8 +16,8 @@ limitations under the License. #ifdef TENSORFLOW_USE_VERBS #include "tensorflow/contrib/verbs/rdma.h" -#include <cstdlib> #include <fcntl.h> +#include <cstdlib> #include "tensorflow/contrib/verbs/verbs_util.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/dma_helper.h" @@ -137,7 +137,7 @@ ibv_device* set_device() { if (!env_p_rdma_device.empty()) { for (device_index = 0; device_index < dev_num; device_index++) { if (!env_p_rdma_device.compare( - ibv_get_device_name(dev_list[device_index]))) { + ibv_get_device_name(dev_list[device_index]))) { CHECK(get_dev_active_port_count(dev_list[device_index]) != 0) << "Device " << ibv_get_device_name(dev_list[device_index]) << " has no active ports"; @@ -147,7 +147,7 @@ ibv_device* set_device() { // check validity of input device CHECK(false) << "The device " << env_p_rdma_device << " wasn't found"; } else { - // set default device + // set default device str_port_num = get_env_var("RDMA_DEVICE_PORT"); CHECK(str_port_num.empty()) << "RDMA_DEVICE should be provided if RDMA_DEVICE_PORT is set by user"; @@ -177,7 +177,7 @@ ibv_device* set_device() { // Returns: // port to use uint8_t set_port(ibv_context* context) { - uint8_t port_num = 0; //0 is illegal port number + uint8_t port_num = 0; // 0 is illegal port number string str_port_num; ibv_device_attr device_att; ibv_port_attr port_attr; @@ -199,9 +199,7 @@ uint8_t set_port(ibv_context* context) { // check if port id active CHECK(port_attr.state == IBV_PORT_ACTIVE) << "Selected RDMA_DEVICE_PORT is not active"; - } - // set default port - else { + } else { // set default port for (port_index = 1; port_index <= device_att.phys_port_cnt; port_index++) { rc = ibv_query_port(context, port_index, &port_attr); CHECK(!rc) << "Failed to query the port" << port_index; @@ -269,7 +267,7 @@ bool is_gid_type_roce_v2(ibv_context* context, uint8_t port_num, // Function to set GID index. // If the port link is IB, no GID index should be selected. // If Ethernet but RDMA_GID_INDEX not set gid index that supports -// RoCE V2 will be chosen(fails if more then one IP is configured) +// RoCE V2 will be chosen(fails if more than one IP is configured) // Args: // context - device context // port_num - port number @@ -302,7 +300,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { } } switch (port_attr.link_layer) { - case(IBV_LINK_LAYER_ETHERNET) : + case (IBV_LINK_LAYER_ETHERNET): gid_str = get_env_var("RDMA_GID_INDEX"); if (!gid_str.empty()) { gid_index = stoi(gid_str); @@ -313,7 +311,7 @@ uint8_t set_gid(uint8_t port_num, ibv_context* context) { << "More than one IP is available, please specify GID_INDEX"; } break; - case(IBV_LINK_LAYER_INFINIBAND) : // no need in GID index + case (IBV_LINK_LAYER_INFINIBAND): // no need in GID index break; default: LOG(INFO) << "Unknown port link layer. Currently supporting Ethernet and " @@ -374,7 +372,8 @@ enum ibv_mtu set_mtu(uint8_t port_num, ibv_context* context) { break; default: CHECK(0) << "Error: MTU input value must be one of the following: 256, " - "512, 1024, 2048, 4096. MTU " << mtu << " is invalid\n"; + "512, 1024, 2048, 4096. MTU " + << mtu << " is invalid\n"; break; } CHECK(mtu < port_attr.active_mtu) @@ -453,9 +452,9 @@ void RdmaAdapter::Process_CQ() { CHECK_GE(ne, 0); for (int i = 0; i < ne; ++i) { CHECK(wc_[i].status == IBV_WC_SUCCESS) - << "Failed status \n" << ibv_wc_status_str(wc_[i].status) << " " - << wc_[i].status << " " << static_cast<int>(wc_[i].wr_id) << " " - << wc_[i].vendor_err; + << "Failed status \n" + << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " " + << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err; if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) { RdmaChannel* rc = reinterpret_cast<RdmaChannel*>(wc_[i].wr_id); // put back a recv wr. @@ -611,7 +610,7 @@ RdmaChannel::RdmaChannel(const RdmaAdapter* adapter, const string local_name, // create message and ack buffers, then initialize the tables. { const string buffer_names[] = {"tx_message_buffer", "rx_message_buffer", - "tx_ack_buffer", "rx_ack_buffer"}; + "tx_ack_buffer", "rx_ack_buffer"}; tx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[0]); rx_message_buffer_ = new RdmaMessageBuffer(this, buffer_names[1]); tx_ack_buffer_ = new RdmaAckBuffer(this, buffer_names[2]); @@ -672,7 +671,7 @@ void RdmaChannel::SetRemoteAddress(const RdmaAddress& ra, bool override) { void RdmaChannel::Recv() { struct ibv_recv_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; struct ibv_recv_wr* bad_wr; CHECK(!ibv_post_recv(qp_, &wr, &bad_wr)) << "Failed to post recv"; } @@ -826,11 +825,11 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.ah_attr.grh.traffic_class = adapter_->params_.traffic_class; int r; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_AV | - IBV_QP_PATH_MTU | - IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | - IBV_QP_MAX_DEST_RD_ATOMIC | - IBV_QP_MIN_RNR_TIMER))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER))) << "QP to Ready to Receive " << r; memset(&attr, 0, sizeof(ibv_qp_attr)); @@ -841,10 +840,10 @@ void RdmaChannel::Connect(const RdmaAddress& remoteAddr) { attr.rnr_retry = 7; /* infinite */ attr.max_rd_atomic = 1; - CHECK(!(r = ibv_modify_qp(qp_, &attr, IBV_QP_STATE | IBV_QP_TIMEOUT | - IBV_QP_RETRY_CNT | - IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | - IBV_QP_MAX_QP_RD_ATOMIC))) + CHECK(!(r = ibv_modify_qp(qp_, &attr, + IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC))) << "QP to Ready to Send " << r; connected_ = true; @@ -931,7 +930,7 @@ void RdmaBuffer::Write(uint32_t imm_data, size_t buffer_size) { struct ibv_send_wr wr; memset(&wr, 0, sizeof(wr)); - wr.wr_id = (uint64_t) this; + wr.wr_id = (uint64_t)this; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM; @@ -1026,9 +1025,9 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( TensorProto proto; if (src_dev->tensorflow_gpu_device_info() && (!send_args.alloc_attrs.on_host())) { - CHECK(send_args.device_context) << "send dev name: " << src_dev->name() - << " gpu_info: " - << src_dev->tensorflow_gpu_device_info(); + CHECK(send_args.device_context) + << "send dev name: " << src_dev->name() + << " gpu_info: " << src_dev->tensorflow_gpu_device_info(); if (can_memcpy) { AllocatorAttributes host_alloc_attrs; @@ -1054,8 +1053,8 @@ Rendezvous::DoneCallback RdmaTensorBuffer::getRecvTensorCallback( // aync instead GPUUtil::SetProtoFromGPU( in, src_dev, send_args.device_context, &proto, is_dead, - [this, proto, buffer_size, key, in, step_id, key_with_step_id, - is_dead, send_args, recv_args](const Status& s) mutable { + [this, proto, buffer_size, key, in, step_id, key_with_step_id, + is_dead, send_args, recv_args](const Status& s) mutable { CHECK(s.ok()) << "copy proto from gpu sync"; auto tensor_bytes = proto.ByteSize(); buffer_size += tensor_bytes; diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt new file mode 100644 index 0000000000..cd7ec6e551 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UniqueV2.pbtxt @@ -0,0 +1,47 @@ +op { + graph_op_name: "UniqueV2" + in_arg { + name: "x" + description: <<END +A `Tensor`. +END + } + in_arg { + name: "axis" + description: <<END +A `Tensor` of type `int64` (default: 0). The axis of the Tensor to +find the unique elements. +END + } + out_arg { + name: "y" + description: <<END +A `Tensor`. Unique elements along the `axis` of `Tensor` x. +END + } + out_arg { + name: "idx" + description: <<END +A 1-D Tensor. Has the same type as x that contains the index of each +value of x in the output y. +END + } + summary: "Finds unique elements in a 1-D tensor." + description: <<END +This operation returns a tensor `y` containing all of the unique elements of `x` +sorted in the same order that they occur in `x`. This operation also returns a +tensor `idx` the same size as `x` that contains the index of each value of `x` +in the unique output `y`. In other words: + +`y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` + +For example: + +``` +# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] +y, idx = unique(x) +y ==> [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index 0a3355cdbc..77a96d1e03 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -26,6 +26,8 @@ need not be sorted and need not cover all values in the full range of valid values. If the sum is empty for a given segment ID `i`, `output[i] = 0`. +If the given segment ID `i` is negative, the value is dropped and will not be +added to the sum of the segment. `num_segments` should equal the number of distinct segment IDs. diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index d0dba6e1f0..223dd12f8f 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -455,7 +455,7 @@ class Graph { // the corresponding NodeDef to reflect the change. // REQUIRES: The control edge must exist. void RemoveControlEdge(const Edge* e); - + // Updates the input to a node. The existing edge to `dst` is removed and an // edge from `new_src` to `dst` is created. The NodeDef associated with `dst` // is also updated. diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc index 2aa1b31e15..e2ce0ba046 100644 --- a/tensorflow/core/graph/graph_test.cc +++ b/tensorflow/core/graph/graph_test.cc @@ -118,11 +118,9 @@ class GraphTest : public ::testing::Test { LOG(FATAL) << name; } - bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, - const Node* dst) { - for (const Edge *e : dst->in_edges()) { - if (e->IsControlEdge() && - e->src() == src && + bool ControlEdgeExistsInGraphOrNodeDef(const Node* src, const Node* dst) { + for (const Edge* e : dst->in_edges()) { + if (e->IsControlEdge() && e->src() == src && e->src_output() == Graph::kControlSlot && e->dst_input() == Graph::kControlSlot) { return true; diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f1cb9a1860..b4a5a3c796 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1720,6 +1720,7 @@ tf_cuda_cc_tests( ":data_flow", ":ops_testutil", ":ops_util", + "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/kernels/bincount_op.cc b/tensorflow/core/kernels/bincount_op.cc index 766d63e3be..890fa3121b 100644 --- a/tensorflow/core/kernels/bincount_op.cc +++ b/tensorflow/core/kernels/bincount_op.cc @@ -97,8 +97,9 @@ class BincountOp : public OpKernel { const Tensor& weights_t = ctx->input(2); int32 size = size_tensor.scalar<int32>()(); - OP_REQUIRES(ctx, size >= 0, errors::InvalidArgument( - "size (", size, ") must be non-negative")); + OP_REQUIRES( + ctx, size >= 0, + errors::InvalidArgument("size (", size, ") must be non-negative")); const auto arr = arr_t.flat<int32>(); const auto weights = weights_t.flat<T>(); diff --git a/tensorflow/core/kernels/bincount_op.h b/tensorflow/core/kernels/bincount_op.h index 0f8dd2b82a..cd3d560cd1 100644 --- a/tensorflow/core/kernels/bincount_op.h +++ b/tensorflow/core/kernels/bincount_op.h @@ -16,11 +16,11 @@ limitations under the License. #ifndef TENSORFLOW_BINCOUNT_OP_H_ #define TENSORFLOW_BINCOUNT_OP_H_ +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/errors.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index ae9e26ffdf..6074b3e1f6 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -17,12 +17,12 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/kernels/bincount_op.h" #include "external/cub_archive/cub/device/device_histogram.cuh" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/bincount_op.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -93,8 +93,8 @@ struct BincountFunctor<GPUDevice, T> { /* num_samples */ num_samples, /* stream */ stream); if (err != cudaSuccess) { - return errors::Internal("Could not launch HistogramEven: ", - cudaGetErrorString(err), "."); + return errors::Internal( + "Could not launch HistogramEven: ", cudaGetErrorString(err), "."); } return Status::OK(); } diff --git a/tensorflow/core/kernels/bincount_op_test.cc b/tensorflow/core/kernels/bincount_op_test.cc index 14becc87a7..cb04b40637 100644 --- a/tensorflow/core/kernels/bincount_op_test.cc +++ b/tensorflow/core/kernels/bincount_op_test.cc @@ -30,8 +30,8 @@ static Graph* Bincount(int arr_size, int nbins) { Tensor arr(DT_INT32, TensorShape({arr_size})); arr.flat<int32>() = arr.flat<int32>().setRandom().abs(); - Tensor size(DT_INT32, TensorShape({(int32)1})); - size.flat<int32>()(0) = (int32)nbins; + Tensor size(DT_INT32, TensorShape({static_cast<int32>(1)})); + size.flat<int32>()(0) = static_cast<int32>(nbins); Tensor weights(DT_INT32, TensorShape({0})); diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc index aafbbe41b4..325dee793b 100644 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc @@ -77,10 +77,10 @@ struct BucketizeFunctor<GPUDevice, T> { TF_RETURN_IF_ERROR(boundaries_array.Finalize()); CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); - BucketizeCustomKernel< - T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>( - input.size(), input.data(), boundaries_vector.size(), - boundaries_array.data(), output.data()); + BucketizeCustomKernel<T> + <<<config.block_count, config.thread_per_block, 0, d.stream()>>>( + input.size(), input.data(), boundaries_vector.size(), + boundaries_array.data(), output.data()); return Status::OK(); } diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index f819fccbfb..c2d24d1f12 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -1101,29 +1101,27 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel { bool cudnn_use_autotune_; }; - - #define REGISTER_GPU_KERNEL(T) \ REGISTER_KERNEL_BUILDER( \ Name("Conv3DBackpropInput").Device(DEVICE_GPU).TypeConstraint<T>("T"), \ Conv3DBackpropInputOp<GPUDevice, T>); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropInputV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint<T>("T") \ - .HostMemory("input_sizes"), \ - Conv3DBackpropInputOp<GPUDevice, T>); \ + .Device(DEVICE_GPU) \ + .TypeConstraint<T>("T") \ + .HostMemory("input_sizes"), \ + Conv3DBackpropInputOp<GPUDevice, T>); \ REGISTER_KERNEL_BUILDER( \ - Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \ - Conv3DBackpropFilterOp<GPUDevice, T>); \ + Name("Conv3DBackpropFilter").Device(DEVICE_GPU).TypeConstraint<T>("T"), \ + Conv3DBackpropFilterOp<GPUDevice, T>); \ REGISTER_KERNEL_BUILDER(Name("Conv3DBackpropFilterV2") \ - .Device(DEVICE_GPU) \ - .TypeConstraint<T>("T") \ - .HostMemory("filter_sizes"), \ - Conv3DBackpropFilterOp<GPUDevice, T>); + .Device(DEVICE_GPU) \ + .TypeConstraint<T>("T") \ + .HostMemory("filter_sizes"), \ + Conv3DBackpropFilterOp<GPUDevice, T>); TF_CALL_half(REGISTER_GPU_KERNEL); TF_CALL_float(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL - + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_op_asinh.cc b/tensorflow/core/kernels/cwise_op_asinh.cc index 8d44208aa7..a7673afd0b 100644 --- a/tensorflow/core/kernels/cwise_op_asinh.cc +++ b/tensorflow/core/kernels/cwise_op_asinh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Asinh", functor::asinh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Asinh", functor::asinh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Asinh", functor::asinh, float, double); diff --git a/tensorflow/core/kernels/cwise_op_atanh.cc b/tensorflow/core/kernels/cwise_op_atanh.cc index bbc69e45aa..7b688db4c5 100644 --- a/tensorflow/core/kernels/cwise_op_atanh.cc +++ b/tensorflow/core/kernels/cwise_op_atanh.cc @@ -22,7 +22,7 @@ REGISTER4(UnaryOp, CPU, "Atanh", functor::atanh, float, double, #ifdef TENSORFLOW_USE_SYCL REGISTER2(UnaryOp, SYCL, "Atanh", functor::atanh, float, double); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #if GOOGLE_CUDA REGISTER2(UnaryOp, GPU, "Atanh", functor::atanh, float, double); diff --git a/tensorflow/core/kernels/depthwise_conv_grad_op.cc b/tensorflow/core/kernels/depthwise_conv_grad_op.cc index 53d65a22d1..9347978d51 100644 --- a/tensorflow/core/kernels/depthwise_conv_grad_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_grad_op.cc @@ -231,7 +231,8 @@ static void CopyOutputBackpropRegion(const DepthwiseArgs& args, } // Pad to vector-register width (if needed). for (int64 d = 0; d < pad_size; ++d) { - buffer[buf_base + vectorized_size + scalar_size + d] = static_cast<T>(0); + buffer[buf_base + vectorized_size + scalar_size + d] = + static_cast<T>(0); } } } @@ -510,7 +511,8 @@ static void DepthwiseConvBackpropInputReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, Eigen::half>; +extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, + Eigen::half>; extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, float>; extern template struct LaunchDepthwiseConvBackpropInputOp<GPUDevice, double>; @@ -885,7 +887,8 @@ static void DepthwiseConvBackpropFilterReference(const DepthwiseArgs& args, #if GOOGLE_CUDA -extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, Eigen::half>; +extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, + Eigen::half>; extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, float>; extern template struct LaunchDepthwiseConvBackpropFilterOp<GPUDevice, double>; diff --git a/tensorflow/core/kernels/depthwise_conv_op.cc b/tensorflow/core/kernels/depthwise_conv_op.cc index 2759ecb2f1..30ecd0c2ba 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.cc +++ b/tensorflow/core/kernels/depthwise_conv_op.cc @@ -427,6 +427,11 @@ TF_CALL_double(REGISTER_CPU_KERNEL); #endif #if GOOGLE_CUDA +REGISTER_KERNEL_BUILDER(Name("DepthwiseConv2dNative") + .Device(DEVICE_GPU) + .TypeConstraint<Eigen::half>("T"), + DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>); + REGISTER_KERNEL_BUILDER( Name("DepthwiseConv2dNative").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), DepthwiseConv2dNativeOp<GPUDevice, Eigen::half>); diff --git a/tensorflow/core/kernels/depthwise_conv_op.h b/tensorflow/core/kernels/depthwise_conv_op.h index 11aed5b415..097a9f5bfa 100644 --- a/tensorflow/core/kernels/depthwise_conv_op.h +++ b/tensorflow/core/kernels/depthwise_conv_op.h @@ -158,7 +158,8 @@ struct DepthwiseFilterPadOp { } // Pad the remainder of output to vector-register boundary. for (int64 j = 0; j < pad_size; ++j) { - padded_filter[output_base + vectorized_size + scalar_size + j] = static_cast<T>(0); + padded_filter[output_base + vectorized_size + scalar_size + j] = + static_cast<T>(0); } } } diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index 157ce106ce..d8bdb700e6 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include <vector> +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -37,7 +38,6 @@ limitations under the License. #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/use_cudnn.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/maxpooling_op_gpu.h" @@ -359,7 +359,8 @@ class MaxPoolingGradOp<Eigen::GpuDevice, T> : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -888,7 +889,8 @@ class MaxPoolingWithArgmaxOp : public OpKernel { errors::Unimplemented( "Pooling is not yet supported on the batch dimension.")); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1052,7 +1054,8 @@ class MaxPoolingNoMaskOp<GPUDevice, T> : public OpKernel { "Pooling is not yet supported on the batch dimension.")); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { @@ -1137,7 +1140,8 @@ class MaxPoolingNoMaskV2Op<GPUDevice, T> : public OpKernel { } OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); use_dnn_ = CanUseCudnn(); - ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, &propagate_nans_); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_MAXPOOL_NANPROP", false, + &propagate_nans_)); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc index d96b844383..f8daaca4c9 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc +++ b/tensorflow/core/kernels/maxpooling_op_gpu.cu.cc @@ -405,17 +405,17 @@ bool MaxPoolForwardWithOptionalArgmax<T>::operator()( if (propagate_nans) { MaxPoolForwardNHWC<true> <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } else { MaxPoolForwardNHWC<false> <<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, - kThreadsPerBlock, 0, d.stream()>>> - (output_size, bottom_data, height, width, channels, pooled_height, - pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, - top_data, mask); + kThreadsPerBlock, 0, d.stream()>>>( + output_size, bottom_data, height, width, channels, pooled_height, + pooled_width, kernel_h, kernel_w, stride_h, stride_w, pad_t, pad_l, + top_data, mask); } return d.ok(); } diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index 0a5be4fec9..c4d5a45d3c 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -101,8 +101,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. TensorShape output_shape = input_shape.GetTfShape(); Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output( + input_number, output_shape, &output_tensor)); CHECK_NOTNULL(output_tensor); // Do we need to reorder Mkl layout into TensorFlow layout? @@ -116,13 +116,13 @@ class MklToTfOp : public OpKernel { // If not, just forward input tensor to output tensor. CHECK(output_tensor->CopyFrom(input_tensor, output_shape)); } - } catch (mkldnn::error &e) { + } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + std::string(e.message) + - ", in file " + std::string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + std::string(e.message) + ", in file " + + std::string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } #else @@ -160,8 +160,8 @@ class MklToTfOp : public OpKernel { // Allocate output tensor. Tensor* output_tensor = NULL; - OP_REQUIRES_OK(context, context->allocate_output(input_number, - output_shape, &output_tensor)); + OP_REQUIRES_OK(context, context->allocate_output(input_number, output_shape, + &output_tensor)); dnnLayout_t output_layout = static_cast<dnnLayout_t>(input_shape.GetTfLayout()); diff --git a/tensorflow/core/kernels/ops_util.h b/tensorflow/core/kernels/ops_util.h index d3d1b56c9d..93ef512778 100644 --- a/tensorflow/core/kernels/ops_util.h +++ b/tensorflow/core/kernels/ops_util.h @@ -98,6 +98,19 @@ gtl::InlinedVector<T, 8> ComputeStride(const TensorShape& shape) { return strides; } +// Helper to compute 'strides' given an Eigen TensorDimensions +template <typename T, typename EigenDimensions> +gtl::InlinedVector<T, 8> ComputeEigenStrides(const EigenDimensions& shape) { + const int ndims = shape.rank(); + gtl::InlinedVector<T, 8> strides(ndims); + T stride = 1; + for (int i = ndims - 1; i >= 0; --i) { + strides[i] = stride; + stride *= static_cast<T>(shape[i]); + } + return strides; +} + } // namespace tensorflow #endif // TENSORFLOW_KERNELS_OPS_UTIL_H_ diff --git a/tensorflow/core/platform/posix/error.cc b/tensorflow/core/platform/posix/error.cc index f8b0285c50..cda6d7d8f9 100644 --- a/tensorflow/core/platform/posix/error.cc +++ b/tensorflow/core/platform/posix/error.cc @@ -131,8 +131,8 @@ error::Code ErrnoToCode(int err_number) { case ENETUNREACH: // Network unreachable case ENOLCK: // No locks available case ENOLINK: // Link has been severed -#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) \ - || defined(__HAIKU__)) +#if !(defined(__APPLE__) || defined(__FreeBSD__) || defined(_WIN32) || \ + defined(__HAIKU__)) case ENONET: // Machine is not on the network #endif code = error::UNAVAILABLE; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 09f69a95c1..614ee00b01 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,8 +37,8 @@ limitations under the License. #ifdef TF_USE_SNAPPY #include "snappy.h" #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) #include <thread> #endif @@ -62,8 +62,8 @@ int NumSchedulableCPUs() { } perror("sched_getaffinity"); #endif -#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) \ - || defined(__HAIKU__) +#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \ + defined(__HAIKU__) unsigned int count = std::thread::hardware_concurrency(); if (count > 0) return static_cast<int>(count); #endif diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 8fa0dfbed9..cf11f419a4 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -752,6 +752,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleDown(unsigned mask, T value, return __shfl_down_sync(mask, value, delta, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleDown( + unsigned mask, Eigen::half value, int delta, int width = warpSize) { + return Eigen::half( + __shfl_down_sync(mask, static_cast<uint16>(value), delta, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. @@ -774,6 +780,12 @@ __device__ EIGEN_ALWAYS_INLINE T CudaShuffleXor(unsigned mask, T value, return __shfl_xor_sync(mask, value, laneMask, width); } +__device__ EIGEN_ALWAYS_INLINE Eigen::half CudaShuffleXor( + unsigned mask, Eigen::half value, int laneMask, int width = warpSize) { + return Eigen::half( + __shfl_xor_sync(mask, static_cast<uint16>(value), laneMask, width)); +} + // Variant of the (undocumented) version from the CUDA SDK, but using unsigned // instead of float for lo and hi (which is incorrect with ftz, for example). // A bug has been filed with NVIDIA and will be fixed in the next CUDA release. diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 118ff0d0d6..148c7851bd 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -24,25 +24,25 @@ limitations under the License. #include "mkl_dnn_types.h" #include "mkl_service.h" #include "mkl_trans.h" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" -#include "tensorflow/core/graph/mkl_graph_util.h" #ifdef INTEL_MKL_DNN #include "mkldnn.hpp" +using mkldnn::engine; using mkldnn::memory; -using mkldnn::reorder; -using mkldnn::primitive; using mkldnn::padding_kind; -using mkldnn::engine; +using mkldnn::primitive; +using mkldnn::reorder; #endif // The file contains a number of utility classes and functions used by MKL @@ -56,8 +56,14 @@ namespace tensorflow { // Tensorflow tensor. typedef enum { W = 0, H = 1, C = 2, N = 3 } MklDims; -typedef enum { Dim_N = 0, Dim_C = 1, Dim_H = 2, Dim_W = 3, - Dim_O = 0, Dim_I = 1 } MklDnnDims; +typedef enum { + Dim_N = 0, + Dim_C = 1, + Dim_H = 2, + Dim_W = 3, + Dim_O = 0, + Dim_I = 1 +} MklDnnDims; class MklShape { public: @@ -236,8 +242,7 @@ class MklShape { (IS_MKL_TENSOR_OFFSET + sizeof(size_t)) // Location of dimension_ // Location of sizes. Note dim is not used here, left here // to make macros consistent. -#define SIZES_OFFSET(dims) \ - (DIMS_OFFSET + sizeof(size_t)) +#define SIZES_OFFSET(dims) (DIMS_OFFSET + sizeof(size_t)) #define STRIDES_OFFSET(dims) \ (SIZES_OFFSET(dims) + dims * sizeof(size_t)) // Location of strides #define MKL_LAYOUT_OFFSET(dims) \ @@ -332,7 +337,7 @@ class MklDnnShape { /// Number of dimensions in Tensorflow format size_t dimension_ = 0; /// Required by MKLDNN for conversions - mkldnn_dims_t sizes_; // Required by MKL for conversions + mkldnn_dims_t sizes_; // Required by MKL for conversions memory::format tf_data_format_ = memory::format::format_undef; memory::data_type T_ = memory::data_type::data_undef; // MKL layout @@ -345,15 +350,13 @@ class MklDnnShape { typedef std::remove_extent<mkldnn_dims_t>::type mkldnn_dim_t; #define INVALID_DIM_SIZE -1 - public: MklDnnShape() { - for (size_t i = 0; i < sizeof(data_.sizes_) / - sizeof(data_.sizes_[0]); ++i) { + for (size_t i = 0; i < sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); + ++i) { data_.sizes_[i] = -1; } - for (size_t i = 0; i < sizeof(data_.map_) / - sizeof(data_.map_[0]); ++i) { + for (size_t i = 0; i < sizeof(data_.map_) / sizeof(data_.map_[0]); ++i) { data_.map_[i] = -1; } } @@ -369,26 +372,26 @@ class MklDnnShape { inline void SetDimensions(const size_t dimension) { data_.dimension_ = dimension; } - inline size_t GetDimension(char dimension)const { + inline size_t GetDimension(char dimension) const { int index = GetMklDnnTensorDimIndex(dimension); CHECK(index >= 0 && index < this->GetDimension()) << "Invalid index from the dimension: " << index << ", " << dimension; return this->DimSize(index); } - inline int32 GetMklDnnTensorDimIndex(char dimension)const { + inline int32 GetMklDnnTensorDimIndex(char dimension) const { switch (dimension) { - case 'N': - return MklDnnDims::Dim_N; - case 'C': - return MklDnnDims::Dim_C; - case 'H': - return MklDnnDims::Dim_H; - case 'W': - return MklDnnDims::Dim_W; - default: - LOG(FATAL) << "Invalid dimension: " << dimension; - return -1; // Avoid compiler warning about missing return value + case 'N': + return MklDnnDims::Dim_N; + case 'C': + return MklDnnDims::Dim_C; + case 'H': + return MklDnnDims::Dim_H; + case 'W': + return MklDnnDims::Dim_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value } } @@ -403,9 +406,9 @@ class MklDnnShape { memory::dims retVal; if (data_.is_mkl_tensor_) { int dimensions = sizeof(data_.sizes_) / sizeof(data_.sizes_[0]); - for (size_t i = 0 ; i < dimensions; i++) { + for (size_t i = 0; i < dimensions; i++) { if (data_.sizes_[i] != INVALID_DIM_SIZE) - retVal.push_back(data_.sizes_[i]); + retVal.push_back(data_.sizes_[i]); } } else { CHECK_EQ(data_.is_mkl_tensor_, true); @@ -414,7 +417,7 @@ class MklDnnShape { } inline int64 DimSize(int index) const { - CHECK_LT(index, sizeof(data_.sizes_)/sizeof(data_.sizes_[0])); + CHECK_LT(index, sizeof(data_.sizes_) / sizeof(data_.sizes_[0])); return data_.sizes_[index]; } @@ -451,7 +454,7 @@ class MklDnnShape { /// We don't create primitive_descriptor for TensorFlow layout now. /// We use lazy evaluation and create it only when needed. inline void SetTfLayout(size_t dims, const memory::dims& sizes, - memory::format format) { + memory::format format) { CHECK_EQ(dims, sizes.size()); data_.dimension_ = dims; for (size_t ii = 0; ii < dims; ii++) { @@ -497,9 +500,7 @@ class MklDnnShape { SetTfDimOrder(dimension, data_format); } - inline const mkldnn_dim_t* GetTfToMklDimMap() const { - return &data_.map_[0]; - } + inline const mkldnn_dim_t* GetTfToMklDimMap() const { return &data_.map_[0]; } inline size_t TfDimIdx(int index) const { return data_.map_[index]; } inline int64 TfDimSize(int index) const { return data_.sizes_[TfDimIdx(index)]; @@ -553,9 +554,7 @@ class MklDnnShape { /// Size of buffer to hold the serialized object, the size is computed by /// following above mentioned order - inline size_t GetSerializeBufferSize() const { - return sizeof(MklShapeData); - } + inline size_t GetSerializeBufferSize() const { return sizeof(MklShapeData); } void SerializeMklDnnShape(unsigned char* buf, size_t buf_size) const { CHECK(buf_size >= GetSerializeBufferSize()) @@ -566,12 +565,12 @@ class MklDnnShape { void DeSerializeMklDnnShape(const unsigned char* buf, size_t buf_size) { // Make sure buffer holds at least is_mkl_tensor_. CHECK(buf_size >= sizeof(data_.is_mkl_tensor_)) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; const bool is_mkl_tensor = *reinterpret_cast<const bool*>(buf); if (is_mkl_tensor) { // If it is an MKL Tensor then read the rest CHECK(buf_size >= GetSerializeBufferSize()) - << "Buffer size is too small in DeSerializeMklDnnShape"; + << "Buffer size is too small in DeSerializeMklDnnShape"; data_ = *reinterpret_cast<const MklShapeData*>(buf); } } @@ -660,8 +659,7 @@ inline void GetMklShape(OpKernelContext* ctext, int n, MklShape* mklshape) { } #ifdef INTEL_MKL_DNN -inline void GetMklShape(OpKernelContext* ctext, int n, - MklDnnShape* mklshape) { +inline void GetMklShape(OpKernelContext* ctext, int n, MklDnnShape* mklshape) { mklshape->DeSerializeMklDnnShape( ctext->input(GetTensorMetaDataIndex(n, ctext->num_inputs())) .flat<uint8>() @@ -700,8 +698,7 @@ inline void GetMklShapeList(OpKernelContext* ctext, StringPiece name, /// Get shape of input tensor pointed by 'input_idx' in TensorShape format. /// If the input tensor is in MKL layout, then obtains TensorShape from /// MklShape. -inline TensorShape GetTfShape(OpKernelContext* context, - size_t input_idx) { +inline TensorShape GetTfShape(OpKernelContext* context, size_t input_idx) { // Sanity check. CHECK_NOTNULL(context); CHECK_LT(input_idx, context->num_inputs()); @@ -821,7 +818,7 @@ inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, template <typename T> inline void AllocTmpBuffer(OpKernelContext* context, Tensor* tensor_out, - TensorShape tf_shape) { + TensorShape tf_shape) { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum<T>::v(), tf_shape, tensor_out)); } @@ -1099,7 +1096,8 @@ inline void MklNCHWToNHWC(const Tensor& input, Tensor** output) { /// /// @input None /// @return memory::data_type corresponding to type T -template<typename T> static memory::data_type MklDnnType(); +template <typename T> +static memory::data_type MklDnnType(); /// Instantiation for float type. Add similar instantiations for other /// type if needed. @@ -1114,10 +1112,11 @@ memory::data_type MklDnnType<float>() { /// @return: memory::format corresponding to TensorFlow data format; /// Fails with an error if invalid data format. inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { - if (format == FORMAT_NHWC) return memory::format::nhwc; - else if (format == FORMAT_NCHW) return memory::format::nchw; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); + if (format == FORMAT_NHWC) + return memory::format::nhwc; + else if (format == FORMAT_NCHW) + return memory::format::nchw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); // Return to get rid of compiler warning return memory::format::format_undef; } @@ -1128,10 +1127,11 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return: Tensorflow data format corresponding to memory::format /// Fails with an error if invalid data format. inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) return FORMAT_NHWC; - else if (format == memory::format::nchw) return FORMAT_NCHW; - TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, - "Unsupported data format")); + if (format == memory::format::nhwc) + return FORMAT_NHWC; + else if (format == memory::format::nchw) + return FORMAT_NCHW; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); } /// Map TensorShape object into memory::dims required by MKL-DNN @@ -1161,7 +1161,7 @@ inline memory::dims TFShapeToMklDnnDims(const TensorShape& shape) { /// @input TensorShape object in shape /// @return memory::dims in MKL-DNN required NCHW format inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, - TensorFormat format) { + TensorFormat format) { // Check validity of format. CHECK_NE(TFDataFormatToMklDnnDataFormat(format), memory::format::format_undef); @@ -1237,21 +1237,23 @@ class MklDnnData { const engine* cpu_engine_; public: - explicit MklDnnData(const engine* e) : user_memory_(nullptr), - reorder_memory_(nullptr), - op_md_(nullptr), cpu_engine_(e) {} + explicit MklDnnData(const engine* e) + : user_memory_(nullptr), + reorder_memory_(nullptr), + op_md_(nullptr), + cpu_engine_(e) {} ~MklDnnData() { cpu_engine_ = nullptr; // We don't own this. - delete(user_memory_); - delete(reorder_memory_); - delete(op_md_); + delete (user_memory_); + delete (reorder_memory_); + delete (op_md_); } inline void* GetTensorBuffer(const Tensor* tensor) const { CHECK_NOTNULL(tensor); - return const_cast<void*>(static_cast<const void*>( - tensor->flat<T>().data())); + return const_cast<void*>( + static_cast<const void*>(tensor->flat<T>().data())); } /// Set user memory primitive using specified dimensions, memory format and @@ -1283,7 +1285,7 @@ class MklDnnData { /// @return: memory::desc object corresponding to blocked memory format /// for given dimensions and strides. static inline memory::desc CreateBlockedMemDesc(const memory::dims& dim, - const memory::dims& strides) { + const memory::dims& strides) { CHECK_EQ(dim.size(), strides.size()); // We have to construct memory descriptor in a C style. This is not at all @@ -1352,7 +1354,7 @@ class MklDnnData { CHECK_NOTNULL(cpu_engine_); // TODO(nhasabni): can we remove dynamic memory allocation? if (data_buffer) { - user_memory_ = new memory(pd, data_buffer); + user_memory_ = new memory(pd, data_buffer); } else { user_memory_ = new memory(pd); } diff --git a/tensorflow/core/util/mkl_util_test.cc b/tensorflow/core/util/mkl_util_test.cc index 6aef3d86e9..8b73eadb40 100644 --- a/tensorflow/core/util/mkl_util_test.cc +++ b/tensorflow/core/util/mkl_util_test.cc @@ -54,7 +54,6 @@ TEST(MklUtilTest, MklDnnTfShape) { EXPECT_NE(b_tf_shape_nchw, b_mkldnn_tf_shape); } - TEST(MklUtilTest, MklDnnBlockedFormatTest) { // Let's create 2D tensor of shape {3, 4} with 3 being innermost dimension // first (case 1) and then it being outermost dimension (case 2). diff --git a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java index 92cc3bd60e..313c09e1e4 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/ShapeTest.java @@ -84,11 +84,10 @@ public class ShapeTest { assertEquals(Shape.scalar(), Shape.scalar()); assertEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 3)); - assertNotEquals(Shape.make(1,2), null); - assertNotEquals(Shape.make(1,2), new Object()); + assertNotEquals(Shape.make(1, 2), null); + assertNotEquals(Shape.make(1, 2), new Object()); assertNotEquals(Shape.make(1, 2, 3), Shape.make(1, 2, 4)); - assertNotEquals(Shape.unknown(), Shape.unknown()); assertNotEquals(Shape.make(-1), Shape.make(-1)); assertNotEquals(Shape.make(1, -1, 3), Shape.make(1, -1, 3)); @@ -103,4 +102,3 @@ public class ShapeTest { assertNotEquals(Shape.make(1, 2).hashCode(), Shape.make(1, 3).hashCode()); } } - diff --git a/tensorflow/python/estimator/inputs/numpy_io.py b/tensorflow/python/estimator/inputs/numpy_io.py index 3512f66284..750af20e8a 100644 --- a/tensorflow/python/estimator/inputs/numpy_io.py +++ b/tensorflow/python/estimator/inputs/numpy_io.py @@ -117,11 +117,11 @@ def numpy_input_fn(x, raise ValueError('y cannot be empty dict, use None instead.') ordered_dict_y = collections.OrderedDict( - sorted(y.items(), key=lambda t: t[0])) + sorted(y.items(), key=lambda t: t[0])) target_keys = list(ordered_dict_y.keys()) duplicate_keys = set(feature_keys).intersection(set(target_keys)) - if len(duplicate_keys): + if duplicate_keys: raise ValueError('{} duplicate keys are found in both x and y: ' '{}'.format(len(duplicate_keys), duplicate_keys)) @@ -131,16 +131,14 @@ def numpy_input_fn(x, ordered_dict_data[target_keys] = y if len(set(v.shape[0] for v in ordered_dict_data.values())) != 1: - shape_dict_of_x = {k: ordered_dict_data[k].shape - for k in feature_keys} + shape_dict_of_x = {k: ordered_dict_data[k].shape for k in feature_keys} if target_keys is None: shape_of_y = None elif isinstance(target_keys, string_types): shape_of_y = y.shape else: - shape_of_y = {k: ordered_dict_data[k].shape - for k in target_keys} + shape_of_y = {k: ordered_dict_data[k].shape for k in target_keys} raise ValueError('Length of tensors in x and y is mismatched. All ' 'elements in x and y must have the same length.\n' @@ -155,11 +153,12 @@ def numpy_input_fn(x, enqueue_size=batch_size, num_epochs=num_epochs) - batch = (queue.dequeue_many(batch_size) if num_epochs is None - else queue.dequeue_up_to(batch_size)) + batch = ( + queue.dequeue_many(batch_size) + if num_epochs is None else queue.dequeue_up_to(batch_size)) # Remove the first `Tensor` in `batch`, which is the row number. - if len(batch) > 0: + if batch: batch.pop(0) features = dict(zip(feature_keys, batch[:len(feature_keys)])) diff --git a/tensorflow/python/estimator/inputs/numpy_io_test.py b/tensorflow/python/estimator/inputs/numpy_io_test.py index 65eae7a7dc..1374e3f7e1 100644 --- a/tensorflow/python/estimator/inputs/numpy_io_test.py +++ b/tensorflow/python/estimator/inputs/numpy_io_test.py @@ -255,7 +255,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor = input_fn() coord = coordinator.Coordinator() @@ -327,7 +327,7 @@ class NumpyIoTest(test.TestCase): with self.test_session() as session: input_fn = numpy_io.numpy_input_fn( - x, y, batch_size=2, shuffle=False, num_epochs=1) + x, y, batch_size=2, shuffle=False, num_epochs=1) features_tensor, targets_tensor = input_fn() coord = coordinator.Coordinator() @@ -362,13 +362,10 @@ class NumpyIoTest(test.TestCase): a = np.arange(4) * 1.0 b = np.arange(32, 36) x = {'a': a, 'b': b} - y = {'y1': np.arange(-32, -28), - 'a': a, - 'y2': np.arange(32, 28, -1), - 'b': b} + y = {'y1': np.arange(-32, -28), 'a': a, 'y2': np.arange(32, 28, -1), 'b': b} with self.test_session(): with self.assertRaisesRegexp( - ValueError, '2 duplicate keys are found in both x and y'): + ValueError, '2 duplicate keys are found in both x and y'): failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False) failing_input_fn() diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 1610214d54..4c026590c2 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -987,10 +987,9 @@ class TensorFlowTestCase(googletest.TestCase): msg: An optional string message to append to the failure message. """ # f1 == f2 is needed here as we might have: f1, f2 = inf, inf - self.assertTrue( - f1 == f2 or math.fabs(f1 - f2) <= err, - "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg - if msg is not None else "")) + self.assertTrue(f1 == f2 or math.fabs(f1 - f2) <= err, + "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg + if msg is not None else "")) def assertArrayNear(self, farray1, farray2, err): """Asserts that two float arrays are near each other. diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 76b80e60ea..1bf2b70c1b 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -114,21 +114,21 @@ class BooleanMaskTest(test_util.TensorFlowTestCase): arr = np.random.rand(*arr_shape) mask = make_mask(arr_shape[:ndims_mask]) if axis is not None: - mask = make_mask(arr_shape[axis:ndims_mask+axis]) + mask = make_mask(arr_shape[axis:ndims_mask + axis]) if axis is None or axis == 0: masked_arr = arr[mask] elif axis == 1: - masked_arr = arr[:,mask] + masked_arr = arr[:, mask] elif axis == 2: - masked_arr = arr[:,:,mask] - with self.test_session() as sess: + masked_arr = arr[:, :, mask] + with self.test_session(): masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis) # Leading dimension size of masked_tensor is always unknown until runtime # since we don't how many elements will be kept. leading = 1 if axis is None else axis + 1 self.assertAllEqual(masked_tensor.get_shape()[leading:], - masked_arr.shape[leading:]) + masked_arr.shape[leading:]) self.assertAllClose(masked_arr, masked_tensor.eval()) @@ -1078,6 +1078,7 @@ class PadTest(test_util.TensorFlowTestCase): [0, 0, 4, 5, 6, 0, 0], [0, 0, 0, 0, 0, 0, 0]]) + class InvertPermutationTest(test_util.TensorFlowTestCase): def testInvertPermutation(self): diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 79285476b4..2767df127e 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -25,6 +25,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest + class BincountTest(test_util.TensorFlowTestCase): def test_empty(self): @@ -72,8 +73,7 @@ class BincountTest(test_util.TensorFlowTestCase): else: weights = np.random.random(num_samples) self.assertAllClose( - math_ops.bincount(arr, weights).eval(), - np.bincount(arr, weights)) + math_ops.bincount(arr, weights).eval(), np.bincount(arr, weights)) def test_random_without_weights(self): num_samples = 10000 @@ -83,8 +83,7 @@ class BincountTest(test_util.TensorFlowTestCase): arr = np.random.randint(0, 1000, num_samples) weights = np.ones(num_samples).astype(dtype) self.assertAllClose( - math_ops.bincount(arr, None).eval(), - np.bincount(arr, weights)) + math_ops.bincount(arr, None).eval(), np.bincount(arr, weights)) def test_zero_weights(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 6cbdd4cbb3..68817cc256 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -439,11 +439,10 @@ class ZerosLikeTest(test.TestCase): def testZerosLikeCPU(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128, - dtypes_lib.string + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128, dtypes_lib.string ]: self._compareZeros(dtype, fully_defined_shape=False, use_gpu=False) self._compareZeros(dtype, fully_defined_shape=True, use_gpu=False) @@ -574,10 +573,10 @@ class OnesLikeTest(test.TestCase): def testOnesLike(self): for dtype in [ - dtypes_lib.float32, dtypes_lib.float64, - dtypes_lib.int8, dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, - dtypes_lib.int32, dtypes_lib.int64, dtypes_lib.bool, - dtypes_lib.complex64, dtypes_lib.complex128 + dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.int8, + dtypes_lib.uint8, dtypes_lib.int16, dtypes_lib.uint16, dtypes_lib.int32, + dtypes_lib.int64, dtypes_lib.bool, dtypes_lib.complex64, + dtypes_lib.complex128 ]: numpy_dtype = dtype.as_numpy_dtype with self.test_session(): diff --git a/tensorflow/python/kernel_tests/conv1d_test.py b/tensorflow/python/kernel_tests/conv1d_test.py index a7e23ead1c..d92797a7d3 100644 --- a/tensorflow/python/kernel_tests/conv1d_test.py +++ b/tensorflow/python/kernel_tests/conv1d_test.py @@ -52,7 +52,6 @@ class Conv1DTest(test.TestCase): self.assertEqual(len(output), 2) self.assertAllClose(output, [2 * 1 + 1 * 2, 2 * 3 + 1 * 4]) - def testConv1DTranspose(self): with self.test_session(): stride = 2 @@ -93,5 +92,6 @@ class Conv1DTest(test.TestCase): self.assertAllClose(cache_values, value) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/conv_ops_3d_test.py b/tensorflow/python/kernel_tests/conv_ops_3d_test.py index 116681fc4c..ec8ac74163 100644 --- a/tensorflow/python/kernel_tests/conv_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_3d_test.py @@ -68,8 +68,8 @@ class Conv3DTest(test.TestCase): total_size_2 *= s # Initializes the input tensor with array containing numbers from 0 to 1. - # We keep the input tensor values fairly small to avoid overflowing a float16 - # tensor during the conv3d + # We keep the input tensor values fairly small to avoid overflowing float16 + # during the conv3d. x1 = [f * 1.0 / total_size_1 for f in range(1, total_size_1 + 1)] x2 = [f * 1.0 / total_size_2 for f in range(1, total_size_2 + 1)] with self.test_session(use_gpu=use_gpu): @@ -115,15 +115,13 @@ class Conv3DTest(test.TestCase): if value.dtype == np.float16: tol = 1e-3 - self.assertAllClose(expected, value.flatten(), atol=tol, - rtol=tol) + self.assertAllClose(expected, value.flatten(), atol=tol, rtol=tol) def testConv3D1x1x1Filter(self): expected_output = [ - 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5 , - 0.59259259, 0.62962963, 0.77777778, 0.92592593, 0.85185185, - 1.05555556, 1.25925926, 1.07407407, 1.33333333, 1.59259259, - 1.2962963 , 1.61111111, 1.92592593 + 0.18518519, 0.22222222, 0.25925926, 0.40740741, 0.5, 0.59259259, + 0.62962963, 0.77777778, 0.92592593, 0.85185185, 1.05555556, 1.25925926, + 1.07407407, 1.33333333, 1.59259259, 1.2962963, 1.61111111, 1.92592593 ] # These are equivalent to the Conv2D1x1 case. @@ -149,10 +147,10 @@ class Conv3DTest(test.TestCase): # Expected values computed using scipy's correlate function. def testConv3D2x2x2Filter(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 4.2650463 , 4.35763889, - 4.45023148, 6.73032407, 6.89236111, 7.05439815, 7.22337963, - 7.39930556, 7.57523148, 9.68865741, 9.93402778, 10.17939815, - 10.18171296, 10.44097222, 10.70023148 + 3.77199074, 3.85069444, 3.92939815, 4.2650463, 4.35763889, 4.45023148, + 6.73032407, 6.89236111, 7.05439815, 7.22337963, 7.39930556, 7.57523148, + 9.68865741, 9.93402778, 10.17939815, 10.18171296, 10.44097222, + 10.70023148 ] # expected_shape = [1, 3, 1, 2, 5] self._VerifyValues( @@ -164,19 +162,17 @@ class Conv3DTest(test.TestCase): def testConv3DStrides(self): expected_output = [ - 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, - 0.13988095, 0.08452381, 0.26071429, 0.35238095, 0.36488095, - 0.37738095, 0.38988095, 0.40238095, 0.23452381, 0.46071429, - 0.61488095, 0.62738095, 0.63988095, 0.65238095, 0.66488095, - 0.38452381, 1.12738095, 1.48988095, 1.50238095, 1.51488095, - 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, - 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, - 1.52738095, 2.01488095, 2.02738095, 2.03988095, 2.05238095, - 2.06488095, 1.18452381, 2.19404762, 2.88988095, 2.90238095, - 2.91488095, 2.92738095, 2.93988095, 1.68452381, 2.39404762, - 3.15238095, 3.16488095, 3.17738095, 3.18988095, 3.20238095, - 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, - 3.45238095, 3.46488095, 1.98452381 + 0.06071429, 0.08988095, 0.10238095, 0.11488095, 0.12738095, 0.13988095, + 0.08452381, 0.26071429, 0.35238095, 0.36488095, 0.37738095, 0.38988095, + 0.40238095, 0.23452381, 0.46071429, 0.61488095, 0.62738095, 0.63988095, + 0.65238095, 0.66488095, 0.38452381, 1.12738095, 1.48988095, 1.50238095, + 1.51488095, 1.52738095, 1.53988095, 0.88452381, 1.32738095, 1.75238095, + 1.76488095, 1.77738095, 1.78988095, 1.80238095, 1.03452381, 1.52738095, + 2.01488095, 2.02738095, 2.03988095, 2.05238095, 2.06488095, 1.18452381, + 2.19404762, 2.88988095, 2.90238095, 2.91488095, 2.92738095, 2.93988095, + 1.68452381, 2.39404762, 3.15238095, 3.16488095, 3.17738095, 3.18988095, + 3.20238095, 1.83452381, 2.59404762, 3.41488095, 3.42738095, 3.43988095, + 3.45238095, 3.46488095, 1.98452381 ] self._VerifyValues( tensor_in_sizes=[1, 5, 8, 7, 1], @@ -187,8 +183,7 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, - 10.17939815 + 3.77199074, 3.85069444, 3.92939815, 9.68865741, 9.93402778, 10.17939815 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -199,14 +194,12 @@ class Conv3DTest(test.TestCase): def testConv3DStride3(self): expected_output = [ - 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, - 1.68998016, 1.6155754 , 1.68179563, 1.74801587, 1.9280754 , - 2.01215278, 2.09623016, 1.98015873, 2.0672123 , 2.15426587, - 2.03224206, 2.12227183, 2.21230159, 4.4280754 , 4.65500992, - 4.88194444, 4.48015873, 4.71006944, 4.93998016, 4.53224206, - 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, - 4.8968254 , 5.15054563, 5.40426587, 4.94890873, 5.20560516, - 5.46230159 + 1.51140873, 1.57167659, 1.63194444, 1.56349206, 1.62673611, 1.68998016, + 1.6155754, 1.68179563, 1.74801587, 1.9280754, 2.01215278, 2.09623016, + 1.98015873, 2.0672123, 2.15426587, 2.03224206, 2.12227183, 2.21230159, + 4.4280754, 4.65500992, 4.88194444, 4.48015873, 4.71006944, 4.93998016, + 4.53224206, 4.76512897, 4.99801587, 4.84474206, 5.09548611, 5.34623016, + 4.8968254, 5.15054563, 5.40426587, 4.94890873, 5.20560516, 5.46230159 ] self._VerifyValues( tensor_in_sizes=[1, 6, 7, 8, 2], @@ -217,9 +210,8 @@ class Conv3DTest(test.TestCase): def testConv3D2x2x2FilterStride2Same(self): expected_output = [ - 3.77199074, 3.85069444, 3.92939815, 2.0162037 , 2.06597222, - 2.11574074, 9.68865741, 9.93402778, 10.17939815, 4.59953704, - 4.73263889, 4.86574074 + 3.77199074, 3.85069444, 3.92939815, 2.0162037, 2.06597222, 2.11574074, + 9.68865741, 9.93402778, 10.17939815, 4.59953704, 4.73263889, 4.86574074 ] self._VerifyValues( tensor_in_sizes=[1, 4, 2, 3, 3], @@ -230,8 +222,8 @@ class Conv3DTest(test.TestCase): def testKernelSmallerThanStride(self): expected_output = [ - 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037 , - 0.77777778, 0.92592593, 1. + 0.03703704, 0.11111111, 0.25925926, 0.33333333, 0.7037037, 0.77777778, + 0.92592593, 1. ] self._VerifyValues( tensor_in_sizes=[1, 3, 3, 3, 1], @@ -247,12 +239,11 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, - 0.40306122, 0.41873178, 0.4340379 , 0.19642857, 2.46938776, - 2.50874636, 1.1377551 , 2.74489796, 2.78425656, 1.26020408, - 1.16873178, 1.1840379 , 0.51785714, 1.09511662, 1.10604956, - 0.44642857, 1.17164723, 1.18258017, 0.47704082, 0.3691691 , - 0.37244898, 0.125 + 0.54081633, 0.58017493, 0.28061224, 0.81632653, 0.85568513, 0.40306122, + 0.41873178, 0.4340379, 0.19642857, 2.46938776, 2.50874636, 1.1377551, + 2.74489796, 2.78425656, 1.26020408, 1.16873178, 1.1840379, 0.51785714, + 1.09511662, 1.10604956, 0.44642857, 1.17164723, 1.18258017, 0.47704082, + 0.3691691, 0.37244898, 0.125 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -262,8 +253,8 @@ class Conv3DTest(test.TestCase): expected=expected_output) expected_output = [ - 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, - 2.744898, 2.784257 + 0.540816, 0.580175, 0.816327, 0.855685, 2.469388, 2.508746, 2.744898, + 2.784257 ] self._VerifyValues( tensor_in_sizes=[1, 7, 7, 7, 1], @@ -278,7 +269,7 @@ class Conv3DTest(test.TestCase): filter_in_sizes=[2, 1, 2, 1, 2], stride=1, padding="VALID", - expected=[1.5625, 1.875]) + expected=[1.5625, 1.875]) def _ConstructAndTestGradientForConfig( self, batch, input_shape, filter_shape, in_depth, out_depth, stride, @@ -318,7 +309,6 @@ class Conv3DTest(test.TestCase): input_data = [x * 1.0 / input_size for x in range(0, input_size)] filter_data = [x * 1.0 / filter_size for x in range(0, filter_size)] - for data_type in self._DtypesToTest(use_gpu=use_gpu): # TODO(mjanusz): Modify gradient_checker to also provide max relative # error and synchronize the tolerance levels between the tests for forward @@ -330,12 +320,11 @@ class Conv3DTest(test.TestCase): elif data_type == dtypes.float16: tolerance = 1e-3 - with self.test_session(use_gpu=use_gpu): orig_input_tensor = constant_op.constant( - input_data, shape=input_shape, dtype=data_type, name="input") + input_data, shape=input_shape, dtype=data_type, name="input") filter_tensor = constant_op.constant( - filter_data, shape=filter_shape, dtype=data_type, name="filter") + filter_data, shape=filter_shape, dtype=data_type, name="filter") if data_format == "NCDHW": input_tensor = test_util.NHWCToNCHW(orig_input_tensor) @@ -345,25 +334,23 @@ class Conv3DTest(test.TestCase): new_strides = strides conv = nn_ops.conv3d( - input_tensor, filter_tensor, new_strides, padding, - data_format=data_format, name="conv") + input_tensor, + filter_tensor, + new_strides, + padding, + data_format=data_format, + name="conv") if data_format == "NCDHW": conv = test_util.NCHWToNHWC(conv) - if test_input: - jacob_t, jacob_n = gradient_checker.compute_gradient(orig_input_tensor, - input_shape, - conv, - output_shape) + jacob_t, jacob_n = gradient_checker.compute_gradient( + orig_input_tensor, input_shape, conv, output_shape) else: - jacob_t, jacob_n = gradient_checker.compute_gradient(filter_tensor, - filter_shape, - conv, - output_shape) - - + jacob_t, jacob_n = gradient_checker.compute_gradient( + filter_tensor, filter_shape, conv, output_shape) + if data_type != dtypes.float16: reference_jacob_t = jacob_t err = np.fabs(jacob_t - jacob_n).max() @@ -375,7 +362,6 @@ class Conv3DTest(test.TestCase): print("conv3d gradient error = ", err) self.assertLess(err, tolerance) - def ConstructAndTestGradient(self, **kwargs): for data_format, use_gpu in GetTestConfigs(): self._ConstructAndTestGradientForConfig(data_format=data_format, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 150e2ff7f2..6be8997cab 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -18,8 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np import os +import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -1442,7 +1442,6 @@ class PoolingTest(test.TestCase): use_gpu=True, v2=v2) - # Propagate the diff in cases of NaNs os.environ["TF_ENABLE_MAXPOOL_NANPROP"] = "1" expected_input_backprop_cudnn = expected_input_backprop_tf_cpu diff --git a/tensorflow/python/kernel_tests/reader_ops_test.py b/tensorflow/python/kernel_tests/reader_ops_test.py index 8e54d10f32..223a4b2c87 100644 --- a/tensorflow/python/kernel_tests/reader_ops_test.py +++ b/tensorflow/python/kernel_tests/reader_ops_test.py @@ -1018,15 +1018,15 @@ class LMDBReaderTest(test.TestCase): with self.test_session() as sess: reader1 = io_ops.LMDBReader(name="test_read_from_same_file1") reader2 = io_ops.LMDBReader(name="test_read_from_same_file2") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) key1, value1 = reader1.read(filename_queue) key2, value2 = reader2.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - for i in range(3): - for j in range(10): + for _ in range(3): + for _ in range(10): k1, v1, k2, v2 = sess.run([key1, value1, key2, value2]) self.assertAllEqual(compat.as_bytes(k1), compat.as_bytes(k2)) self.assertAllEqual(compat.as_bytes(v1), compat.as_bytes(v2)) @@ -1054,14 +1054,14 @@ class LMDBReaderTest(test.TestCase): def testReadFromFileRepeatedly(self): with self.test_session() as sess: reader = io_ops.LMDBReader(name="test_read_from_file_repeated") - filename_queue = input_lib.string_input_producer([self.db_path], - num_epochs=None) + filename_queue = input_lib.string_input_producer( + [self.db_path], num_epochs=None) key, value = reader.read(filename_queue) coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) # Iterate over the lmdb 3 times. - for i in range(3): + for _ in range(3): # Go over all 10 records each time. for j in range(10): k, v = sess.run([key, value]) @@ -1071,5 +1071,6 @@ class LMDBReaderTest(test.TestCase): coord.request_stop() coord.join(threads) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 3a02f24902..99f9f09690 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -380,7 +380,7 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): # Replace np_ans[8] with 0 for the value np_ans[8:] = 0 # Replace 8 with -1 in indices - np.place(indices, indices==8, [-1]) + np.place(indices, indices == 8, [-1]) s = math_ops.unsorted_segment_sum( data=tf_x, segment_ids=indices, num_segments=num_segments) tf_ans = s.eval() diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 04758ce45a..6390b7c518 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -87,6 +87,7 @@ class UniqueTest(test.TestCase): for i in range(len(x)): self.assertEqual(x[i], tf_y[tf_idx[i]]) + class UniqueWithCountsTest(test.TestCase): def testInt32(self): diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 4d5fb97845..83237b8733 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -267,34 +267,34 @@ class BatchNormalization(base.Layer): self.axis[idx] = x + 1 # Account for added dimension if self.scale: - self.gamma = self.add_variable(name='gamma', - shape=param_shape, - dtype=param_dtype, - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint, - trainable=True) + self.gamma = self.add_variable( + name='gamma', + shape=param_shape, + dtype=param_dtype, + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint, + trainable=True) else: self.gamma = None if self.fused: - self._gamma_const = array_ops.constant(1.0, - dtype=param_dtype, - shape=param_shape) + self._gamma_const = array_ops.constant( + 1.0, dtype=param_dtype, shape=param_shape) if self.center: - self.beta = self.add_variable(name='beta', - shape=param_shape, - dtype=param_dtype, - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint, - trainable=True) + self.beta = self.add_variable( + name='beta', + shape=param_shape, + dtype=param_dtype, + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint, + trainable=True) else: self.beta = None if self.fused: - self._beta_const = array_ops.constant(0.0, - dtype=param_dtype, - shape=param_shape) + self._beta_const = array_ops.constant( + 0.0, dtype=param_dtype, shape=param_shape) # Disable variable partitioning when creating the moving mean and variance try: @@ -327,11 +327,12 @@ class BatchNormalization(base.Layer): # stack to be cleared. The nested ones use a `lambda` to set the desired # device and ignore any devices that may be set by the custom getter. def _renorm_variable(name, shape): - var = self.add_variable(name=name, - shape=shape, - dtype=param_dtype, - initializer=init_ops.zeros_initializer(), - trainable=False) + var = self.add_variable( + name=name, + shape=shape, + dtype=param_dtype, + initializer=init_ops.zeros_initializer(), + trainable=False) return var with ops.device(None): diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index b2876c58c2..7c91c3284e 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -101,15 +101,13 @@ class BNTest(test.TestCase): loss_val = sess.run(loss, feed_dict={image: image_val}) return loss_val - def _trainEvalSequence(self, - dtype, - train1_use_gpu, - train2_use_gpu, + def _trainEvalSequence(self, dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu): batch, height, width, input_channels = 2, 4, 5, 3 shape = [batch, height, width, input_channels] checkpoint = os.path.join(self.get_temp_dir(), 'cp_%s_%s_%s_%s' % - (dtype, train1_use_gpu, train2_use_gpu, infer_use_gpu)) + (dtype, train1_use_gpu, train2_use_gpu, + infer_use_gpu)) self._train( checkpoint, @@ -130,30 +128,27 @@ class BNTest(test.TestCase): dtype=dtype) np.random.seed(0) - image_val = np.random.rand(batch, - height, - width, - input_channels).astype(dtype.as_numpy_dtype) - loss_val = self._infer(checkpoint, image_val, shape, - use_gpu=infer_use_gpu, is_fused=True) + image_val = np.random.rand(batch, height, width, input_channels).astype( + dtype.as_numpy_dtype) + loss_val = self._infer( + checkpoint, image_val, shape, use_gpu=infer_use_gpu, is_fused=True) return train_vars, loss_val def testHalfPrecision(self): - ref_vars, ref_loss = self._trainEvalSequence(dtype=dtypes.float32, - train1_use_gpu=True, - train2_use_gpu=True, - infer_use_gpu=True) - + ref_vars, ref_loss = self._trainEvalSequence( + dtype=dtypes.float32, + train1_use_gpu=True, + train2_use_gpu=True, + infer_use_gpu=True) + self.assertEqual(len(ref_vars), 5) for train1_use_gpu in [True, False]: for train2_use_gpu in [True, False]: for infer_use_gpu in [True, False]: - test_vars, test_loss = self._trainEvalSequence(dtypes.float16, - train1_use_gpu, - train2_use_gpu, - infer_use_gpu) + test_vars, test_loss = self._trainEvalSequence( + dtypes.float16, train1_use_gpu, train2_use_gpu, infer_use_gpu) self.assertEqual(len(test_vars), 5) for test_var, ref_var in zip(test_vars, ref_vars): self.assertAllClose(test_var, ref_var, rtol=1.e-3, atol=1.e-3) @@ -281,9 +276,8 @@ class BNTest(test.TestCase): def testCreateFusedBNFloat16(self): # Call layer. bn = normalization_layers.BatchNormalization(axis=1, fused=True) - inputs = random_ops.random_uniform((5, 4, 3, 3), - seed=1, - dtype=dtypes.float16) + inputs = random_ops.random_uniform( + (5, 4, 3, 3), seed=1, dtype=dtypes.float16) training = array_ops.placeholder(dtype='bool') outputs = bn.apply(inputs, training=training) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 43238757c7..38eff54c69 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1194,18 +1194,19 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): "Number of mask dimensions must be specified, even if some dimensions" " are None. E.g. shape=[None] is ok, but shape=None is not.") axis = 0 if axis is None else axis - shape_tensor[axis:axis+ndims_mask].assert_is_compatible_with(shape_mask) + shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod(shape(tensor)[axis:axis+ndims_mask], [0]) + leading_size = gen_math_ops._prod( + shape(tensor)[axis:axis + ndims_mask], [0]) tensor = reshape(tensor, - concat([shape(tensor)[:axis], - [leading_size], - shape(tensor)[axis+ndims_mask:]], 0)) - first_dim = shape_tensor[axis:axis+ndims_mask].num_elements() + concat([ + shape(tensor)[:axis], [leading_size], + shape(tensor)[axis + ndims_mask:] + ], 0)) + first_dim = shape_tensor[axis:axis + ndims_mask].num_elements() tensor.set_shape( - tensor_shape.as_shape(shape_tensor[:axis]) - .concatenate([first_dim]) - .concatenate(shape_tensor[axis+ndims_mask:])) + tensor_shape.as_shape(shape_tensor[:axis]).concatenate([first_dim]) + .concatenate(shape_tensor[axis + ndims_mask:])) mask = reshape(mask, [-1]) return _apply_mask_1d(tensor, mask, axis) diff --git a/tensorflow/python/ops/distributions/multinomial.py b/tensorflow/python/ops/distributions/multinomial.py index d49fac59ca..04762565c2 100644 --- a/tensorflow/python/ops/distributions/multinomial.py +++ b/tensorflow/python/ops/distributions/multinomial.py @@ -23,10 +23,10 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops -from tensorflow.python.ops import functional_ops from tensorflow.python.ops.distributions import distribution from tensorflow.python.ops.distributions import util as distribution_util @@ -243,25 +243,26 @@ class Multinomial(distribution.Distribution): n_draws[..., array_ops.newaxis], dtype=self.logits.dtype) * self.logits # flatten the total_count and logits - flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] - flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] + flat_logits = array_ops.reshape(logits, [-1, k]) # [B1B2...Bm, k] + flat_ndraws = n * array_ops.reshape(n_draws, [-1]) # [B1B2...Bm] # computes each total_count and logits situation by map_fn def _sample_single(args): - logits, n_draw = args[0], args[1] # [K], [] - x = random_ops.multinomial(logits[array_ops.newaxis, ...], - n_draw, seed) # [1, n*n_draw] - x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] - x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] + logits, n_draw = args[0], args[1] # [K], [] + x = random_ops.multinomial(logits[array_ops.newaxis, ...], n_draw, + seed) # [1, n*n_draw] + x = array_ops.reshape(x, shape=[n, -1]) # [n, n_draw] + x = math_ops.reduce_sum(array_ops.one_hot(x, depth=k), axis=-2) # [n, k] return x - x = functional_ops.map_fn(_sample_single, - [flat_logits, flat_ndraws], - dtype=self.dtype) # [B1B2...Bm, n, k] + + x = functional_ops.map_fn( + _sample_single, [flat_logits, flat_ndraws], + dtype=self.dtype) # [B1B2...Bm, n, k] # reshape the results to proper shape x = array_ops.transpose(x, perm=[1, 0, 2]) final_shape = array_ops.concat([[n], self.batch_shape_tensor(), [k]], 0) - x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] + x = array_ops.reshape(x, final_shape) # [n, B1, B2,..., Bm, k] return x @distribution_util.AppendDocstring(_multinomial_sample_note) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 7c23321ca5..b9c89d62d5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1119,9 +1119,8 @@ def rgb_to_grayscale(images, name=None): # https://en.wikipedia.org/wiki/Luma_%28video%29 rgb_weights = [0.2989, 0.5870, 0.1140] rank_1 = array_ops.expand_dims(array_ops.rank(images) - 1, 0) - gray_float = math_ops.reduce_sum(flt_image * rgb_weights, - rank_1, - keepdims=True) + gray_float = math_ops.reduce_sum( + flt_image * rgb_weights, rank_1, keepdims=True) gray_float.set_shape(images.get_shape()[:-1].concatenate([1])) return convert_image_dtype(gray_float, orig_dtype, name=name) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 14a039ffd0..be9beee633 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -30,7 +30,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.gen_linalg_ops import * # pylint: enable=wildcard-import from tensorflow.python.util import compat -from tensorflow.python.util.deprecation import deprecated_args +from tensorflow.python.util import deprecation # Names below are lower_case. # pylint: disable=invalid-name @@ -439,9 +439,13 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): # pylint: disable=redefined-builtin -@deprecated_args(None, "keep_dims is deprecated, use keepdims instead", - "keep_dims") -def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, +@deprecation.deprecated_args( + None, 'keep_dims is deprecated, use keepdims instead', 'keep_dims') +def norm(tensor, + ord='euclidean', + axis=None, + keepdims=None, + name=None, keep_dims=None): r"""Computes the norm of vectors, matrices, and tensors. @@ -478,6 +482,7 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, keepdims: If True, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. name: The name of the op. + keep_dims: Deprecated alias for `keepdims`. Returns: output: A `Tensor` of the same type as tensor, containing the vector or @@ -500,11 +505,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, higher order tensors. @end_compatibility """ - - if keep_dims is not None: - if keepdims is not None: - raise ValueError("Cannot specify both 'keep_dims' and 'keepdims'") - keepdims = keep_dims + keepdims = deprecation.deprecated_argument_lookup('keepdims', keepdims, + 'keep_dims', keep_dims) if keepdims is None: keepdims = False @@ -555,8 +557,8 @@ def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None, else: # General p-norms (positive p only) result = math_ops.pow( - math_ops.reduce_sum( - math_ops.pow(result, ord), axis, keepdims=True), 1.0 / ord) + math_ops.reduce_sum(math_ops.pow(result, ord), axis, keepdims=True), + 1.0 / ord) if not keepdims: result = array_ops.squeeze(result, axis) return result diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index d30f6b92ad..e04121ee31 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -792,9 +792,10 @@ def mean_cosine_distance(labels, predictions, dim, weights=None, predictions, labels, weights = _remove_squeezable_dimensions( predictions=predictions, labels=labels, weights=weights) radial_diffs = math_ops.multiply(predictions, labels) - radial_diffs = math_ops.reduce_sum(radial_diffs, - reduction_indices=[dim,], - keepdims=True) + radial_diffs = math_ops.reduce_sum( + radial_diffs, reduction_indices=[ + dim, + ], keepdims=True) mean_distance, update_op = mean(radial_diffs, weights, None, None, diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index da037a7983..654eb1c118 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -333,6 +333,7 @@ def l2_normalize(x, axis=None, epsilon=1e-12, name=None, dim=None): epsilon: A lower bound value for the norm. Will use `sqrt(epsilon)` as the divisor if `norm < sqrt(epsilon)`. name: A name for this operation (optional). + dim: Deprecated alias for axis. Returns: A `Tensor` with the same shape as `x`. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 61fa462988..ec7b9372ca 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -23,7 +23,6 @@ import numbers import numpy as np from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util from tensorflow.python.framework import ops @@ -38,11 +37,10 @@ from tensorflow.python.ops import random_ops # pylint: disable=wildcard-import from tensorflow.python.ops.gen_nn_ops import * # pylint: enable=wildcard-import -from tensorflow.python.util.deprecation import deprecated_args -from tensorflow.python.util.deprecation import deprecated_argument_lookup from tensorflow.python.util import deprecation + # Aliases for some automatically-generated names. local_response_normalization = gen_nn_ops.lrn @@ -1648,7 +1646,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): return output -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") def softmax(logits, axis=None, name=None, dim=None): """Computes softmax activations. @@ -1662,6 +1660,7 @@ def softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type and shape as `logits`. @@ -1670,13 +1669,13 @@ def softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._softmax, axis, name) -@deprecated_args(None, "dim is deprecated, use axis instead", "dim") +@deprecation.deprecated_args(None, "dim is deprecated, use axis instead", "dim") def log_softmax(logits, axis=None, name=None, dim=None): """Computes log softmax activations. @@ -1690,6 +1689,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). + dim: Deprecated alias for `axis`. Returns: A `Tensor`. Has the same type as `logits`. Same shape as `logits`. @@ -1698,7 +1698,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): InvalidArgumentError: if `logits` is empty or `axis` is beyond the last dimension of `logits`. """ - axis = deprecated_argument_lookup("axis", axis, "dim", dim) + axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 return _softmax(logits, gen_nn_ops._log_softmax, axis, name) @@ -2316,13 +2316,14 @@ def conv1d(value, filters, stride, padding, return array_ops.squeeze(result, [spatial_start_dim]) -def conv1d_transpose(value, - filter, - output_shape, - stride, - padding="SAME", - data_format="NWC", - name=None): +def conv1d_transpose( + value, + filter, # pylint: disable=redefined-builtin + output_shape, + stride, + padding="SAME", + data_format="NWC", + name=None): """The transpose of `conv1d`. This operation is sometimes called "deconvolution" after [Deconvolutional @@ -2357,8 +2358,8 @@ def conv1d_transpose(value, [value, filter, output_shape]) as name: output_shape_ = ops.convert_to_tensor(output_shape, name="output_shape") if not output_shape_.get_shape().is_compatible_with(tensor_shape.vector(3)): - raise ValueError("output_shape must have shape (3,), got {}" - .format(output_shape_.get_shape())) + raise ValueError("output_shape must have shape (3,), got {}".format( + output_shape_.get_shape())) # The format could be either NWC or NCW, map to NHWC or NCHW if data_format is None or data_format == "NWC": @@ -2380,7 +2381,8 @@ def conv1d_transpose(value, if not filter.get_shape()[1].is_compatible_with(output_shape[axis]): raise ValueError( "output_shape does not match filter's output channels, " - "{} != {}".format(output_shape[axis], filter.get_shape()[1])) + "{} != {}".format(output_shape[axis], + filter.get_shape()[1])) if padding != "VALID" and padding != "SAME": raise ValueError("padding must be either VALID or SAME:" @@ -2388,25 +2390,26 @@ def conv1d_transpose(value, # Reshape the input tensor to [batch, 1, in_width, in_channels] if data_format_2d == "NHWC": - output_shape_ = array_ops.concat([output_shape_[:1], [1], - output_shape_[1:]], axis=0) + output_shape_ = array_ops.concat( + [output_shape_[:1], [1], output_shape_[1:]], axis=0) spatial_start_dim = 1 strides = [1, 1, stride, 1] else: - output_shape_ = array_ops.concat([output_shape_[:2], [1], - output_shape_[2:]], axis=0) + output_shape_ = array_ops.concat( + [output_shape_[:2], [1], output_shape_[2:]], axis=0) spatial_start_dim = 2 strides = [1, 1, 1, stride] value = array_ops.expand_dims(value, spatial_start_dim) filter = array_ops.expand_dims(filter, 0) - result = gen_nn_ops.conv2d_backprop_input(input_sizes=output_shape_, - filter=filter, - out_backprop=value, - strides=strides, - padding=padding, - data_format=data_format_2d, - name=name) + result = gen_nn_ops.conv2d_backprop_input( + input_sizes=output_shape_, + filter=filter, + out_backprop=value, + strides=strides, + padding=padding, + data_format=data_format_2d, + name=name) return array_ops.squeeze(result, [spatial_start_dim]) diff --git a/tensorflow/stream_executor/dnn.cc b/tensorflow/stream_executor/dnn.cc index 29fd6d0e87..6fd0e69905 100644 --- a/tensorflow/stream_executor/dnn.cc +++ b/tensorflow/stream_executor/dnn.cc @@ -470,6 +470,7 @@ string ConvolutionDescriptor::ToShortString() const { PoolingDescriptor::PoolingDescriptor(int ndims) : mode_(dnn::PoolingMode::kMaximum), ndims_(ndims), + propagate_nans_(false), window_(ndims, 0), padding_(ndims, 0), strides_(ndims, 1), diff --git a/third_party/sycl/crosstool/trisycl.tpl b/third_party/sycl/crosstool/trisycl.tpl index b470772fbf..87a70d8f95 100644 --- a/third_party/sycl/crosstool/trisycl.tpl +++ b/third_party/sycl/crosstool/trisycl.tpl @@ -11,10 +11,12 @@ CPU_C_COMPILER = ('%{host_c_compiler}') CURRENT_DIR = os.path.dirname(sys.argv[0]) TRISYCL_INCLUDE_DIR = CURRENT_DIR + '/../sycl/include' + def main(): compiler_flags = [] - remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', '-Wignored-attributes', '-fno-exceptions') + remove_flags = ('-Wl,--no-undefined', '-Wno-unused-but-set-variable', + '-Wignored-attributes', '-fno-exceptions') # remove -fsamotoze-coverage from string with g++ if 'g++' in CPU_CXX_COMPILER: remove_flags += ('-fsanitize-coverage',) @@ -22,52 +24,62 @@ def main(): else: compiler_flags += ['-fopenmp=libomp'] - compiler_flags += [flag for flag in sys.argv[1:] if not flag.startswith(remove_flags)] - + compiler_flags += [ + flag for flag in sys.argv[1:] if not flag.startswith(remove_flags) + ] output_file_index = compiler_flags.index('-o') + 1 output_file_name = compiler_flags[output_file_index] - if(output_file_index == 1): + if (output_file_index == 1): # we are linking - return call([CPU_CXX_COMPILER] + compiler_flags + - ['-Wl,--no-undefined']) + return call([CPU_CXX_COMPILER] + compiler_flags + ['-Wl,--no-undefined']) # find what we compile compiling_cpp = 0 - if('-c' in compiler_flags): - compiled_file_index = compiler_flags.index('-c') + 1 - compiled_file_name = compiler_flags[compiled_file_index] - if(compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', - '.C', '.cxx'))): - compiling_cpp = 1; - - debug_flags = ['-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', '-lpthread', '-lboost_log', '-g', '-rdynamic'] + if ('-c' in compiler_flags): + compiled_file_index = compiler_flags.index('-c') + 1 + compiled_file_name = compiler_flags[compiled_file_index] + if (compiled_file_name.endswith(('.cc', '.c++', '.cpp', '.CPP', '.C', + '.cxx'))): + compiling_cpp = 1 + + debug_flags = [ + '-DTRISYCL_DEBUG', '-DBOOST_LOG_DYN_LINK', '-DTRISYCL_TRACE_KERNEL', + '-lpthread', '-lboost_log', '-g', '-rdynamic' + ] opt_flags = ['-DNDEBUG', '-DBOOST_DISABLE_ASSERTS', '-O3'] - compiler_flags = compiler_flags + ['-DEIGEN_USE_SYCL=1', - '-DEIGEN_HAS_C99_MATH', - '-DEIGEN_MAX_ALIGN_BYTES=16', - '-DTENSORFLOW_USE_SYCL'] + opt_flags + compiler_flags = compiler_flags + [ + '-DEIGEN_USE_SYCL=1', '-DEIGEN_HAS_C99_MATH', + '-DEIGEN_MAX_ALIGN_BYTES=16', '-DTENSORFLOW_USE_SYCL' + ] + opt_flags - if(compiling_cpp == 1): + if (compiling_cpp == 1): # create a blacklist of folders that will be skipped when compiling # with triSYCL - skip_extensions = [".cu.cc"] - skip_folders = ["tensorflow/compiler", "tensorflow/docs_src", "tensorflow/tensorboard", "third_party", "external", "hexagon"] + skip_extensions = ['.cu.cc'] + skip_folders = [ + 'tensorflow/compiler', 'tensorflow/docs_src', 'tensorflow/tensorboard', + 'third_party', 'external', 'hexagon' + ] skip_folders = [(folder + '/') for folder in skip_folders] # if compiling external project skip triSYCL - if any(compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any(_folder in output_file_name for _folder in skip_folders): + if any( + compiled_file_name.endswith(_ext) for _ext in skip_extensions) or any( + _folder in output_file_name for _folder in skip_folders): return call([CPU_CXX_COMPILER] + compiler_flags) - host_compiler_flags = ['-xc++', '-Wno-unused-variable', - '-I', TRISYCL_INCLUDE_DIR] + compiler_flags + host_compiler_flags = [ + '-xc++', '-Wno-unused-variable', '-I', TRISYCL_INCLUDE_DIR + ] + compiler_flags x = call([CPU_CXX_COMPILER] + host_compiler_flags) return x else: # compile for C return call([CPU_C_COMPILER] + compiler_flags) + if __name__ == '__main__': sys.exit(main()) diff --git a/third_party/sycl/sycl_configure.bzl b/third_party/sycl/sycl_configure.bzl index a0c9e4e43a..5b9d0eb383 100644 --- a/third_party/sycl/sycl_configure.bzl +++ b/third_party/sycl/sycl_configure.bzl @@ -67,7 +67,6 @@ def find_computecpp_root(repository_ctx): def find_trisycl_include_dir(repository_ctx): """Find triSYCL include directory. """ - sycl_name = "" if _TRISYCL_INCLUDE_DIR in repository_ctx.os.environ: sycl_name = repository_ctx.os.environ[_TRISYCL_INCLUDE_DIR].strip() if sycl_name.startswith("/"): |