From 584e3495b50db8fd0a894de8b6d85fcf4268a855 Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Tue, 13 Mar 2018 11:43:01 -0700 Subject: Fix floating point exception with bps calculation modified: tensorflow/contrib/tensorboard/db/loader.cc --- tensorflow/contrib/tensorboard/db/loader.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 4d7337a53d..9134296c74 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -112,8 +112,10 @@ int main(int argc, char* argv[]) { } uint64 elapsed = env->NowMicros() - start; LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with " - << AddCommas(records) << " records at " - << AddCommas(offset / (elapsed / 1000000)) << " bps"; + << AddCommas(records) << " records"; + if (elapsed > 0) { + LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0)); + } return 0; } -- cgit v1.2.3 From 548415b9be78839a23a3909044329c3f221fa4b3 Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Wed, 28 Mar 2018 21:25:23 -0700 Subject: Use the same log line for bps and also report bps when elapsed is 0 --- tensorflow/contrib/tensorboard/db/loader.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 9134296c74..97b9daa361 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -112,11 +112,10 @@ int main(int argc, char* argv[]) { } uint64 elapsed = env->NowMicros() - start; LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with " - << AddCommas(records) << " records"; - if (elapsed > 0) { - LOG(INFO) << "bps=" << (uint64)(offset / (elapsed / 1000000.0)); - } - + << AddCommas(records) << " records at " + << (elapsed == 0 ? 
offset : static_cast<uint64>( + offset / (elapsed / 1000000.0))) + << " bps"; return 0; } -- cgit v1.2.3 From b621ac047e43540992b3ac0e9055b9e7225e74da Mon Sep 17 00:00:00 2001 From: Sunitha Kambhampati Date: Thu, 29 Mar 2018 11:51:02 -0700 Subject: Add the commas back --- tensorflow/contrib/tensorboard/db/loader.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc index 97b9daa361..6439328022 100644 --- a/tensorflow/contrib/tensorboard/db/loader.cc +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -111,11 +111,10 @@ int main(int argc, char* argv[]) { ++records; } uint64 elapsed = env->NowMicros() - start; + uint64 bps = (elapsed == 0 ? offset : static_cast<uint64>( + offset / (elapsed / 1000000.0))); LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with " - << AddCommas(records) << " records at " - << (elapsed == 0 ? offset : static_cast<uint64>( - offset / (elapsed / 1000000.0))) - << " bps"; + << AddCommas(records) << " records at " << AddCommas(bps) << " bps"; return 0; } -- cgit v1.2.3 From c22d996c3d6a16db292bd3464b2ef7b91adae676 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Tue, 17 Apr 2018 01:00:44 +0800 Subject: Fix expand_dims of dims argument has been deprecated with axis --- tensorflow/contrib/layers/python/layers/target_column.py | 4 ++-- tensorflow/contrib/learn/python/learn/estimators/head.py | 10 +++++----- .../python/timeseries/state_space_models/state_space_model.py | 2 +- tensorflow/tools/compatibility/testdata/test_file_v0_11.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 3e639a180e..f3377f2a05 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -396,7 +396,7 @@ class 
_BinarySvmTargetColumn(_MultiClassTargetColumn): def _mean_squared_loss(logits, target): # To prevent broadcasting inside "-". if len(target.get_shape()) == 1: - target = array_ops.expand_dims(target, dim=[1]) + target = array_ops.expand_dims(target, axis=1) logits.get_shape().assert_is_compatible_with(target.get_shape()) return math_ops.square(logits - math_ops.to_float(target)) @@ -405,7 +405,7 @@ def _mean_squared_loss(logits, target): def _log_loss_with_two_classes(logits, target): # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. if len(target.get_shape()) == 1: - target = array_ops.expand_dims(target, dim=[1]) + target = array_ops.expand_dims(target, axis=1) loss_vec = nn.sigmoid_cross_entropy_with_logits( labels=math_ops.to_float(target), logits=logits) return loss_vec diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 2b4b6eff39..06f4173170 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -563,10 +563,10 @@ def _mean_squared_loss(labels, logits, weights=None): labels = ops.convert_to_tensor(labels) # To prevent broadcasting inside "-". if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) # TODO(zakaria): make sure it does not recreate the broadcast bug. if len(logits.get_shape()) == 1: - logits = array_ops.expand_dims(logits, dim=(1,)) + logits = array_ops.expand_dims(logits, axis=1) logits.get_shape().assert_is_compatible_with(labels.get_shape()) loss = math_ops.square(logits - math_ops.to_float(labels), name=name) return _compute_weighted_loss(loss, weights) @@ -579,10 +579,10 @@ def _poisson_loss(labels, logits, weights=None): labels = ops.convert_to_tensor(labels) # To prevent broadcasting inside "-". 
if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) # TODO(zakaria): make sure it does not recreate the broadcast bug. if len(logits.get_shape()) == 1: - logits = array_ops.expand_dims(logits, dim=(1,)) + logits = array_ops.expand_dims(logits, axis=1) logits.get_shape().assert_is_compatible_with(labels.get_shape()) loss = nn.log_poisson_loss(labels, logits, compute_full_loss=True, name=name) @@ -797,7 +797,7 @@ def _log_loss_with_two_classes(labels, logits, weights=None): # TODO(ptucker): This will break for dynamic shapes. # sigmoid_cross_entropy_with_logits requires [batch_size, 1] labels. if len(labels.get_shape()) == 1: - labels = array_ops.expand_dims(labels, dim=(1,)) + labels = array_ops.expand_dims(labels, axis=1) loss = nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits, name=name) return _compute_weighted_loss(loss, weights) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 951c6546d5..d04c721007 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -909,7 +909,7 @@ class StateSpaceModel(model.SequentialTimeSeriesModel): elif unbroadcasted_shape.ndims == 2: # Unbroadcasted shape [num features x state dimension] broadcasted_model = array_ops.tile( - array_ops.expand_dims(unbroadcasted_model, dim=0), + array_ops.expand_dims(unbroadcasted_model, axis=0), [array_ops.shape(times)[0], 1, 1]) elif unbroadcasted_shape.ndims == 3: broadcasted_model = unbroadcasted_model diff --git a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py index 01f37d8768..40526d930c 100644 --- 
a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py +++ b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py @@ -94,7 +94,7 @@ class TestUpgrade(test_util.TensorFlowTestCase): self.assertAllClose( tf.reduce_logsumexp(a, [0, 1]).eval(), 6.45619344711) self.assertAllEqual( - tf.expand_dims([[1, 2], [3, 4]], dim=1).eval(), + tf.expand_dims([[1, 2], [3, 4]], axis=1).eval(), [[[1, 2]], [[3, 4]]]) def testArgMinMax(self): -- cgit v1.2.3 From f35dc0a522ae630902baa5be16d2a53b59266770 Mon Sep 17 00:00:00 2001 From: Bruno Goncalves <882745+brunomorishita@users.noreply.github.com> Date: Sat, 28 Apr 2018 19:24:22 -0300 Subject: Fix cmake library path for libpng16.a --- tensorflow/contrib/cmake/external/png.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index ad2af01bc0..1a147e9c8e 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== include (ExternalProject) +include (GNUInstallDirs) set(png_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/png_archive) set(png_URL https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz) @@ -35,7 +36,7 @@ if(WIN32) endif() endif() else() - set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/lib/libpng16.a) + set(png_STATIC_LIBRARIES ${CMAKE_BINARY_DIR}/png/install/${CMAKE_INSTALL_LIBDIR}/libpng16.a) endif() set(png_HEADERS -- cgit v1.2.3 From f78fd433118830482dddbf6055751898a19265de Mon Sep 17 00:00:00 2001 From: jiefangxuanyan <505745416@qq.com> Date: Wed, 13 Jun 2018 17:28:23 +0800 Subject: Specify endianness in expected_result array to fix #15767. 
--- tensorflow/python/kernel_tests/decode_raw_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index 122a9ed469..0bd8bc3c7b 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -79,7 +79,7 @@ class DecodeRawOpTest(test.TestCase): decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.float16) self.assertEqual([None, None], decode.get_shape().as_list()) - expected_result = np.matrix([[1, -2, -3, 4]], dtype=np.float16) + expected_result = np.matrix([[1, -2, -3, 4]], dtype=" Date: Sun, 1 Jul 2018 01:13:06 +0800 Subject: Removed unused lambda capture --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index f9f36443a8..6824e0f89f 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -50,7 +50,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { } for (Node* n : matches) { AttrSlice n_attrs = n->attrs(); - auto base_make_node = [n, g, &n_attrs](const string& op, + auto base_make_node = [n, &n_attrs](const string& op, const string& name) { NodeBuilder node_builder(name, op); node_builder.Device(n->requested_device()); -- cgit v1.2.3 From f7a00dbf1799f3fb3900b0788047e460a9abfd31 Mon Sep 17 00:00:00 2001 From: naurril Date: Sun, 1 Jul 2018 01:47:25 +0800 Subject: Removed unused lambda capture --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc 
b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index 6824e0f89f..0f853ae52a 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -60,7 +60,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { } return node_builder; }; - auto make_node = [n, g, &n_attrs, &base_make_node](string op) { + auto make_node = [n, g, &base_make_node](string op) { return base_make_node( op, g->NewName(strings::StrCat(n->name(), "/Internal"))); }; -- cgit v1.2.3 From e5a7c13a8f15b0f98df849fbe3196f2ecedec04e Mon Sep 17 00:00:00 2001 From: naurril Date: Tue, 3 Jul 2018 00:21:25 +0800 Subject: cleanup CondContext at execption --- tensorflow/python/ops/control_flow_ops.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index fc37805c79..386305ba30 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2044,22 +2044,26 @@ def cond(pred, # Build the graph for the true branch in a new context. context_t = CondContext(pred, pivot_1, branch=1) - context_t.Enter() - orig_res_t, res_t = context_t.BuildCondBranch(true_fn) - if orig_res_t is None: - raise ValueError("true_fn must have a return value.") - context_t.ExitResult(res_t) - context_t.Exit() + try: + context_t.Enter() + orig_res_t, res_t = context_t.BuildCondBranch(true_fn) + if orig_res_t is None: + raise ValueError("true_fn must have a return value.") + context_t.ExitResult(res_t) + finally: + context_t.Exit() # Build the graph for the false branch in a new context. 
context_f = CondContext(pred, pivot_2, branch=0) - context_f.Enter() - orig_res_f, res_f = context_f.BuildCondBranch(false_fn) - if orig_res_f is None: - raise ValueError("false_fn must have a return value.") - context_f.ExitResult(res_f) - context_f.Exit() - + try: + context_f.Enter() + orig_res_f, res_f = context_f.BuildCondBranch(false_fn) + if orig_res_f is None: + raise ValueError("false_fn must have a return value.") + context_f.ExitResult(res_f) + finally: + context_f.Exit() + if not strict: orig_res_t = _UnpackIfSingleton(orig_res_t) orig_res_f = _UnpackIfSingleton(orig_res_f) -- cgit v1.2.3 From 9bab0c89c4ffeeb780e7a3dc415ab888164b9b00 Mon Sep 17 00:00:00 2001 From: "candy.dc" Date: Thu, 26 Jul 2018 11:36:30 +0800 Subject: fix: No need to convert to tensor when using ResourceVariable in embedding_lookup, because ResourceVariable support ResourceGather OP. --- tensorflow/contrib/layers/python/layers/embedding_ops.py | 7 ++++--- tensorflow/python/feature_column/feature_column_v2.py | 7 ++++--- tensorflow/python/ops/embedding_ops.py | 7 ++++--- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 60e1d85ea9..897aed527d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -112,9 +112,10 @@ def safe_embedding_lookup_sparse(embedding_weights, dtype = sparse_weights.dtype if sparse_weights is not None else None if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] 
contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index b4dd23f58d..220a4f7ed6 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -3283,9 +3283,10 @@ def _safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 27c2fa7017..fe422f5095 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -545,9 +545,10 @@ def safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' 
% embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + if not isinstance(embedding_weights[0], resource_variable_ops.ResourceVariable): + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, -- cgit v1.2.3 From aba7fcaf87f8d4099212db2e3bffad1dbab168a2 Mon Sep 17 00:00:00 2001 From: shaohua Date: Thu, 26 Jul 2018 15:00:53 +0800 Subject: Fix gcc6.3 build link issue Signed-off-by: shaohua --- tensorflow/tensorflow.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 340d3f393c..054d68d42c 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -404,7 +404,7 @@ def tf_gen_op_wrapper_cc(name, tf_cc_binary( name=tool, copts=tf_copts(), - linkopts=if_not_windows(["-lm"]), + linkopts=if_not_windows(["-lm","-Wl,-ldl"]), linkstatic=1, # Faster to link this one-time-use binary dynamically deps=[op_gen] + deps) @@ -573,7 +573,7 @@ def tf_gen_op_wrapper_py(name, deps = [str(Label("//tensorflow/core:" + name + "_op_lib"))] tf_cc_binary( name=tool_name, - linkopts=if_not_windows(["-lm"]) + cc_linkopts, + linkopts=if_not_windows(["-lm","-Wl,-ldl"]) + cc_linkopts, copts=tf_copts(), linkstatic=1, # Faster to link this one-time-use binary dynamically deps=([ -- cgit v1.2.3 From 27de8e717c1bec91398f5a6be6c7287b657fc960 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 02:29:43 +0000 Subject: Improve shape function for CudnnRNNParamsSize In cudnn_rnn_ops.cc, the CudnnRNNParamsSize does not have restrictions on num_layers, num_units, and input_size, though they all should be scalars. This fix adds the shape check of num_layers, num_units, and input_size for CudnnRNNParamsSize. 
Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops.cc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc index f78f7a897a..7eb141aa8c 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops.cc @@ -52,6 +52,12 @@ REGISTER_OP("CudnnRNNParamsSize") .Attr("seed2: int = 0") .Output("params_size: S") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + // num_layers, num_units, and input_size should be scalars. + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->Vector(1)); return Status::OK(); }); -- cgit v1.2.3 From 01387ccddcf5c23d48c5745f4a6a49a670f528aa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 04:28:08 +0000 Subject: Add test cases for shape function of CudnnRNNParamsSize Signed-off-by: Yong Tang --- .../python/kernel_tests/cudnn_rnn_ops_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py index 5a667485be..675b7ce185 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py @@ -413,6 +413,28 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase): self._testOneLSTMParamsSize(num_layers, num_units, input_size, direction) + @unittest.skipUnless(test.is_built_with_cuda(), + "Test only applicable when running on GPUs") + def testLSTMParamsSizeShape(self): + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + constant_op.constant([4]), 200, 200, + 
direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, constant_op.constant([200]), 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, 200, constant_op.constant([200]), + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + class CudnnRNNTestInference(TensorFlowTestCase): -- cgit v1.2.3 From d27b5a3e5458c82ce1ca3cda1a9879149c779959 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 14:52:52 +0000 Subject: Pylint fix Signed-off-by: Yong Tang --- .../python/kernel_tests/cudnn_rnn_ops_test.py | 39 ++++++++++++---------- 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py index 675b7ce185..c59d3682d4 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py @@ -416,24 +416,27 @@ class CudnnRNNTestParamsSize(TensorFlowTestCase): @unittest.skipUnless(test.is_built_with_cuda(), "Test only applicable when running on GPUs") def testLSTMParamsSizeShape(self): - with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - constant_op.constant([4]), 200, 200, - direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() - with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - 4, constant_op.constant([200]), 200, - 
direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() - with self.assertRaisesRegexp(ValueError, "Shape must be rank 0 but is rank 1"): - model = _CreateModel( - cudnn_rnn_ops.CUDNN_LSTM, - 4, 200, constant_op.constant([200]), - direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) - params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + constant_op.constant([4]), 200, 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, constant_op.constant([200]), 200, + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1"): + model = _CreateModel( + cudnn_rnn_ops.CUDNN_LSTM, + 4, 200, constant_op.constant([200]), + direction=cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION) + params_size = model.params_size() class CudnnRNNTestInference(TensorFlowTestCase): -- cgit v1.2.3 From c86327921c6e5e918250652558e4075abd88c6f4 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 25 Jul 2018 14:53:02 +0000 Subject: Add additional unit test in c++ for cudnn_rnn_ops Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops_test.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc index 2dd867561b..095ee1fc95 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc @@ -26,7 +26,19 @@ namespace tensorflow { TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) { ShapeInferenceTestOp op("CudnnRNNParamsSize"); - INFER_OK(op, "[1];[1];[1]", "[1]"); + INFER_OK(op, "[];[];[]", "[1]"); + INFER_OK(op, 
"?;[];[]", "[1]"); + INFER_OK(op, "[];?;[]", "[1]"); + INFER_OK(op, "[];[];?", "[1]"); + INFER_OK(op, "[];?;?", "[1]"); + INFER_OK(op, "?;?;?", "[1]"); + + INFER_ERROR("Shape must be rank 0 ", op, + "[1,2];?;[]"); + INFER_ERROR("Shape must be rank 0 ", op, + "?;[2];[]"); + INFER_ERROR("Shape must be rank 0 ", op, + "?;?;[1]"); } TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { -- cgit v1.2.3 From 0d7b11f4d63f9bae0d0e4001dd96ce840810210b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 5 Aug 2018 17:23:47 +0000 Subject: Fix op_scope warning in adjust_gamma While running the following op_scope causes the warning: ``` Python 3.5.2 (default, Nov 23 2017, 16:37:01) [GCC 5.4.0 20160609] on linux Type "help", "copyright", "credits" or "license" for more information. >>> import tensorflow as tf i>>> import numpy as np >>> tf.image.adjust_gamma(np.random.uniform(0.0, 255.0, (8, 8)), gamma=1) WARNING:tensorflow:tf.op_scope(values, name, default_name) is deprecated, use tf.name_scope(name, default_name, values) >>> ``` This fix fixes the warning by switching op_scope to name_scope. Signed-off-by: Yong Tang --- tensorflow/python/ops/image_ops_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 855a4d0c33..1b11b8b074 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1377,7 +1377,7 @@ def adjust_gamma(image, gamma=1, gain=1): [1] http://en.wikipedia.org/wiki/Gamma_correction """ - with ops.op_scope([image, gamma, gain], None, 'adjust_gamma'): + with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]) as name: # Convert pixel value to DT_FLOAT for computing adjusted image. img = ops.convert_to_tensor(image, name='img', dtype=dtypes.float32) # Keep image dtype for computing the scale of corresponding dtype. 
-- cgit v1.2.3 From 731fc1ecaac8a527ac606ff595f313ab9ebbb7fa Mon Sep 17 00:00:00 2001 From: rasmi Date: Wed, 8 Aug 2018 14:34:16 -0700 Subject: Add deprecation warning to tf.gfile.FastGFile. Fixes #12663. --- tensorflow/python/platform/gfile.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/python/platform/gfile.py b/tensorflow/python/platform/gfile.py index 45de047894..510701e344 100644 --- a/tensorflow/python/platform/gfile.py +++ b/tensorflow/python/platform/gfile.py @@ -33,6 +33,7 @@ from tensorflow.python.lib.io.file_io import rename as Rename from tensorflow.python.lib.io.file_io import stat as Stat from tensorflow.python.lib.io.file_io import walk as Walk # pylint: enable=unused-import +from tensorflow.python.util.deprecation import deprecated from tensorflow.python.util.tf_export import tf_export @@ -52,6 +53,7 @@ class GFile(_FileIO): @tf_export('gfile.FastGFile') +@deprecated(None, 'Use tf.gfile.GFile.') class FastGFile(_FileIO): """File I/O wrappers without thread locking. -- cgit v1.2.3 From 6c14d85b41c565ed9dabc3677aedf76757097242 Mon Sep 17 00:00:00 2001 From: rasmi Date: Wed, 8 Aug 2018 16:35:12 -0700 Subject: Changed order of export and deprecated decorators. --- tensorflow/python/platform/gfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/platform/gfile.py b/tensorflow/python/platform/gfile.py index 510701e344..ac53609434 100644 --- a/tensorflow/python/platform/gfile.py +++ b/tensorflow/python/platform/gfile.py @@ -52,8 +52,8 @@ class GFile(_FileIO): super(GFile, self).__init__(name=name, mode=mode) -@tf_export('gfile.FastGFile') @deprecated(None, 'Use tf.gfile.GFile.') +@tf_export('gfile.FastGFile') class FastGFile(_FileIO): """File I/O wrappers without thread locking. 
-- cgit v1.2.3 From b81f4bb5468b0fdf9e36591d3a7d56740bedb7dd Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Thu, 9 Aug 2018 14:34:52 +0800 Subject: ENH: implement feature importances --- .../python/estimator/canned/boosted_trees.py | 105 ++++++++++++++++++++- 1 file changed, 101 insertions(+), 4 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 8b423f76de..060f5cb3fa 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -21,6 +21,11 @@ import abc import collections import functools +import numpy as np + +from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.client import session as tf_session +from tensorflow.python.eager import context from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib @@ -38,7 +43,9 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary +from tensorflow.python.training import checkpoint_management from tensorflow.python.training import distribute as distribute_lib +from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import estimator_export @@ -54,6 +61,8 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object() _DUMMY_NUM_BUCKETS = -1 _DUMMY_NODE_ID = -1 +_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO' + def _get_transformed_features(features, sorted_feature_columns): """Gets the transformed features from features/feature_columns pair. 
@@ -736,6 +745,8 @@ def _bt_model_fn( bucketized_features=input_feature_list, logits_dimension=head.logits_dimension) else: + _, serialized_proto = tree_ensemble.serialize() + ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto) if is_single_machine: local_tree_ensemble = tree_ensemble ensemble_reload = control_flow_ops.no_op() @@ -910,8 +921,92 @@ def _create_regression_head(label_dimension, weight_column=None): # pylint: enable=protected-access +def _compute_feature_importance_for_tree(tree, num_features, normalize): + importances = np.zeros(num_features) + + for node in tree.nodes: + node_type = node.WhichOneof('node') + if node_type == 'bucketized_split': + feature_id = node.bucketized_split.feature_id + importances[feature_id] += node.metadata.gain + elif node_type == 'leaf': + assert node.metadata.gain == 0 + else: + raise ValueError('Unexpected split type %s', node_type) + + if normalize: + normalizer = np.sum(importances) + if normalizer > 0.0: + # Avoid dividing by zero (e.g., when root is pure) + importances /= normalizer + + return importances + + +def compute_feature_importances(tree_ensemble, num_features, normalize=True): + tree_importances = [_compute_feature_importance_for_tree(tree, + num_features, + normalize) + for tree in tree_ensemble.trees] + tree_importances = np.array(tree_importances) + tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) + feature_importances = np.sum(tree_importances * tree_weights, + axis=0) / np.sum(tree_weights) + if normalize: + normalizer = np.sum(feature_importances) + if normalizer > 0.0: + feature_importances /= normalizer + + sorted_feature = np.argsort(feature_importances)[::-1] + return sorted_feature, feature_importances[sorted_feature] + + +class _BoostedTrees(estimator.Estimator): + + def __init__(self, model_fn, model_dir, config, feature_columns): + super(_BoostedTrees, self).__init__( + model_fn=model_fn, model_dir=model_dir, config=config) + + sorted_feature_columns = 
sorted(feature_columns, key=lambda tc: tc.name) + self._num_features = _calculate_num_features(sorted_feature_columns) + + def compute_feature_importances(self, normalize=True): + tree_ensemble = self._read_tree_ensemble_from_checkpoint() + if tree_ensemble: + return compute_feature_importances(tree_ensemble, + self._num_features, + normalize) + else: + return [], [] + + def _read_tree_ensemble_from_checkpoint(self): + with context.graph_mode(): + checkpoint_path = checkpoint_management.latest_checkpoint( + self._model_dir) + if not checkpoint_path: + raise ValueError("Couldn't find trained model at %s." % self._model_dir) + + with ops.Graph().as_default() as g: + with tf_session.Session(config=self._session_config) as session: + meta_file = checkpoint_path + '.meta' + graph_saver = saver.import_meta_graph(meta_file) + graph_saver.restore(session, checkpoint_path) + + serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO) + assert len(serialized_proto) == 1 + serialized_proto_string = session.run(serialized_proto[0]) + + if serialized_proto_string: + tree_ensemble = boosted_trees_pb2.TreeEnsemble() + tree_ensemble.ParseFromString(serialized_proto_string) + return tree_ensemble + else: + # serialized_proto_string is empty string before training. + return None + + @estimator_export('estimator.BoostedTreesClassifier') -class BoostedTreesClassifier(estimator.Estimator): +class BoostedTreesClassifier(_BoostedTrees): """A Classifier for Tensorflow Boosted Trees models. 
@compatibility(eager) @@ -1046,11 +1141,12 @@ class BoostedTreesClassifier(estimator.Estimator): closed_form_grad_and_hess_fn=closed_form) super(BoostedTreesClassifier, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) @estimator_export('estimator.BoostedTreesRegressor') -class BoostedTreesRegressor(estimator.Estimator): +class BoostedTreesRegressor(_BoostedTrees): """A Regressor for Tensorflow Boosted Trees models. @compatibility(eager) @@ -1169,4 +1265,5 @@ class BoostedTreesRegressor(estimator.Estimator): n_batches_per_layer, config) super(BoostedTreesRegressor, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) -- cgit v1.2.3 From 54fbe83c1bc50510a7712ab78aaf369ba562538e Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Thu, 9 Aug 2018 14:35:19 +0800 Subject: TST: add test case --- .../python/estimator/canned/boosted_trees_test.py | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index ec597e4686..054d820527 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -91,6 +91,17 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): return _input_fn +def _compute_feature_importances_np(feature_gains, normalize): + if normalize: + feature_gains /= np.sum(feature_gains, axis=1, keepdims=True) + feature_gains = np.nan_to_num(feature_gains) + feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains) + feature_importances /= np.sum(feature_importances) + return np.nan_to_num(feature_importances) + else: + return np.sum(feature_gains, axis=0) / 
len(feature_gains) + + class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): @@ -154,6 +165,10 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) + self.assertEqual(3, est._num_features) # pylint:disable=protected-access + sorted_features, importances = est.compute_feature_importances() + self.assertAllEqual([], sorted_features) + self.assertAllEqual([], importances) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -544,6 +559,85 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) + def testCalculateFeatureImportances(self): + input_fn = _make_train_input_fn(is_classification=True) + + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + self.assertEqual(3, est._num_features) # pylint:disable=protected-access + # It will stop after 5 steps because of the max depth and num trees. + num_steps = 100 + # Train for a few steps, and validate final checkpoint. + est.train(input_fn, steps=num_steps) + + # TreeEnsemble Proto: + # tree_ensemble: trees { + # nodes { + # bucketized_split { + # feature_id: 2 + # threshold: 2 + # left_id: 1 + # right_id: 2 + # } + # metadata { + # gain: 0.426666676998 + # } + # } + # ...... + # nodes { + # bucketized_split { + # threshold: 1 + # left_id: 5 + # right_id: 6 + # } + # metadata { + # gain: 0.133481562138 + # original_leaf { + # scalar: 0.066666662693 + # } + # } + # } + # ...... 
+ # nodes { + # bucketized_split { + # left_id: 11 + # right_id: 12 + # } + # metadata { + # gain: 0.400360047817 + # original_leaf { + # scalar: 0.0599950700998 + # } + # } + # } + # } + # trees { + # nodes { + # leaf { + # } + # } + # } + # tree_weights: 1.0 + # tree_weights: 1.0 + # ...... + sorted_features_expected = [0, 2, 1] + feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. + [0.0, 0.0, 0.0]] # 2nd tree. + + sorted_features, importances = est.compute_feature_importances(normalize=False) + self.assertAllEqual(sorted_features_expected, sorted_features) + self.assertAllClose(_compute_feature_importances_np(feature_gains, False), + importances) + + sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + self.assertAllEqual(sorted_features_expected, sorted_features1) + self.assertAllClose(_compute_feature_importances_np(feature_gains, True), + importances1) + class ModelFnTests(test_util.TensorFlowTestCase): """Tests bt_model_fn including unexposed internal functionalities.""" -- cgit v1.2.3 From c3c6c45987692e8bc73eff2f10f9ec1a82f55287 Mon Sep 17 00:00:00 2001 From: rasmi Date: Thu, 9 Aug 2018 10:27:37 -0700 Subject: Moved @deprecated decorator to __init__ --- tensorflow/python/platform/gfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/platform/gfile.py b/tensorflow/python/platform/gfile.py index ac53609434..5927bc2409 100644 --- a/tensorflow/python/platform/gfile.py +++ b/tensorflow/python/platform/gfile.py @@ -52,7 +52,6 @@ class GFile(_FileIO): super(GFile, self).__init__(name=name, mode=mode) -@deprecated(None, 'Use tf.gfile.GFile.') @tf_export('gfile.FastGFile') class FastGFile(_FileIO): """File I/O wrappers without thread locking. @@ -64,6 +63,7 @@ class FastGFile(_FileIO): invocations in network filesystems). 
""" + @deprecated(None, 'Use tf.gfile.GFile.') def __init__(self, name, mode='r'): super(FastGFile, self).__init__(name=name, mode=mode) -- cgit v1.2.3 From b127c201cda558db21ce5f48f5899593d73da46b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 10 Aug 2018 20:37:32 +0000 Subject: Fix clang-format issue in `Experimental clang-format Check` Signed-off-by: Yong Tang --- tensorflow/core/ops/cudnn_rnn_ops.cc | 3 --- tensorflow/core/ops/cudnn_rnn_ops_test.cc | 9 +++------ 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc index 7eb141aa8c..f84142c992 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops.cc @@ -37,7 +37,6 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; - REGISTER_OP("CudnnRNNParamsSize") .Input("num_layers: int32") .Input("num_units: int32") @@ -62,7 +61,6 @@ REGISTER_OP("CudnnRNNParamsSize") return Status::OK(); }); - REGISTER_OP("CudnnRNN") .Input("input: T") .Input("input_h: T") @@ -254,7 +252,6 @@ REGISTER_OP("CudnnRNNParamsToCanonical") return Status::OK(); }); - REGISTER_OP("CudnnRNNCanonicalToParams") .Input("num_layers: int32") .Input("num_units: int32") diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc index 095ee1fc95..13c3b933f4 100644 --- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc +++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc @@ -33,12 +33,9 @@ TEST(CudnnRNNOpsTest, ParamsSize_ShapeFn) { INFER_OK(op, "[];?;?", "[1]"); INFER_OK(op, "?;?;?", "[1]"); - INFER_ERROR("Shape must be rank 0 ", op, - "[1,2];?;[]"); - INFER_ERROR("Shape must be rank 0 ", op, - "?;[2];[]"); - INFER_ERROR("Shape must be rank 0 ", op, - "?;?;[1]"); + INFER_ERROR("Shape must be rank 0 ", op, "[1,2];?;[]"); + INFER_ERROR("Shape must be rank 0 ", op, "?;[2];[]"); + INFER_ERROR("Shape must be rank 0 ", 
op, "?;?;[1]"); } TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) { -- cgit v1.2.3 From 7ad604778ed69303458145376f2b6ec403fc5345 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Mon, 13 Aug 2018 15:57:54 +0800 Subject: ENH: mapping idx to feature_name --- .../python/estimator/canned/boosted_trees.py | 38 ++++++++-- .../python/estimator/canned/boosted_trees_test.py | 88 ++++++++++++++++++++-- 2 files changed, 113 insertions(+), 13 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 060f5cb3fa..ba90b361b3 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -201,6 +201,23 @@ def _calculate_num_features(sorted_feature_columns): return num_features +def _generate_feature_name_for_index(sorted_feature_columns): + names = [] + for column in sorted_feature_columns: + if isinstance(column, feature_column_lib._IndicatorColumn): # pylint:disable=protected-access + categorical_column = column.categorical_column + if isinstance(categorical_column, + feature_column_lib._VocabularyListCategoricalColumn): # pylint:disable=protected-access + for voc in categorical_column.vocabulary_list: + names.append('{}:{}'.format(column.name, voc)) + else: + for num in categorical_column._num_buckets: # pylint:disable=protected-access + names.append('{}:{}'.format(column.name, num)) + else: + names.append(column.name) + return names + + def _cache_transformed_features(features, sorted_feature_columns, batch_size): """Transform features and cache, then returns (cached_features, cache_op).""" num_features = _calculate_num_features(sorted_feature_columns) @@ -943,7 +960,9 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize): return importances -def compute_feature_importances(tree_ensemble, num_features, normalize=True): +def compute_feature_importances(tree_ensemble, + num_features, + 
normalize=True): tree_importances = [_compute_feature_importance_for_tree(tree, num_features, normalize) @@ -957,8 +976,8 @@ def compute_feature_importances(tree_ensemble, num_features, normalize=True): if normalizer > 0.0: feature_importances /= normalizer - sorted_feature = np.argsort(feature_importances)[::-1] - return sorted_feature, feature_importances[sorted_feature] + sorted_feature_idx = np.argsort(feature_importances)[::-1] + return sorted_feature_idx, feature_importances[sorted_feature_idx] class _BoostedTrees(estimator.Estimator): @@ -967,15 +986,18 @@ class _BoostedTrees(estimator.Estimator): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) - sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - self._num_features = _calculate_num_features(sorted_feature_columns) + self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) def compute_feature_importances(self, normalize=True): tree_ensemble = self._read_tree_ensemble_from_checkpoint() if tree_ensemble: - return compute_feature_importances(tree_ensemble, - self._num_features, - normalize) + num_features = _calculate_num_features(self._sorted_feature_columns) + names_for_idx = np.array( + _generate_feature_name_for_index(self._sorted_feature_columns)) + idx, importances = compute_feature_importances(tree_ensemble, + num_features, + normalize) + return names_for_idx[idx], importances else: return [], [] diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 054d820527..880f0f10ba 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -165,7 +165,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - 
self.assertEqual(3, est._num_features) # pylint:disable=protected-access sorted_features, importances = est.compute_feature_importances() self.assertAllEqual([], sorted_features) self.assertAllEqual([], importances) @@ -568,7 +567,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) - self.assertEqual(3, est._num_features) # pylint:disable=protected-access # It will stop after 5 steps because of the max depth and num trees. num_steps = 100 # Train for a few steps, and validate final checkpoint. @@ -624,17 +622,97 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # tree_weights: 1.0 # tree_weights: 1.0 # ...... - sorted_features_expected = [0, 2, 1] + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. [0.0, 0.0, 0.0]] # 2nd tree. sorted_features, importances = est.compute_feature_importances(normalize=False) - self.assertAllEqual(sorted_features_expected, sorted_features) + self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) sorted_features1, importances1 = est.compute_feature_importances(normalize=True) - self.assertAllEqual(sorted_features_expected, sorted_features1) + self.assertAllEqual(feature_names_expected, sorted_features1) + self.assertAllClose(_compute_feature_importances_np(feature_gains, True), + importances1) + + def testCalculateFeatureImportancesWithIndicatorColumn(self): + categorical = feature_column.categorical_column_with_vocabulary_list( + key='categorical', vocabulary_list=('bad', 'good', 'ok')) + feature_indicator = feature_column.indicator_column(categorical) + bucketized_col = feature_column.bucketized_column( + feature_column.numeric_column( + 'an_uninformative_feature', dtype=dtypes.float32), + BUCKET_BOUNDARIES) + + labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], 
dtype=np.float32) + # Our categorical feature defines the labels perfectly + input_fn = numpy_io.numpy_input_fn( + x={ + 'an_uninformative_feature': np.array([1, 1, 1, 1, 1]), + 'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']), + }, + y=labels, + batch_size=5, + shuffle=False) + + # Train depth 1 tree. + est = boosted_trees.BoostedTreesRegressor( + feature_columns=[bucketized_col, feature_indicator], + n_batches_per_layer=1, + n_trees=1, + learning_rate=1.0, + max_depth=1) + + num_steps = 1 + est.train(input_fn, steps=num_steps) + + # TreeEnsemble Proto: + # trees { + # nodes { + # bucketized_split { + # feature_id: 2 + # left_id: 1 + # right_id: 2 + # } + # metadata { + # gain: 15.5952005386 + # } + # } + # nodes { + # leaf { + # } + # } + # nodes { + # leaf { + # scalar: 5.7000002861 + # } + # } + # } + # trees { + # nodes { + # leaf { + # } + # } + # } + # tree_weights: 1.0 + # tree_weights: 1.0 + feature_names_expected = ['categorical_indicator:good', + # Reverse order because feature importances + # are sorted by np.argsort(f)[::-1] + 'categorical_indicator:ok', + 'categorical_indicator:bad', + 'an_uninformative_feature_bucketized'] + feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. + [0.0, 0.0, 0.0, 0.0]] # 2nd tree. 
+ + sorted_features, importances = est.compute_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, sorted_features) + self.assertAllClose(_compute_feature_importances_np(feature_gains, False), + importances) + + sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) -- cgit v1.2.3 From 0845a01256fd3797804f247f76a1655a56c119a6 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 17 Aug 2018 11:24:21 +0800 Subject: CLN: revise code according to comments --- .../python/estimator/canned/boosted_trees.py | 81 +++++++++++++++------- .../python/estimator/canned/boosted_trees_test.py | 10 +-- 2 files changed, 62 insertions(+), 29 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index ba90b361b3..848698311c 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -201,15 +201,23 @@ def _calculate_num_features(sorted_feature_columns): return num_features -def _generate_feature_name_for_index(sorted_feature_columns): +def _generate_feature_name_mapping(sorted_feature_columns): + """Return a list of feature name for feature ids. + + Args: + sorted_feature_columns: a list/set of tf.feature_column sorted by name. + + Returns: + feature_name_mapping: a list of feature name. 
+ """ names = [] for column in sorted_feature_columns: if isinstance(column, feature_column_lib._IndicatorColumn): # pylint:disable=protected-access categorical_column = column.categorical_column if isinstance(categorical_column, feature_column_lib._VocabularyListCategoricalColumn): # pylint:disable=protected-access - for voc in categorical_column.vocabulary_list: - names.append('{}:{}'.format(column.name, voc)) + for value in categorical_column.vocabulary_list: + names.append('{}:{}'.format(column.name, value)) else: for num in categorical_column._num_buckets: # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) @@ -938,7 +946,8 @@ def _create_regression_head(label_dimension, weight_column=None): # pylint: enable=protected-access -def _compute_feature_importance_for_tree(tree, num_features, normalize): +def _compute_feature_importances_per_tree(tree, num_features): + """Computes the importance of each feature in the tree.""" importances = np.zeros(num_features) for node in tree.nodes: @@ -951,21 +960,29 @@ def _compute_feature_importance_for_tree(tree, num_features, normalize): else: raise ValueError('Unexpected split type %s', node_type) - if normalize: - normalizer = np.sum(importances) - if normalizer > 0.0: - # Avoid dividing by zero (e.g., when root is pure) - importances /= normalizer - return importances -def compute_feature_importances(tree_ensemble, - num_features, - normalize=True): - tree_importances = [_compute_feature_importance_for_tree(tree, - num_features, - normalize) +def _compute_feature_importances(tree_ensemble, + num_features, + normalize=True): + """Compute the feature importances. + + The higher the value, the more important the feature. + + Args: + tree_ensemble: TreeEnsemble. + num_features: The total number of feature ids. + normalize: If True, normalize the feature importances. + + Returns: + sorted_feature_idx: A list of feature_id which is sorted + by its feature importance. 
+ feature_importances: A list of corresponding feature importance. + """ + tree_importances = [_compute_feature_importances_per_tree(tree, + num_features, + normalize) for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) @@ -973,8 +990,8 @@ def compute_feature_importances(tree_ensemble, axis=0) / np.sum(tree_weights) if normalize: normalizer = np.sum(feature_importances) - if normalizer > 0.0: - feature_importances /= normalizer + assert normalizer > 0, 'Trees are all empty or root node only.' + feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] return sorted_feature_idx, feature_importances[sorted_feature_idx] @@ -988,18 +1005,34 @@ class _BoostedTrees(estimator.Estimator): self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - def compute_feature_importances(self, normalize=True): + def experimental_feature_importances(self, normalize=True): + """Compute the feature importances. + + The higher the value, the more important the corresponding feature. + + Args: + normalize: If True, normalize the feature importances. + + Returns: + sorted_feature_names: A list of feature name which is sorted + by its feature importance. + feature_importances: A list of corresponding feature importance. + + Raises: + ValueError: Empty ensemble. 
+ """ tree_ensemble = self._read_tree_ensemble_from_checkpoint() if tree_ensemble: num_features = _calculate_num_features(self._sorted_feature_columns) names_for_idx = np.array( - _generate_feature_name_for_index(self._sorted_feature_columns)) - idx, importances = compute_feature_importances(tree_ensemble, - num_features, - normalize) + _generate_feature_name_mapping(self._sorted_feature_columns)) + idx, importances = _compute_feature_importances(tree_ensemble, + num_features, + normalize) return names_for_idx[idx], importances else: - return [], [] + raise ValueError('Found empty serialized string for TreeEnsemble.' + 'You should only call the method after training.') def _read_tree_ensemble_from_checkpoint(self): with context.graph_mode(): diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 880f0f10ba..8625c7d968 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -165,7 +165,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - sorted_features, importances = est.compute_feature_importances() + sorted_features, importances = est.experimental_feature_importances() self.assertAllEqual([], sorted_features) self.assertAllEqual([], importances) @@ -626,12 +626,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. [0.0, 0.0, 0.0]] # 2nd tree. 
- sorted_features, importances = est.compute_feature_importances(normalize=False) + sorted_features, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) - sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) @@ -706,12 +706,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. [0.0, 0.0, 0.0, 0.0]] # 2nd tree. - sorted_features, importances = est.compute_feature_importances(normalize=False) + sorted_features, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, sorted_features) self.assertAllClose(_compute_feature_importances_np(feature_gains, False), importances) - sorted_features1, importances1 = est.compute_feature_importances(normalize=True) + sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, sorted_features1) self.assertAllClose(_compute_feature_importances_np(feature_gains, True), importances1) -- cgit v1.2.3 From 196f5478d780b6e069290366fd4b85bb09d8141d Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 17 Aug 2018 12:22:13 +0800 Subject: CLN: use CheckpointReader to load TreeEnsemble proto --- .../python/estimator/canned/boosted_trees.py | 60 +++++----------------- 1 file changed, 14 insertions(+), 46 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 848698311c..62757ef588 100644 --- 
a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -24,8 +24,6 @@ import functools import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 -from tensorflow.python.client import session as tf_session -from tensorflow.python.eager import context from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn from tensorflow.python.estimator.canned import head as head_lib @@ -43,9 +41,8 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.losses import losses from tensorflow.python.summary import summary -from tensorflow.python.training import checkpoint_management +from tensorflow.python.training import checkpoint_utils from tensorflow.python.training import distribute as distribute_lib -from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util from tensorflow.python.util.tf_export import estimator_export @@ -61,8 +58,6 @@ _HOLD_FOR_MULTI_DIM_SUPPORT = object() _DUMMY_NUM_BUCKETS = -1 _DUMMY_NODE_ID = -1 -_BOOSTED_TREES_SERIALIZED_PROTO = '_BOOSTED_TREES_SERIALIZED_PROTO' - def _get_transformed_features(features, sorted_feature_columns): """Gets the transformed features from features/feature_columns pair. @@ -770,8 +765,6 @@ def _bt_model_fn( bucketized_features=input_feature_list, logits_dimension=head.logits_dimension) else: - _, serialized_proto = tree_ensemble.serialize() - ops.add_to_collection(_BOOSTED_TREES_SERIALIZED_PROTO, serialized_proto) if is_single_machine: local_tree_ensemble = tree_ensemble ensemble_reload = control_flow_ops.no_op() @@ -980,9 +973,7 @@ def _compute_feature_importances(tree_ensemble, by its feature importance. feature_importances: A list of corresponding feature importance. 
""" - tree_importances = [_compute_feature_importances_per_tree(tree, - num_features, - normalize) + tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) @@ -1021,43 +1012,20 @@ class _BoostedTrees(estimator.Estimator): Raises: ValueError: Empty ensemble. """ - tree_ensemble = self._read_tree_ensemble_from_checkpoint() - if tree_ensemble: - num_features = _calculate_num_features(self._sorted_feature_columns) - names_for_idx = np.array( - _generate_feature_name_mapping(self._sorted_feature_columns)) - idx, importances = _compute_feature_importances(tree_ensemble, - num_features, - normalize) - return names_for_idx[idx], importances - else: + reader = checkpoint_utils.load_checkpoint(self._model_dir) + serialized = reader.get_tensor('boosted_trees:0_serialized') + if not serialized: raise ValueError('Found empty serialized string for TreeEnsemble.' 'You should only call the method after training.') - - def _read_tree_ensemble_from_checkpoint(self): - with context.graph_mode(): - checkpoint_path = checkpoint_management.latest_checkpoint( - self._model_dir) - if not checkpoint_path: - raise ValueError("Couldn't find trained model at %s." % self._model_dir) - - with ops.Graph().as_default() as g: - with tf_session.Session(config=self._session_config) as session: - meta_file = checkpoint_path + '.meta' - graph_saver = saver.import_meta_graph(meta_file) - graph_saver.restore(session, checkpoint_path) - - serialized_proto = ops.get_collection(_BOOSTED_TREES_SERIALIZED_PROTO) - assert len(serialized_proto) == 1 - serialized_proto_string = session.run(serialized_proto[0]) - - if serialized_proto_string: - tree_ensemble = boosted_trees_pb2.TreeEnsemble() - tree_ensemble.ParseFromString(serialized_proto_string) - return tree_ensemble - else: - # serialized_proto_string is empty string before training. 
- return None + ensemble_proto = boosted_trees_pb2.TreeEnsemble() + ensemble_proto.ParseFromString(serialized) + + num_features = _calculate_num_features(self._sorted_feature_columns) + names_for_feature_id = np.array( + _generate_feature_name_mapping(self._sorted_feature_columns)) + sorted_feature_id, importances = _compute_feature_importances( + ensemble_proto, num_features, normalize) + return names_for_feature_id[sorted_feature_id], importances @estimator_export('estimator.BoostedTreesClassifier') -- cgit v1.2.3 From 7ed06809ba3aabf1d93cf726a0b9b6416d80ef85 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 17 Aug 2018 14:11:50 +0800 Subject: TST: revise test case --- .../python/estimator/canned/boosted_trees_test.py | 547 +++++++++++++++------ 1 file changed, 410 insertions(+), 137 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 8625c7d968..80d9ac7552 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -17,9 +17,13 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + +from google.protobuf import text_format import numpy as np from tensorflow.core.kernels.boosted_trees import boosted_trees_pb2 +from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.estimator import model_fn from tensorflow.python.estimator import run_config @@ -31,10 +35,12 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gen_boosted_trees_ops +from tensorflow.python.ops import boosted_trees_ops from tensorflow.python.ops import resources from tensorflow.python.ops import variables from tensorflow.python.platform import 
googletest from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import saver as saver_lib from tensorflow.python.training import session_run_hook NUM_FEATURES = 3 @@ -91,17 +97,6 @@ def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None): return _input_fn -def _compute_feature_importances_np(feature_gains, normalize): - if normalize: - feature_gains /= np.sum(feature_gains, axis=1, keepdims=True) - feature_gains = np.nan_to_num(feature_gains) - feature_importances = np.sum(feature_gains, axis=0) / len(feature_gains) - feature_importances /= np.sum(feature_importances) - return np.nan_to_num(feature_importances) - else: - return np.sum(feature_gains, axis=0) / len(feature_gains) - - class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): def setUp(self): @@ -165,9 +160,12 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): predictions = list(est.predict(input_fn=predict_input_fn)) self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - sorted_features, importances = est.experimental_feature_importances() - self.assertAllEqual([], sorted_features) - self.assertAllEqual([], importances) + + with self.assertRaisesRegexp(ValueError, 'empty'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(ValueError, 'empty'): + est.experimental_feature_importances(normalize=True) def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -558,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testCalculateFeatureImportances(self): + def testExperimentalFeatureImportancesWithTraining(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -572,71 
+570,358 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # Train for a few steps, and validate final checkpoint. est.train(input_fn, steps=num_steps) - # TreeEnsemble Proto: - # tree_ensemble: trees { - # nodes { - # bucketized_split { - # feature_id: 2 - # threshold: 2 - # left_id: 1 - # right_id: 2 - # } - # metadata { - # gain: 0.426666676998 - # } - # } - # ...... - # nodes { - # bucketized_split { - # threshold: 1 - # left_id: 5 - # right_id: 6 - # } - # metadata { - # gain: 0.133481562138 - # original_leaf { - # scalar: 0.066666662693 - # } - # } - # } - # ...... - # nodes { - # bucketized_split { - # left_id: 11 - # right_id: 12 - # } - # metadata { - # gain: 0.400360047817 - # original_leaf { - # scalar: 0.0599950700998 - # } - # } - # } - # } - # trees { - # nodes { - # leaf { - # } - # } - # } - # tree_weights: 1.0 - # tree_weights: 1.0 - # ...... feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_gains = [[0.133481562138 + 0.400360047817, 0.426666676998, 0.0], # 1st tree. - [0.0, 0.0, 0.0]] # 2nd tree. 
- sorted_features, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, sorted_features) - self.assertAllClose(_compute_feature_importances_np(feature_gains, False), - importances) + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.2669208, 0.21333334, 0.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + + def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): + with ops.Graph().as_default(): + with ops.name_scope('boosted_trees') as name: + tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) + tree_ensemble_proto = boosted_trees_pb2.TreeEnsemble() + text_format.Merge(tree_ensemble_text, tree_ensemble_proto) + stamp_token, _ = tree_ensemble.serialize() + restore_op = tree_ensemble.deserialize( + stamp_token, tree_ensemble_proto.SerializeToString()) + + with session.Session() as sess: + resources.initialize_resources(resources.shared_resources()).run() + restore_op.run() + saver = saver_lib.Saver() + save_path = os.path.join(est.model_dir, 'model.ckpt') + saver.save(sess, save_path) + + def testExperimentalCalculateFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 5 + right_id: 6 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + 
feature_id: 0 + left_id: 7 + right_id: 8 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 1.0 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([2.5, 1.5, 1.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalCalculateFeatureImportancesWithTreeWeights(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 12.5 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 3 + right_id: 4 + } + metadata { + gain: 5.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + } + tree_weights: 0.4 + tree_weights: 0.6 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([5.0, 3.0, 2.0], importances) + + feature_names, importances = 
est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalCalculateFeatureImportancesWithEmptyTree(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([1.5, 0.5, 0.0], importances) - sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, sorted_features1) - self.assertAllClose(_compute_feature_importances_np(feature_gains, True), - importances1) + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.75, 0.25, 0.0], importances) - def testCalculateFeatureImportancesWithIndicatorColumn(self): + def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + trees { + nodes { + leaf { + scalar: 0.0 + } + } + } + tree_weights: 1.0 + 
tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + # Reverse order because feature importances are sorted by np.argsort(f)[::-1] + feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.0, 0.0, 0.0], importances) + + with self.assertRaisesRegexp(AssertionError, 'empty or root node'): + est.experimental_feature_importances(normalize=True) + + def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=5, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 4.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: 1.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 8.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + 
self.assertAllClose([2, 1.2, 0.8], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + + def testExperimentalFeatureImportancesWithIndicatorColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) @@ -645,76 +930,64 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'an_uninformative_feature', dtype=dtypes.float32), BUCKET_BOUNDARIES) - labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32) - # Our categorical feature defines the labels perfectly - input_fn = numpy_io.numpy_input_fn( - x={ - 'an_uninformative_feature': np.array([1, 1, 1, 1, 1]), - 'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']), - }, - y=labels, - batch_size=5, - shuffle=False) - - # Train depth 1 tree. 
est = boosted_trees.BoostedTreesRegressor( feature_columns=[bucketized_col, feature_indicator], n_batches_per_layer=1, - n_trees=1, + n_trees=2, learning_rate=1.0, max_depth=1) - num_steps = 1 - est.train(input_fn, steps=num_steps) + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + nodes { + bucketized_split { + feature_id: 3 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 3.0 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - # TreeEnsemble Proto: - # trees { - # nodes { - # bucketized_split { - # feature_id: 2 - # left_id: 1 - # right_id: 2 - # } - # metadata { - # gain: 15.5952005386 - # } - # } - # nodes { - # leaf { - # } - # } - # nodes { - # leaf { - # scalar: 5.7000002861 - # } - # } - # } - # trees { - # nodes { - # leaf { - # } - # } - # } - # tree_weights: 1.0 - # tree_weights: 1.0 feature_names_expected = ['categorical_indicator:good', - # Reverse order because feature importances - # are sorted by np.argsort(f)[::-1] + 'an_uninformative_feature_bucketized', 'categorical_indicator:ok', - 'categorical_indicator:bad', - 'an_uninformative_feature_bucketized'] - feature_gains = [[15.5952005386, 0.0, 0.0, 0.0], # 1st tree. - [0.0, 0.0, 0.0, 0.0]] # 2nd tree. 
- - sorted_features, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, sorted_features) - self.assertAllClose(_compute_feature_importances_np(feature_gains, False), - importances) - - sorted_features1, importances1 = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, sorted_features1) - self.assertAllClose(_compute_feature_importances_np(feature_gains, True), - importances1) + 'categorical_indicator:bad'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) class ModelFnTests(test_util.TensorFlowTestCase): -- cgit v1.2.3 From 52d637e604dacd3bff836a27bd991f95966226e8 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 19 Aug 2018 17:28:12 +0800 Subject: CLN: normalize is False by default --- tensorflow/python/estimator/canned/boosted_trees.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 62757ef588..c59b59b653 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -956,9 +956,7 @@ def _compute_feature_importances_per_tree(tree, num_features): return importances -def _compute_feature_importances(tree_ensemble, - num_features, - normalize=True): +def _compute_feature_importances(tree_ensemble, num_features, normalize): """Compute the feature importances. The higher the value, the more important the feature. 
@@ -972,6 +970,9 @@ def _compute_feature_importances(tree_ensemble, sorted_feature_idx: A list of feature_id which is sorted by its feature importance. feature_importances: A list of corresponding feature importance. + + Raises: + AssertionError: Trees are all empty or root node only when normalizing. """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] @@ -996,7 +997,7 @@ class _BoostedTrees(estimator.Estimator): self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) - def experimental_feature_importances(self, normalize=True): + def experimental_feature_importances(self, normalize=False): """Compute the feature importances. The higher the value, the more important the corresponding feature. @@ -1005,9 +1006,9 @@ class _BoostedTrees(estimator.Estimator): normalize: If True, normalize the feature importances. Returns: - sorted_feature_names: A list of feature name which is sorted + sorted_feature_names: 1-D array of feature name which is sorted by its feature importance. - feature_importances: A list of corresponding feature importance. + feature_importances: 1-D array of the corresponding feature importance. Raises: ValueError: Empty ensemble. 
-- cgit v1.2.3 From ad18b2dd923329ef598ee12b9bafd7fc63d7013d Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Mon, 20 Aug 2018 00:41:57 +0900 Subject: Implement extract_volume_patches --- .../base_api/api_def_ExtractVolumePatches.pbtxt | 49 ++++++ tensorflow/core/kernels/BUILD | 14 ++ .../core/kernels/extract_volume_patches_op.cc | 189 +++++++++++++++++++++ .../core/kernels/extract_volume_patches_op.h | 58 +++++++ .../kernels/extract_volume_patches_op_gpu.cu.cc | 38 +++++ tensorflow/core/ops/array_ops.cc | 103 +++++++++++ tensorflow/python/kernel_tests/BUILD | 12 ++ .../kernel_tests/extract_volume_patches_op_test.py | 130 ++++++++++++++ 8 files changed, 593 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.cc create mode 100644 tensorflow/core/kernels/extract_volume_patches_op.h create mode 100644 tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc create mode 100644 tensorflow/python/kernel_tests/extract_volume_patches_op_test.py (limited to 'tensorflow') diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt new file mode 100644 index 0000000000..3499ade368 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt @@ -0,0 +1,49 @@ +op { + graph_op_name: "ExtractVolumePatches" + in_arg { + name: "images" + description: < +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/util/tensor_format.h" + +namespace 
tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + +static inline void ParseAttributeVec5(OpKernelConstruction* context, + const string& attr_name, + std::vector* attr) { + OP_REQUIRES_OK(context, context->GetAttr(attr_name, attr)); + OP_REQUIRES( + context, (*attr)[0] == 1 && (*attr)[4] == 1, + errors::Unimplemented("Only support ", attr_name, " across space.")); + OP_REQUIRES(context, (*attr)[1] >= 1 && (*attr)[2] >= 1 && (*attr)[3] >= 1, + errors::OutOfRange(attr_name, " is out of range.")); +} + +template +class ExtractVolumePatchesOp : public UnaryOp { + public: + explicit ExtractVolumePatchesOp(OpKernelConstruction* context) + : UnaryOp(context) { + ParseAttributeVec5(context, "ksizes", &ksizes_); + ParseAttributeVec5(context, "strides", &strides_); + //ParseAttributeVec5(context, "rates", &rates_); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + } + + void Compute(OpKernelContext* context) override { + // Input tensor is of the following dimensions: + // [ batch, in_planes, in_rows, in_cols, channels ] + const Tensor& input = context->input(0); + OP_REQUIRES(context, input.dims() == 5, + errors::InvalidArgument("input must be 5-dimensional", + input.shape().DebugString())); + + const int batch = input.dim_size(0); + const int in_planes = input.dim_size(1); + const int in_rows = input.dim_size(2); + const int in_cols = input.dim_size(3); + const int depth = input.dim_size(4); + + const int ksize_planes = ksizes_[1]; + const int ksize_rows = ksizes_[2]; + const int ksize_cols = ksizes_[3]; + + const int stride_planes = strides_[1]; + const int stride_rows = strides_[2]; + const int stride_cols = strides_[3]; + + /* + // In order to enable rates, uncomment the following lines and use + // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize + // calls. 
+ + const int rate_planes = rates_[1]; + const int rate_rows = rates_[2]; + const int rate_cols = rates_[3]; + + const int ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + const int ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); + const int ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); + */ + + int64 out_planes = 0, out_rows = 0, out_cols = 0; + int64 pad_planes = 0, pad_rows = 0, pad_cols = 0; + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_planes, ksize_planes, stride_planes, + padding_, &out_planes, &pad_planes)); + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_rows, ksize_rows, stride_rows, + padding_, &out_rows, &pad_rows)); + OP_REQUIRES_OK(context, + GetWindowedOutputSize(in_cols, ksize_cols, stride_cols, + padding_, &out_cols, &pad_cols)); + + const std::vector out_sizes = {batch, out_planes, out_rows, out_cols, + ksize_planes * ksize_rows * ksize_cols * depth}; + TensorShape out_shape(out_sizes); + + Tensor* output = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output)); + + // If there is nothing to compute, return. + if (out_shape.num_elements() == 0) { + return; + } + + functor::ExtractVolumePatchesForward()( + context->eigen_device(), input.tensor(), + ksize_planes, ksize_rows, ksize_cols, + stride_planes, stride_rows, stride_cols, + /* rate_planes, rate_rows, rate_cols, */ + BrainPadding2EigenPadding(padding_), output->tensor()); + } + + private: + std::vector ksizes_; + std::vector strides_; + // std::vector rates_; + + Padding padding_; + + TF_DISALLOW_COPY_AND_ASSIGN(ExtractVolumePatchesOp); +}; + +// Registration of the CPU implementations. +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ExtractVolumePatches").Device(DEVICE_CPU).TypeConstraint("T"), \ + ExtractVolumePatchesOp); + +TF_CALL_REAL_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +#if GOOGLE_CUDA + +// Forward declarations of the functor specializations for GPU. 
+namespace functor { + +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void ExtractVolumePatchesForward::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor input, \ + int patch_planes, int patch_rows, int patch_cols, \ + int stride_planes, int stride_rows, int stride_cols, \ + /* int rate_planes, int rate_rows, int rate_cols, */ \ + const Eigen::PaddingType& padding, \ + typename TTypes::Tensor output); \ + extern template struct ExtractVolumePatchesForward; + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); + +#undef DECLARE_GPU_SPEC + +} // namespace functor + +// Registration of the GPU implementations. +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("ExtractVolumePatches").Device(DEVICE_GPU).TypeConstraint("T"), \ + ExtractVolumePatchesOp); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +#endif // GOOGLE_CUDA + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/extract_volume_patches_op.h b/tensorflow/core/kernels/extract_volume_patches_op.h new file mode 100644 index 0000000000..e2418334ac --- /dev/null +++ b/tensorflow/core/kernels/extract_volume_patches_op.h @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ +#define TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_volume_patch.h" + +namespace tensorflow { +namespace functor { + +template +struct ExtractVolumePatchesForward { + void operator()(const Device& d, typename TTypes::ConstTensor input, + int patch_planes, int patch_rows, int patch_cols, + int stride_planes, int stride_rows, int stride_cols, + /* int rate_planes, int rate_rows, int rate_cols, */ + const Eigen::PaddingType& padding, + typename TTypes::Tensor output) { + const int64 N = std::max(input.size(), output.size()); + if (N <= std::numeric_limits::max()) { + auto output_32bit = To32Bit(output); + output_32bit.device(d) = + To32Bit(input) + .extract_volume_patches(patch_cols, patch_rows, patch_planes, + stride_cols, stride_rows, stride_planes, + padding) + .reshape(output_32bit.dimensions()); + } else { + output.device(d) = + input + .extract_volume_patches(patch_cols, patch_rows, patch_planes, + stride_cols, stride_rows, stride_planes, + padding) + .reshape(output.dimensions()); + } + } +}; + +} // namespace functor +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_EXTRACT_VOLUME_PATCHES_OP_H_ diff --git a/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc new file mode 100644 index 0000000000..08b3386c13 --- /dev/null +++ b/tensorflow/core/kernels/extract_volume_patches_op_gpu.cu.cc @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/extract_volume_patches_op.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +namespace functor { + +#define REGISTER(T) template struct ExtractVolumePatchesForward; + +TF_CALL_GPU_NUMBER_TYPES(REGISTER); + +#undef REGISTER + +} // end namespace functor +} // end namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index ef8ad7972c..48d8327a9e 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2549,6 +2549,109 @@ REGISTER_OP("ExtractImagePatches") // -------------------------------------------------------------------------- +// To enable rates, uncomment all lines commented below and use ksize_*_eff +// as the second parameter of all GetWindowedOutputSizeVerbose calls instead +// of ksize_*. 
+REGISTER_OP("ExtractVolumePatches") + .Input("images: T") + .Output("patches: T") + .Attr("ksizes: list(int) >= 5") + .Attr("strides: list(int) >= 5") + /* .Attr("rates: list(int) >= 5") */ + .Attr("T: realnumbertype") + .Attr(GetPaddingAttrString()) + .SetShapeFn([](InferenceContext* c) { + ShapeHandle input_shape; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 5, &input_shape)); + + std::vector ksizes; + TF_RETURN_IF_ERROR(c->GetAttr("ksizes", &ksizes)); + if (ksizes.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the ksizes attribute to contain 5 " + "values, but got: ", + ksizes.size()); + } + + std::vector strides; + TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); + if (strides.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the stride attribute to contain 5 " + "values, but got: ", + strides.size()); + } + + /* + std::vector rates; + TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates)); + if (rates.size() != 5) { + return errors::InvalidArgument( + "ExtractVolumePatches requires the rates attribute to contain 5 " + "values, but got: ", + rates.size()); + } + */ + + int32 ksize_planes = ksizes[1]; + int32 ksize_rows = ksizes[2]; + int32 ksize_cols = ksizes[3]; + + int32 stride_planes = strides[1]; + int32 stride_rows = strides[2]; + int32 stride_cols = strides[3]; + + /* + int32 rate_planes = rates[1]; + int32 rate_rows = rates[2]; + int32 rate_cols = rates[3]; + + int32 ksize_planes_eff = ksize_planes + (ksize_planes - 1) * (rate_planes - 1); + int32 ksize_rows_eff = ksize_rows + (ksize_rows - 1) * (rate_rows - 1); + int32 ksize_cols_eff = ksize_cols + (ksize_cols - 1) * (rate_cols - 1); + */ + + DimensionHandle batch_size_dim = c->Dim(input_shape, 0); + DimensionHandle in_planes_dim = c->Dim(input_shape, 1); + DimensionHandle in_rows_dim = c->Dim(input_shape, 2); + DimensionHandle in_cols_dim = c->Dim(input_shape, 3); + DimensionHandle output_depth_dim; + TF_RETURN_IF_ERROR(c->Multiply( + 
c->Dim(input_shape, 4), ksize_planes * ksize_rows * ksize_cols, &output_depth_dim)); + + if (!c->ValueKnown(in_planes_dim) || !c->ValueKnown(in_rows_dim) || !c->ValueKnown(in_cols_dim)) { + ShapeHandle output_shape = + c->MakeShape({batch_size_dim, InferenceContext::kUnknownDim, + InferenceContext::kUnknownDim, output_depth_dim}); + c->set_output(0, output_shape); + return Status::OK(); + } + auto in_planes = c->Value(in_planes_dim); + auto in_rows = c->Value(in_rows_dim); + auto in_cols = c->Value(in_cols_dim); + + Padding padding; + TF_RETURN_IF_ERROR(c->GetAttr("padding", &padding)); + + int64 output_planes, output_rows, output_cols; + int64 padding_before, padding_after; + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_planes, ksize_planes, stride_planes, padding, &output_planes, + &padding_before, &padding_after)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_rows, ksize_rows, stride_rows, padding, &output_rows, + &padding_before, &padding_after)); + TF_RETURN_IF_ERROR(GetWindowedOutputSizeVerbose( + in_cols, ksize_cols, stride_cols, padding, &output_cols, + &padding_before, &padding_after)); + ShapeHandle output_shape = c->MakeShape( + {batch_size_dim, output_planes, output_rows, output_cols, output_depth_dim}); + c->set_output(0, output_shape); + return Status::OK(); + }); + +// -------------------------------------------------------------------------- + REGISTER_OP("Bitcast") .Input("input: T") .Output("output: type") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 2451dc7257..bb896085f2 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1582,6 +1582,18 @@ cuda_py_test( ], ) +cuda_py_test( + name = "extract_volume_patches_op_test", + size = "small", + srcs = ["extract_volume_patches_op_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + 
"//tensorflow/python:framework_for_generated_wrappers", + ], +) + cuda_py_test( name = "functional_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py new file mode 100644 index 0000000000..215474f6db --- /dev/null +++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py @@ -0,0 +1,130 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for ExtractVolumePatches op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + +class ExtractVolumePatches(test.TestCase): + """Functional tests for ExtractVolumePatches op.""" + + def _VerifyValues(self, image, ksizes, strides, padding, patches): + """Tests input-output pairs for the ExtractVolumePatches op. + + Args: + image: Input tensor with shape: + [batch, in_planes, in_rows, in_cols, depth]. + ksizes: Patch size specified as: [ksize_planes, ksize_rows, ksize_cols]. + strides: Output strides, specified as: + [stride_planes, stride_rows, stride_cols]. + padding: Padding type. + patches: Expected output. 
+ + Note: + rates are not supported as of now. + """ + ksizes = [1] + ksizes + [1] + strides = [1] + strides + [1] + + with self.test_session(use_gpu=True): + out_tensor = array_ops.extract_volume_patches( + constant_op.constant(image), + ksizes=ksizes, + strides=strides, + padding=padding, + name="im2col_3d") + self.assertAllClose(patches, out_tensor.eval()) + + def testKsize1x1x1Stride1x1x1(self): + """Verifies that for 1x1x1 kernel the output equals the input.""" + image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1 + patches = image + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 1], + strides=[1, 1, 1], + padding=padding, + patches=patches) + + def testKsize1x1x1Stride2x3x4(self): + """Test for 1x1x1 kernel and strides.""" + image = np.arange(6 * 2 * 4 * 5 * 3).reshape([6, 2, 4, 5, 3]) + 1 + patches = image[:, ::2, ::3, ::4, :] + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 1], + strides=[2, 3, 4], + padding=padding, + patches=patches) + + def testKsize1x1x2Stride2x2x3(self): + """Test for 1x1x2 kernel and strides.""" + image = np.arange(45).reshape([1, 3, 3, 5, 1]) + 1 + patches = np.array([[[[[ 1, 2], + [ 4, 5]], + [[11, 12], + [14, 15]]], + [[[31, 32], + [34, 35]], + [[41, 42], + [44, 45]]]]]) + for padding in ["VALID", "SAME"]: + self._VerifyValues( + image, + ksizes=[1, 1, 2], + strides=[2, 2, 3], + padding=padding, + patches=patches) + + def testKsize2x2x2Stride1x1x1Valid(self): + """Test for 2x2x2 kernel with VALID padding.""" + image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1 + patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8]]]]]) + self._VerifyValues( + image, + ksizes=[2, 2, 2], + strides=[1, 1, 1], + padding="VALID", + patches=patches) + + def testKsize2x2x2Stride1x1x1Same(self): + """Test for 2x2x2 kernel with SAME padding.""" + image = np.arange(8).reshape([1, 2, 2, 2, 1]) + 1 + patches = np.array([[[[[1, 2, 3, 4, 5, 6, 7, 8], + [2, 0, 4, 0, 6, 0, 8, 0]], + [[3, 
4, 0, 0, 7, 8, 0, 0], + [4, 0, 0, 0, 8, 0, 0, 0]]], + [[[5, 6, 7, 8, 0, 0, 0, 0], + [6, 0, 8, 0, 0, 0, 0, 0]], + [[7, 8, 0, 0, 0, 0, 0, 0], + [8, 0, 0, 0, 0, 0, 0, 0]]]]]) + self._VerifyValues( + image, + ksizes=[2, 2, 2], + strides=[1, 1, 1], + padding="SAME", + patches=patches) + +if __name__ == "__main__": + test.main() -- cgit v1.2.3 From 5630efcca924563b549a788b4b5ec93fea91e559 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 21 Aug 2018 13:06:02 +0800 Subject: CLN: revise according to comments --- .../estimator/python/estimator/boosted_trees.py | 5 +-- .../python/estimator/canned/boosted_trees.py | 19 ++++++----- .../python/estimator/canned/boosted_trees_test.py | 37 ++++++++++++++++------ 3 files changed, 42 insertions(+), 19 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index 7ed77bcce6..e6bdc97fe5 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -33,7 +33,7 @@ def _validate_input_fn_and_repeat_dataset(train_input_fn): return _input_fn -class _BoostedTreesEstimator(estimator.Estimator): +class _BoostedTreesEstimator(canned_boosted_trees._BoostedTrees): # pylint: disable=protected-access """An Estimator for Tensorflow Boosted Trees models.""" def __init__(self, @@ -115,7 +115,8 @@ class _BoostedTreesEstimator(estimator.Estimator): config=config) super(_BoostedTreesEstimator, self).__init__( - model_fn=_model_fn, model_dir=model_dir, config=config) + model_fn=_model_fn, model_dir=model_dir, config=config, + feature_columns=feature_columns) # pylint:enable=protected-access diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index c59b59b653..d051399b52 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ 
b/tensorflow/python/estimator/canned/boosted_trees.py @@ -203,7 +203,7 @@ def _generate_feature_name_mapping(sorted_feature_columns): sorted_feature_columns: a list/set of tf.feature_column sorted by name. Returns: - feature_name_mapping: a list of feature name. + feature_name_mapping: a list of feature names indexed by the feature ids. """ names = [] for column in sorted_feature_columns: @@ -962,17 +962,19 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): The higher the value, the more important the feature. Args: - tree_ensemble: TreeEnsemble. + tree_ensemble: a trained tree ensemble, instance of proto + boosted_trees.TreeEnsemble. num_features: The total number of feature ids. normalize: If True, normalize the feature importances. Returns: sorted_feature_idx: A list of feature_id which is sorted by its feature importance. - feature_importances: A list of corresponding feature importance. + feature_importances: A list of corresponding feature importances. Raises: - AssertionError: Trees are all empty or root node only when normalizing. + AssertionError: If normalize = True and normalization is not possible + (e.g. ensemble is empty or trees contain only a root node). """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) for tree in tree_ensemble.trees] @@ -982,7 +984,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): axis=0) / np.sum(tree_weights) if normalize: normalizer = np.sum(feature_importances) - assert normalizer > 0, 'Trees are all empty or root node only.' + assert normalizer > 0, 'Trees are all empty or contains only a root node.' 
feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] @@ -990,15 +992,17 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): class _BoostedTrees(estimator.Estimator): + """Base class for boosted trees estimators.""" def __init__(self, model_fn, model_dir, config, feature_columns): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) + self._num_features = _calculate_num_features(self._sorted_feature_columns) def experimental_feature_importances(self, normalize=False): - """Compute the feature importances. + """Computes gain-based feature importances. The higher the value, the more important the corresponding feature. @@ -1021,11 +1025,10 @@ class _BoostedTrees(estimator.Estimator): ensemble_proto = boosted_trees_pb2.TreeEnsemble() ensemble_proto.ParseFromString(serialized) - num_features = _calculate_num_features(self._sorted_feature_columns) names_for_feature_id = np.array( _generate_feature_name_mapping(self._sorted_feature_columns)) sorted_feature_id, importances = _compute_feature_importances( - ensemble_proto, num_features, normalize) + ensemble_proto, self._num_features, normalize) return names_for_feature_id[sorted_feature_id], importances diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 80d9ac7552..c764831279 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -161,12 +161,6 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllClose([[0], [0], [0], [0], [0]], [pred['class_ids'] for pred in predictions]) - with self.assertRaisesRegexp(ValueError, 'empty'): - est.experimental_feature_importances(normalize=False) - - with self.assertRaisesRegexp(ValueError, 'empty'): - 
est.experimental_feature_importances(normalize=True) - def testTrainAndEvaluateBinaryClassifier(self): input_fn = _make_train_input_fn(is_classification=True) @@ -556,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testExperimentalFeatureImportancesWithTraining(self): + def testExperimentalFeatureImportancesWithTrainedEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -580,6 +574,31 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + def testFeatureImportancesOnEmtpyEnsemble(self): + input_fn = _make_train_input_fn(is_classification=True) + + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + class BailOutWithoutTraining(session_run_hook.SessionRunHook): + + def before_run(self, run_context): + raise StopIteration('to bail out.') + + # The step-0 checkpoint will have only an empty ensemble. + est.train(input_fn, + steps=100, # must stop at 0 anyway. 
+ hooks=[BailOutWithoutTraining()]) + + with self.assertRaisesRegexp(ValueError, 'empty serialized string'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(ValueError, 'empty serialized string'): + est.experimental_feature_importances(normalize=True) + def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): with ops.Graph().as_default(): with ops.name_scope('boosted_trees') as name: @@ -823,7 +842,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.0, 0.0, 0.0], importances) - with self.assertRaisesRegexp(AssertionError, 'empty or root node'): + with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): @@ -921,7 +940,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalFeatureImportancesWithIndicatorColumn(self): + def TestFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) -- cgit v1.2.3 From e39bbe4947801c10c41e96fe4cbbb77817136e1d Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 21 Aug 2018 13:52:38 +0800 Subject: TST: add test case for negative feature importances --- .../python/estimator/canned/boosted_trees.py | 5 ++- .../python/estimator/canned/boosted_trees_test.py | 52 +++++++++++++++++++--- 2 files changed, 50 insertions(+), 7 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index d051399b52..85bc934a0e 100644 
--- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -973,7 +973,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): feature_importances: A list of corresponding feature importances. Raises: - AssertionError: If normalize = True and normalization is not possible + AssertionError: If feature importances contain negative value. + Or if normalize = True and normalization is not possible (e.g. ensemble is empty or trees contain only a root node). """ tree_importances = [_compute_feature_importances_per_tree(tree, num_features) @@ -982,6 +983,8 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) feature_importances = np.sum(tree_importances * tree_weights, axis=0) / np.sum(tree_weights) + assert np.all(feature_importances >= 0), ('feature_importances ' + 'must be non-negative.') if normalize: normalizer = np.sum(feature_importances) assert normalizer > 0, 'Trees are all empty or contains only a root node.' 
diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index c764831279..9362b927e2 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -550,7 +550,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertEqual(1, ensemble.trees[0].nodes[0].bucketized_split.feature_id) self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold) - def testExperimentalFeatureImportancesWithTrainedEnsemble(self): + def testFeatureImportancesWithTrainedEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): save_path = os.path.join(est.model_dir, 'model.ckpt') saver.save(sess, save_path) - def testExperimentalCalculateFeatureImportances(self): + def testFeatureImportances(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -702,7 +702,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalCalculateFeatureImportancesWithTreeWeights(self): + def testFeatureImportancesWithTreeWeights(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -758,7 +758,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testExperimentalCalculateFeatureImportancesWithEmptyTree(self): + def testFeatureImportancesWithEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -809,7 +809,7 @@ class 
BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.75, 0.25, 0.0], importances) - def testExperimentalCalculateFeatureImportancesWithAllEmptyTree(self): + def testFeatureImportancesWithAllEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -845,7 +845,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) - def testExperimentalCalculateFeatureImportancesWithMoreTrees(self): + def testFeatureImportancesWithMoreTrees(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -1008,6 +1008,46 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) + def testNegativeFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: -5.0 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 + } + } + } + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=True) + class ModelFnTests(test_util.TensorFlowTestCase): """Tests bt_model_fn including unexposed internal functionalities.""" -- cgit v1.2.3 From 
88d722c13418fd177c3e03e954307fdfa86a474b Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 21 Aug 2018 14:07:55 +0800 Subject: ENH: don't divide by the sum of tree weights --- tensorflow/python/estimator/canned/boosted_trees.py | 3 +-- tensorflow/python/estimator/canned/boosted_trees_test.py | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 85bc934a0e..2f5e46b559 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -981,8 +981,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): for tree in tree_ensemble.trees] tree_importances = np.array(tree_importances) tree_weights = np.array(tree_ensemble.tree_weights).reshape(-1, 1) - feature_importances = np.sum(tree_importances * tree_weights, - axis=0) / np.sum(tree_weights) + feature_importances = np.sum(tree_importances * tree_weights, axis=0) assert np.all(feature_importances >= 0), ('feature_importances ' 'must be non-negative.') if normalize: diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 9362b927e2..54ad052915 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -556,7 +556,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=1, + n_trees=2, max_depth=5) # It will stop after 5 steps because of the max depth and num trees. 
@@ -568,11 +568,11 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.2669208, 0.21333334, 0.0], importances) + self.assertAllClose([0.833933, 0.606342, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.55579074, 0.44420926, 0.0], importances) + self.assertAllClose([0.579010, 0.420990, 0.0], importances) def testFeatureImportancesOnEmtpyEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) @@ -696,7 +696,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2.5, 1.5, 1.0], importances) + self.assertAllClose([5.0, 3.0, 2.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -803,7 +803,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([1.5, 0.5, 0.0], importances) + self.assertAllClose([3.0, 1.0, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -934,7 +934,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] 
feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2, 1.2, 0.8], importances) + self.assertAllClose([10, 6.0, 4.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) @@ -1002,7 +1002,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'categorical_indicator:bad'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([2.5, 1.5, 1.0, 0.0], importances) + self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances) feature_names, importances = est.experimental_feature_importances(normalize=True) self.assertAllEqual(feature_names_expected, feature_names) -- cgit v1.2.3 From 73c8cbb413029cf3e540e99b883ae89f4b08fc11 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 21 Aug 2018 14:18:27 +0800 Subject: TST: add test case for full tree with leaves --- .../python/estimator/canned/boosted_trees_test.py | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 54ad052915..13e1d224bc 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -845,6 +845,117 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, 'empty or contains'): est.experimental_feature_importances(normalize=True) + def testFeatureImportancesWithFullTrees(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=2, + max_depth=5) + + tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + 
feature_id: 2 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + bucketized_split { + feature_id: 0 + left_id: 3 + right_id: 4 + } + metadata { + gain: 3.0 + } + } + nodes { + bucketized_split { + feature_id: 1 + left_id: 5 + right_id: 6 + } + metadata { + gain: 2.0 + } + } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + nodes { + leaf { + scalar: 0.0 + } + } + nodes { + leaf { + scalar: 3.34 + } + } + } + trees { + nodes { + bucketized_split { + feature_id: 0 + left_id: 1 + right_id: 2 + } + metadata { + gain: 2.0 + } + } + nodes { + leaf { + scalar: -0.88 + } + } + nodes { + bucketized_split { + feature_id: 2 + left_id: 3 + right_id: 4 + } + metadata { + gain: 1.0 + } + } + nodes { + leaf { + scalar: 1.88 + } + } + nodes { + leaf { + scalar: -2.88 + } + } + } + tree_weights: 1.0 + tree_weights: 1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances(normalize=False) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([5.0, 3.0, 2.0], importances) + + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) + def testFeatureImportancesWithMoreTrees(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, -- cgit v1.2.3 From 4979d7314dd1f1788751781b2dfbfb9e47c8e20e Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Wed, 22 Aug 2018 11:34:50 +0800 Subject: CLN: revise codes --- .../python/estimator/canned/boosted_trees.py | 18 +- .../python/estimator/canned/boosted_trees_test.py | 338 ++++++--------------- 2 files changed, 101 insertions(+), 255 deletions(-) (limited to 'tensorflow') diff --git 
a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index 2f5e46b559..b1d5d60fb0 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -957,7 +957,7 @@ def _compute_feature_importances_per_tree(tree, num_features): def _compute_feature_importances(tree_ensemble, num_features, normalize): - """Compute the feature importances. + """Computes gain-based feature importances. The higher the value, the more important the feature. @@ -986,7 +986,7 @@ def _compute_feature_importances(tree_ensemble, num_features, normalize): 'must be non-negative.') if normalize: normalizer = np.sum(feature_importances) - assert normalizer > 0, 'Trees are all empty or contains only a root node.' + assert normalizer > 0, 'Trees are all empty or contain only a root node.' feature_importances /= normalizer sorted_feature_idx = np.argsort(feature_importances)[::-1] @@ -1000,8 +1000,11 @@ class _BoostedTrees(estimator.Estimator): super(_BoostedTrees, self).__init__( model_fn=model_fn, model_dir=model_dir, config=config) - self._sorted_feature_columns = sorted(feature_columns, key=lambda tc: tc.name) + self._sorted_feature_columns = sorted(feature_columns, + key=lambda tc: tc.name) self._num_features = _calculate_num_features(self._sorted_feature_columns) + self._names_for_feature_id = np.array( + _generate_feature_name_mapping(self._sorted_feature_columns)) def experimental_feature_importances(self, normalize=False): """Computes gain-based feature importances. @@ -1017,21 +1020,20 @@ class _BoostedTrees(estimator.Estimator): feature_importances: 1-D array of the corresponding feature importance. Raises: - ValueError: Empty ensemble. + ValueError: When attempting to normalize on an empty ensemble + or an ensemble of trees which have no splits. 
""" reader = checkpoint_utils.load_checkpoint(self._model_dir) serialized = reader.get_tensor('boosted_trees:0_serialized') if not serialized: raise ValueError('Found empty serialized string for TreeEnsemble.' - 'You should only call the method after training.') + 'You should only call this method after training.') ensemble_proto = boosted_trees_pb2.TreeEnsemble() ensemble_proto.ParseFromString(serialized) - names_for_feature_id = np.array( - _generate_feature_name_mapping(self._sorted_feature_columns)) sorted_feature_id, importances = _compute_feature_importances( ensemble_proto, self._num_features, normalize) - return names_for_feature_id[sorted_feature_id], importances + return self._names_for_feature_id[sorted_feature_id], importances @estimator_export('estimator.BoostedTreesClassifier') diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 13e1d224bc..24d3a3501e 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -574,7 +574,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.579010, 0.420990, 0.0], importances) - def testFeatureImportancesOnEmtpyEnsemble(self): + def testFeatureImportancesOnEmptyEnsemble(self): input_fn = _make_train_input_fn(is_classification=True) est = boosted_trees.BoostedTreesClassifier( @@ -616,7 +616,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): save_path = os.path.join(est.model_dir, 'model.ckpt') saver.save(sess, save_path) - def testFeatureImportances(self): + def testFeatureImportancesOnNonEmptyEnsemble(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, @@ -656,130 +656,60 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } } nodes { - bucketized_split { - feature_id: 0 - 
left_id: 7 - right_id: 8 - } - metadata { - gain: 1.0 + leaf { + scalar: -0.34 } } - } - trees { nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 1.0 + leaf { + scalar: 1.34 } } nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 - } - metadata { - gain: 1.0 + leaf { + scalar: 0.0 } } - } - tree_weights: 1.0 - tree_weights: 1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([5.0, 3.0, 2.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) - - def testFeatureImportancesWithTreeWeights(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { bucketized_split { feature_id: 0 - left_id: 1 - right_id: 2 + left_id: 7 + right_id: 8 } metadata { - gain: 12.5 + gain: 1.0 } } nodes { - bucketized_split { - feature_id: 1 - left_id: 3 - right_id: 4 + leaf { + scalar: 3.34 } - metadata { - gain: 5.0 + } + nodes { + leaf { + scalar: 1.34 } } } trees { nodes { bucketized_split { - feature_id: 2 + feature_id: 0 left_id: 1 right_id: 2 } metadata { - gain: 5.0 + gain: 1.0 } } - } - tree_weights: 0.4 - tree_weights: 0.6 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - 
self.assertAllClose([5.0, 3.0, 2.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) - - def testFeatureImportancesWithEmptyTree(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 3.0 + leaf { + scalar: 3.34 } } nodes { bucketized_split { - feature_id: 0 + feature_id: 2 left_id: 3 right_id: 4 } @@ -787,47 +717,14 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 1.0 } } - } - trees { - nodes { - leaf { - scalar: 0.0 - } - } - } - tree_weights: 1.0 - tree_weights: 1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_2_bucketized', 'f_0_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([3.0, 1.0, 0.0], importances) - - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.75, 0.25, 0.0], importances) - - def testFeatureImportancesWithAllEmptyTree(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=2, - max_depth=5) - - tree_ensemble_text = """ - trees { nodes { leaf { - scalar: 0.0 + scalar: 3.34 } } - } - trees { nodes { leaf { - scalar: 0.0 + scalar: 1.34 } } } @@ -836,52 +733,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - # Reverse order because 
feature importances are sorted by np.argsort(f)[::-1] - feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] + feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.0, 0.0, 0.0], importances) + self.assertAllClose([5.0, 3.0, 2.0], importances) - with self.assertRaisesRegexp(AssertionError, 'empty or contains'): - est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances(normalize=True) + self.assertAllEqual(feature_names_expected, feature_names) + self.assertAllClose([0.5, 0.3, 0.2], importances) - def testFeatureImportancesWithFullTrees(self): + def testFeatureImportancesWithTreeWeights(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=2, + n_trees=3, max_depth=5) tree_ensemble_text = """ trees { nodes { bucketized_split { - feature_id: 2 + feature_id: 0 left_id: 1 right_id: 2 } metadata { - gain: 2.0 + gain: 12.5 } } nodes { bucketized_split { - feature_id: 0 + feature_id: 1 left_id: 3 right_id: 4 } metadata { - gain: 3.0 - } - } - nodes { - bucketized_split { - feature_id: 1 - left_id: 5 - right_id: 6 - } - metadata { - gain: 2.0 + gain: 5.0 } } nodes { @@ -899,50 +786,38 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): scalar: 0.0 } } - nodes { - leaf { - scalar: 3.34 - } - } } trees { nodes { bucketized_split { - feature_id: 0 + feature_id: 2 left_id: 1 right_id: 2 } metadata { - gain: 2.0 + gain: 5.0 } } nodes { leaf { - scalar: -0.88 - } - } - nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 - } - metadata { - gain: 1.0 + scalar: -0.34 } } nodes { leaf { - scalar: 1.88 + scalar: 1.34 } } + } + trees { nodes { leaf { - scalar: -2.88 + scalar: 0.0 } } } - tree_weights: 
1.0 + tree_weights: 0.4 + tree_weights: 0.6 tree_weights: 1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) @@ -956,100 +831,42 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) - def testFeatureImportancesWithMoreTrees(self): + def testFeatureImportancesWithAllEmptyTree(self): est = boosted_trees.BoostedTreesClassifier( feature_columns=self._feature_columns, n_batches_per_layer=1, - n_trees=5, + n_trees=2, max_depth=5) tree_ensemble_text = """ trees { nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 4.0 - } - } - nodes { - bucketized_split { - feature_id: 1 - left_id: 3 - right_id: 4 - } - metadata { - gain: 3.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 2 - left_id: 1 - right_id: 2 - } - metadata { - gain: 2.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 1 - left_id: 1 - right_id: 2 - } - metadata { - gain: 1.0 - } - } - } - trees { - nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 8.0 + leaf { + scalar: 0.0 } } } trees { nodes { - bucketized_split { - feature_id: 0 - left_id: 1 - right_id: 2 - } - metadata { - gain: 2.0 + leaf { + scalar: 0.0 } } } tree_weights: 1.0 tree_weights: 1.0 - tree_weights: 1.0 - tree_weights: 1.0 - tree_weights: 1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + # Reverse order because feature importances are sorted by np.argsort(f)[::-1] + feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] feature_names, importances = est.experimental_feature_importances(normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([10, 6.0, 4.0], importances) + 
self.assertAllClose([0.0, 0.0, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) - self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2], importances) + with self.assertRaisesRegexp(AssertionError, + 'all empty or contain only a root node'): + est.experimental_feature_importances(normalize=True) def TestFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( @@ -1089,6 +906,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 2.0 } } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + nodes { + leaf { + scalar: 0.0 + } + } } trees { nodes { @@ -1101,6 +933,16 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): gain: 3.0 } } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } } tree_weights: 1.0 tree_weights: 1.0 @@ -1126,6 +968,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): n_trees=1, max_depth=5) + # In order to generate a negative feature importances, + # We assign an invalid value -1 to tree_weights here. 
tree_ensemble_text = """ trees { nodes { @@ -1135,21 +979,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): right_id: 2 } metadata { - gain: -5.0 + gain: 5.0 } } nodes { - bucketized_split { - feature_id: 2 - left_id: 3 - right_id: 4 + leaf { + scalar: -0.34 } - metadata { - gain: 2.0 + } + nodes { + leaf { + scalar: 1.34 } } } - tree_weights: 1.0 + tree_weights: -1.0 """ self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) -- cgit v1.2.3 From 56ea7fc45559f372315b2aedd0a2df15113f5f93 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Wed, 22 Aug 2018 17:51:17 +0800 Subject: ENH: div_no_nan supports to treate negative as zero --- tensorflow/python/ops/math_ops.py | 5 ++++- tensorflow/python/ops/math_ops_test.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 67ea534639..a693b1ebac 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1039,13 +1039,14 @@ def div(x, y, name=None): @tf_export("div_no_nan") -def div_no_nan(x, y, name=None): +def div_no_nan(x, y, name=None, negative_to_zero=False): """Computes an unsafe divide which returns 0 if the y is zero. Args: x: A `Tensor`. Must be one of the following types: `float32`, `float64`. y: A `Tensor` whose dtype is compatible with `x`. name: A name for the operation (optional). + negative_to_zero: If `True`, negative is treated as zero in denominator. Returns: The element-wise value of the x divided by y. 
""" @@ -1058,6 +1059,8 @@ def div_no_nan(x, y, name=None): if x_dtype != y_dtype: raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) + if negative_to_zero: + y = gen_math_ops.maximum(y, 0, name='negative_to_zero') return gen_math_ops.div_no_nan(x, y, name=name) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 6bd41020c5..6e1e5f37c8 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -487,6 +487,19 @@ class DivNoNanTest(test_util.TensorFlowTestCase): tf_result = math_ops.div_no_nan(nums, divs).eval() self.assertAllEqual(tf_result, np_result) + def testNegativeToZero(self): + for dtype in [np.float32, np.float64]: + nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1) + divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24) + + np_result = np.true_divide(nums, divs) + np_result[:, divs[0] <= 0] = 0 + + with self.cached_session(): + tf_result = math_ops.div_no_nan(nums, divs, + negative_to_zero=True).eval() + self.assertAllEqual(tf_result, np_result) + if __name__ == "__main__": googletest.main() -- cgit v1.2.3 From 4c2f6aeaaf4aeafccc85a289a5a105d52738b410 Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Fri, 17 Aug 2018 17:06:47 -0400 Subject: Simplyfing the evaluation step by taking argmax of the softmax of the predictions instead of tf.multinomial --- .../examples/generative_examples/image_captioning_with_attention.ipynb | 2 +- .../eager/python/examples/generative_examples/text_generation.ipynb | 2 +- .../eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb index 315d7a4893..e0f7137184 100644 --- 
a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb @@ -1056,7 +1056,7 @@ "\n", " attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()\n", "\n", - " predicted_id = tf.multinomial(predictions, num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", " result.append(index_word[predicted_id])\n", "\n", " if index_word[predicted_id] == '':\n", diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb index 40bc098724..b13e5aae9b 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb @@ -610,7 +610,7 @@ "\n", " # using a multinomial distribution to predict the word returned by the model\n", " predictions = predictions / temperature\n", - " predicted_id = tf.multinomial(predictions, num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", " \n", " # We pass the predicted word as the next input to the model\n", " # along with the previous hidden state\n", diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb index f1e1f99c57..3e02d9fbb0 100644 --- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb @@ -677,7 +677,7 @@ " attention_weights = tf.reshape(attention_weights, (-1, ))\n", " attention_plot[t] = attention_weights.numpy()\n", "\n", - " predicted_id = tf.multinomial(predictions, 
num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", "\n", " result += targ_lang.idx2word[predicted_id] + ' '\n", "\n", -- cgit v1.2.3 From c36ff7ae1d667979fa49899bf97de26cf35321de Mon Sep 17 00:00:00 2001 From: Yash Katariya Date: Fri, 17 Aug 2018 20:44:14 -0400 Subject: Removing tf.nn.softmax --- .../examples/generative_examples/image_captioning_with_attention.ipynb | 2 +- .../eager/python/examples/generative_examples/text_generation.ipynb | 2 +- .../eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb index e0f7137184..5c753ec0f5 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb @@ -1056,7 +1056,7 @@ "\n", " attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()\n", "\n", - " predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", + " predicted_id = tf.argmax(predictions[0]).numpy()\n", " result.append(index_word[predicted_id])\n", "\n", " if index_word[predicted_id] == '':\n", diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb index b13e5aae9b..e0d5e494d4 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb @@ -610,7 +610,7 @@ "\n", " # using a multinomial distribution to predict the word returned by the model\n", " predictions = predictions / temperature\n", - " 
predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", + " predicted_id = tf.argmax(predictions[0]).numpy()\n", " \n", " # We pass the predicted word as the next input to the model\n", " # along with the previous hidden state\n", diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb index 3e02d9fbb0..560fc8c5a2 100644 --- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb @@ -677,7 +677,7 @@ " attention_weights = tf.reshape(attention_weights, (-1, ))\n", " attention_plot[t] = attention_weights.numpy()\n", "\n", - " predicted_id = tf.argmax(tf.nn.softmax(predictions[0])).numpy()\n", + " predicted_id = tf.argmax(predictions[0]).numpy()\n", "\n", " result += targ_lang.idx2word[predicted_id] + ' '\n", "\n", -- cgit v1.2.3 From c05bb4efcaf53d4cbc315ef6d12de822f2557a13 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Wed, 22 Aug 2018 18:13:37 +0800 Subject: CLN: replace safe_div method by div_no_nan --- .../contrib/losses/python/losses/loss_ops.py | 40 ++++----------- .../contrib/metrics/python/ops/metric_ops.py | 46 +++++++---------- tensorflow/contrib/rate/rate.py | 11 ++--- tensorflow/python/keras/engine/training_utils.py | 3 +- tensorflow/python/keras/metrics.py | 19 +------- tensorflow/python/kernel_tests/losses_test.py | 14 ------ tensorflow/python/ops/losses/losses_impl.py | 40 ++++----------- tensorflow/python/ops/metrics_impl.py | 57 +++++++++------------- 8 files changed, 67 insertions(+), 163 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 651de4e2f4..29f7953c3b 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ 
b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -66,32 +66,6 @@ def _scale_losses(losses, weights): return math_ops.reduce_sum(reduced_losses) -def _safe_div(numerator, denominator, name="value"): - """Computes a safe divide which returns 0 if the denominator is zero. - - Note that the function contains an additional conditional check that is - necessary for avoiding situations where the loss is zero causing NaNs to - creep into the gradient computation. - - Args: - numerator: An arbitrary `Tensor`. - denominator: A `Tensor` whose shape matches `numerator` and whose values are - assumed to be non-negative. - name: An optional name for the returned op. - - Returns: - The element-wise value of the numerator divided by the denominator. - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.div(numerator, - array_ops.where( - math_ops.equal(denominator, 0), - array_ops.ones_like(denominator), denominator)), - array_ops.zeros_like(numerator), - name=name) - - def _safe_mean(losses, num_present): """Computes a safe mean of the losses. @@ -104,7 +78,8 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return math_ops.div_no_nan(total_loss, num_present, + negative_to_zero=True, name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -609,11 +584,16 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch) + term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, + num_present_per_batch, + negative_to_zero=True, + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) - term2 = 2.0 * _safe_div( - math_ops.square(sum_diff), math_ops.square(num_present_per_batch)) + term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), + math_ops.square(num_present_per_batch), + negative_to_zero=True, + name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index a328670526..d972e7da53 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -45,24 +45,6 @@ from tensorflow.python.util.deprecation import deprecated _EPSILON = 1e-7 -def _safe_div(numerator, denominator, name): - """Divides two values, returning 0 if the denominator is <= 0. - - Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. - - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.truediv(numerator, denominator), - 0, - name=name) - - @deprecated(None, 'Please switch to tf.metrics.true_positives. 
Note that the ' 'order of the labels and predictions arguments has been switched.') def streaming_true_positives(predictions, @@ -3205,22 +3187,28 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation - batch_mean_prediction = _safe_div( + batch_mean_prediction = math_ops.div_no_nan( math_ops.reduce_sum(weighted_predictions), batch_count, - 'batch_mean_prediction') - delta_mean_prediction = _safe_div( + negative_to_zero=True, + name='batch_mean_prediction') + delta_mean_prediction = math_ops.div_no_nan( (batch_mean_prediction - mean_prediction) * batch_count, update_count, - 'delta_mean_prediction') + negative_to_zero=True, + name='delta_mean_prediction') update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation prev_mean_prediction = update_mean_prediction - delta_mean_prediction # batch_mean_label is E[y_B] in the update equation - batch_mean_label = _safe_div( - math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label') - delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count, - update_count, 'delta_mean_label') + batch_mean_label = math_ops.div_no_nan( + math_ops.reduce_sum(weighted_labels), batch_count, + negative_to_zero=True, + name='batch_mean_label') + delta_mean_label = math_ops.div_no_nan( + (batch_mean_label - mean_label) * batch_count, update_count, + negative_to_zero=True, + name='delta_mean_label') update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label @@ -3882,8 +3870,10 @@ def cohen_kappa(labels, po_sum = math_ops.reduce_sum(po) total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( - metrics_impl._safe_div( # pylint: disable=protected-access - pe_row * pe_col, total, None)) + 
math_ops.div_no_nan( + pe_row * pe_col, total, + negative_to_zero=True, + name=None)) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), math_ops.to_double(total)) diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 24d586479a..68f5a6e58a 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -108,13 +108,6 @@ class Rate(object): def variables(self): return self._vars - def _safe_div(self, numerator, denominator, name): - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - def _add_variable(self, name, shape=None, dtype=None): """Private method for adding variables to the graph.""" if self._built: @@ -148,4 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return self._safe_div(self.numer, self.denom, name="safe_rate") + return math_ops.div_no_nan(self.numer, self.denom, + negative_to_zero=True, + name="safe_rate") diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index f94697c913..12ea75c5ea 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -607,7 +607,8 @@ def weighted_masked_objective(fn): score_array = math_ops.multiply(score_array, weights) score_array = math_ops.reduce_sum(score_array) weights = math_ops.reduce_sum(weights) - score_array = metrics_module.safe_div(score_array, weights) + score_array = math_ops.div_no_nan(score_array, weights, + negative_to_zero=True) return K.mean(score_array) return weighted diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 0983d62c59..6f4353f96a 100644 --- a/tensorflow/python/keras/metrics.py 
+++ b/tensorflow/python/keras/metrics.py @@ -136,23 +136,6 @@ def result_wrapper(result_fn): return tf_decorator.make_decorator(result_fn, decorated) -def safe_div(numerator, denominator): - """Divides two tensors element-wise, returning 0 if the denominator is <= 0. - - Args: - numerator: A `Tensor`. - denominator: A `Tensor`, with dtype matching `numerator`. - - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero) - - def squeeze_or_expand_dimensions(y_pred, y_true, sample_weight): """Squeeze or expand last dimension if needed. @@ -472,7 +455,7 @@ class Mean(Metric): state_ops.assign_add(self.count, num_values) def result(self): - return safe_div(self.total, self.count) + return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True) class MeanMetricWrapper(Mean): diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index 87fc715783..c45b5035de 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -34,25 +34,11 @@ from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.ops.losses import losses -from tensorflow.python.ops.losses import losses_impl from tensorflow.python.ops.losses import util from tensorflow.python.platform import test from tensorflow.python.training import momentum as momentum_lib -safe_div = losses_impl._safe_div # pylint: disable=protected-access - - -class SafeDivTest(test.TestCase): - - def testEager(self): - with context.eager_mode(): - self.assertAllEqual(safe_div(constant_op.constant(1.0), - constant_op.constant(0.0)), 0.0) - 
self.assertAllEqual(safe_div(constant_op.constant(1.0), - 0.0), 0.0) - - class AbsoluteDifferenceLossTest(test.TestCase): def setUp(self): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 806539747e..1e65aac115 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -74,31 +74,6 @@ class Reduction(object): raise ValueError("Invalid ReductionKey %s." % key) -def _safe_div(numerator, denominator, name="value"): - """Computes a safe divide which returns 0 if the denominator is zero. - - Note that the function contains an additional conditional check that is - necessary for avoiding situations where the loss is zero causing NaNs to - creep into the gradient computation. - - Args: - numerator: An arbitrary `Tensor`. - denominator: `Tensor` whose shape matches `numerator` and whose values are - assumed to be non-negative. - name: An optional name for the returned op. - - Returns: - The element-wise value of the numerator divided by the denominator. - """ - return array_ops.where( - math_ops.greater(denominator, 0), - math_ops.div(numerator, array_ops.where( - math_ops.equal(denominator, 0), - array_ops.ones_like(denominator), denominator)), - array_ops.zeros_like(numerator), - name=name) - - def _safe_mean(losses, num_present): """Computes a safe mean of the losses. @@ -111,7 +86,8 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return math_ops.div_no_nan(total_loss, num_present, + negative_to_zero=True, name="value") def _num_present(losses, weights, per_batch=False): @@ -599,14 +575,18 @@ def mean_pairwise_squared_error( keepdims=True) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, - num_present_per_batch - 1) + term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, + num_present_per_batch - 1, + negative_to_zero=True, + name="value") sum_diff = math_ops.reduce_sum( diffs, reduction_indices=reduction_indices, keepdims=True) - term2 = 2.0 * _safe_div( + term2 = 2.0 * math_ops.div_no_nan( math_ops.square(sum_diff), - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1)) + math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), + negative_to_zero=True, + name="value") weighted_losses = math_ops.multiply(term1 - term2, weights) loss = math_ops.reduce_sum(weighted_losses) diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 763877c2d2..32f8fd3ed7 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -213,24 +213,6 @@ def _maybe_expand_labels(labels, predictions): lambda: array_ops.expand_dims(labels, -1, name=scope), lambda: labels) -def _safe_div(numerator, denominator, name): - """Divides two tensors element-wise, returning 0 if the denominator is <= 0. - - Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. 
- - Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` - """ - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - - def _safe_scalar_div(numerator, denominator, name): """Divides two values, returning 0 if the denominator is 0. @@ -244,13 +226,7 @@ def _safe_scalar_div(numerator, denominator, name): """ numerator.get_shape().with_rank_at_most(1) denominator.get_shape().with_rank_at_most(1) - return control_flow_ops.cond( - math_ops.equal( - array_ops.constant(0.0, dtype=dtypes.float64), denominator), - lambda: array_ops.constant(0.0, dtype=dtypes.float64), - lambda: math_ops.div(numerator, denominator), - name=name) - + return math_ops.div_no_nan(numerator, denominator, name=name) def _streaming_confusion_matrix(labels, predictions, num_classes, weights=None): """Calculate a streaming confusion matrix. 
@@ -402,11 +378,13 @@ def mean(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + compute_mean = lambda _, t, c: math_ops.div_no_nan( + t, c, negative_to_zero=True, name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = math_ops.div_no_nan(update_total_op, update_count_op, + negative_to_zero=True, name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -778,16 +756,21 @@ def auc(labels, """ dtp = tp[:num_thresholds - 1] - tp[1:] p = tp + fp - prec_slope = _safe_div(dtp, p[:num_thresholds - 1] - p[1:], 'prec_slope') + prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:], + negative_to_zero=True, + name='prec_slope') intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) safe_p_ratio = array_ops.where( math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), - _safe_div(p[:num_thresholds - 1], p[1:], 'recall_relative_ratio'), + math_ops.div_no_nan(p[:num_thresholds - 1], p[1:], + negative_to_zero=True, + name='recall_relative_ratio'), array_ops.ones_like(p[1:])) return math_ops.reduce_sum( - _safe_div( + math_ops.div_no_nan( prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)), tp[1:] + fn[1:], + negative_to_zero=True, name='pr_auc_increment'), name='interpolate_pr_auc') @@ -1068,7 +1051,8 @@ def mean_per_class_accuracy(labels, update_count_op = state_ops.scatter_add(count, labels, is_correct) def compute_mean_accuracy(_, count, total): - per_class_accuracy = _safe_div(count, total, None) + per_class_accuracy = math_ops.div_no_nan( + count, total, negative_to_zero=True, name=None) mean_accuracy_v = math_ops.reduce_mean( per_class_accuracy, name='mean_accuracy') return mean_accuracy_v @@ -1076,7 +1060,9 @@ def mean_per_class_accuracy(labels, 
mean_accuracy_v = _aggregate_across_towers( metrics_collections, compute_mean_accuracy, count, total) - update_op = _safe_div(update_count_op, update_total_op, name='update_op') + update_op = math_ops.div_no_nan(update_count_op, update_total_op, + negative_to_zero=True, + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -1385,12 +1371,15 @@ def mean_tensor(values, with ops.control_dependencies([values]): update_count_op = state_ops.assign_add(count, num_values) - compute_mean = lambda _, t, c: _safe_div(t, c, 'value') + compute_mean = lambda _, t, c: math_ops.div_no_nan( + t, c, negative_to_zero=True, name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = _safe_div(update_total_op, update_count_op, 'update_op') + update_op = math_ops.div_no_nan(update_total_op, update_count_op, + negative_to_zero=True, + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) -- cgit v1.2.3 From a6b016dc0a33f50f20fd1e8e3b9716ddbec75e57 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Thu, 23 Aug 2018 11:14:25 +0900 Subject: comments regarding why rates are disabled --- tensorflow/core/kernels/extract_volume_patches_op.cc | 4 ++++ tensorflow/core/ops/array_ops.cc | 3 +++ 2 files changed, 7 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/extract_volume_patches_op.cc b/tensorflow/core/kernels/extract_volume_patches_op.cc index 80405c66dc..0f1d566c75 100644 --- a/tensorflow/core/kernels/extract_volume_patches_op.cc +++ b/tensorflow/core/kernels/extract_volume_patches_op.cc @@ -87,6 +87,10 @@ class ExtractVolumePatchesOp : public UnaryOp { const int stride_cols = strides_[3]; /* + // TODO(hsgkim): enable rates + // Rates are disabled as of now due to Eigen's definitions of extract_volume_patch + // functions; none of them accept rates as its argument and rates are fixed to + // (1, 1, 1, 1, 1). 
A workaround has to be found for this. // In order to enable rates, uncomment the following lines and use // ksize_*_eff instead of ksize_* for the second argument of GetWindowedOutputSize // calls. diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 48d8327a9e..6c8369200a 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2583,6 +2583,9 @@ REGISTER_OP("ExtractVolumePatches") } /* + // TODO(hsgkim): Enable rates. + // See extract_volume_patches_op.cc for why rates are disabled now. + std::vector rates; TF_RETURN_IF_ERROR(c->GetAttr("rates", &rates)); if (rates.size() != 5) { -- cgit v1.2.3 From 52d3e5a3a7bece06da072dcfb3f4ac53e83f8470 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Wed, 22 Aug 2018 23:34:34 -0700 Subject: Added the BUILD files for tbb and updated the ngraph.BUILD with CPU library (DEX). --- WORKSPACE | 7 +++ tensorflow/workspace.bzl | 29 +++++++--- third_party/ngraph/ngraph.BUILD | 109 ++++++++++++++++++++++++++++++++++++- third_party/ngraph/ngraph_tf.BUILD | 11 +--- third_party/ngraph/tbb.BUILD | 52 ++++++++++++++++++ 5 files changed, 188 insertions(+), 20 deletions(-) create mode 100644 third_party/ngraph/tbb.BUILD (limited to 'tensorflow') diff --git a/WORKSPACE b/WORKSPACE index 17961829a6..4af1a1e75f 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -79,3 +79,10 @@ new_http_archive( "http://download.tensorflow.org/models/speech_commands_v0.01.zip", ], ) + +new_local_repository( + name = "ngraph", + path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph", + build_file = "//third_party/ngraph:ngraph.BUILD", +) + diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5d90d0fe64..951cb8a89d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -833,15 +833,26 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): ) tf_http_archive( - name = "ngraph", - urls = [ - 
"https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - ], - sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", - strip_prefix = "ngraph-0.5.0", - build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), - ) + name = "tbb", + urls = [ + "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip", + "https://github.com/01org/tbb/archive/tbb_2018.zip", + ], + sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13", + strip_prefix = "tbb-tbb_2018", + build_file = clean_dep("//third_party/ngraph:tbb.BUILD"), + ) + + # tf_http_archive( + # name = "ngraph", + # urls = [ + # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + # "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + # ], + # sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", + # strip_prefix = "ngraph-0.5.0", + # build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + # ) tf_http_archive( name = "nlohmann_json_lib", diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index 31aa3cee51..f1cf8acbf6 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -2,6 +2,112 @@ licenses(["notice"]) # 3-Clause BSD exports_files(["LICENSE"]) +cc_library( + name = "ngraph_headers", + hdrs = glob(["src/ngraph/**/*.hpp"]) , + visibility = ["//visibility:public"], +) + +cc_library( + name = "ngraph_cpu_backend", + srcs = [ + "src/ngraph/runtime/cpu/cpu_backend.cpp", + "src/ngraph/runtime/cpu/cpu_builder.cpp", + "src/ngraph/runtime/cpu/cpu_call_frame.cpp", + "src/ngraph/runtime/cpu/cpu_external_function.cpp", + "src/ngraph/runtime/cpu/cpu_kernels.cpp", + "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp", + "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp", + "src/ngraph/runtime/cpu/cpu_tensor_view.cpp", + 
"src/ngraph/runtime/cpu/cpu_tracing.cpp", + "src/ngraph/runtime/cpu/builder/add.cpp", + "src/ngraph/runtime/cpu/builder/allreduce.cpp", + "src/ngraph/runtime/cpu/builder/avg_pool.cpp", + "src/ngraph/runtime/cpu/builder/argmin.cpp", + "src/ngraph/runtime/cpu/builder/argmax.cpp", + "src/ngraph/runtime/cpu/builder/batch_norm.cpp", + "src/ngraph/runtime/cpu/builder/broadcast.cpp", + "src/ngraph/runtime/cpu/builder/bounded_relu.cpp", + "src/ngraph/runtime/cpu/builder/concat.cpp", + "src/ngraph/runtime/cpu/builder/convert.cpp", + "src/ngraph/runtime/cpu/builder/convert_layout.cpp", + "src/ngraph/runtime/cpu/builder/convolution.cpp", + "src/ngraph/runtime/cpu/builder/dot.cpp", + "src/ngraph/runtime/cpu/builder/function_call.cpp", + "src/ngraph/runtime/cpu/builder/lstm.cpp", + "src/ngraph/runtime/cpu/builder/lrn.cpp", + "src/ngraph/runtime/cpu/builder/matmul_bias.cpp", + "src/ngraph/runtime/cpu/builder/max.cpp", + "src/ngraph/runtime/cpu/builder/max_pool.cpp", + "src/ngraph/runtime/cpu/builder/min.cpp", + "src/ngraph/runtime/cpu/builder/one_hot.cpp", + "src/ngraph/runtime/cpu/builder/relu.cpp", + "src/ngraph/runtime/cpu/builder/pad.cpp", + "src/ngraph/runtime/cpu/builder/product.cpp", + "src/ngraph/runtime/cpu/builder/reduce_function.cpp", + "src/ngraph/runtime/cpu/builder/reduce_function_window.cpp", + "src/ngraph/runtime/cpu/builder/replace_slice.cpp", + "src/ngraph/runtime/cpu/builder/reshape.cpp", + "src/ngraph/runtime/cpu/builder/reverse.cpp", + "src/ngraph/runtime/cpu/builder/reverse_sequence.cpp", + "src/ngraph/runtime/cpu/builder/rnn.cpp", + "src/ngraph/runtime/cpu/builder/select.cpp", + "src/ngraph/runtime/cpu/builder/select_and_scatter.cpp", + "src/ngraph/runtime/cpu/builder/sigmoid.cpp", + "src/ngraph/runtime/cpu/builder/slice.cpp", + "src/ngraph/runtime/cpu/builder/softmax.cpp", + "src/ngraph/runtime/cpu/builder/sum.cpp", + "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp", + "src/ngraph/runtime/cpu/kernel/pad.cpp", + 
"src/ngraph/runtime/cpu/kernel/reduce_max.cpp", + "src/ngraph/runtime/cpu/kernel/reduce_sum.cpp", + "src/ngraph/runtime/cpu/kernel/reshape.cpp", + "src/ngraph/runtime/cpu/mkldnn_emitter.cpp", + "src/ngraph/runtime/cpu/mkldnn_invoke.cpp", + "src/ngraph/runtime/cpu/mkldnn_utils.cpp", + "src/ngraph/runtime/cpu/op/batch_dot.cpp", + "src/ngraph/runtime/cpu/op/batch_norm_relu.cpp", + "src/ngraph/runtime/cpu/op/bounded_relu.cpp", + "src/ngraph/runtime/cpu/op/group_conv.cpp", + "src/ngraph/runtime/cpu/op/conv_bias.cpp", + "src/ngraph/runtime/cpu/op/conv_relu.cpp", + "src/ngraph/runtime/cpu/op/convert_layout.cpp", + "src/ngraph/runtime/cpu/op/loop_kernel.cpp", + "src/ngraph/runtime/cpu/op/lstm.cpp", + "src/ngraph/runtime/cpu/op/matmul_bias.cpp", + "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp", + "src/ngraph/runtime/cpu/op/rnn.cpp", + "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp", + "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp", + "src/ngraph/runtime/cpu/pass/cpu_collapse_dims.cpp", + "src/ngraph/runtime/cpu/pass/cpu_concat_inputs.cpp", + "src/ngraph/runtime/cpu/pass/cpu_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_layout.cpp", + "src/ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_mat_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.cpp", + "src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp", + "src/ngraph/runtime/cpu/pass/cpu_workspace_insertion.cpp", + ], + hdrs = glob(["src/ngraph/runtime/cpu/**/*.hpp"]) + glob([]), + deps = [ + ":ngraph_headers", + "@eigen_archive//:eigen", + "@nlohmann_json_lib", + "@tbb", + "@mkl_dnn//:mkl_dnn", + ], + copts = [ + "-I external/ngraph/src", + "-I external/nlohmann_json_lib/include/", + '-D SHARED_LIB_EXT=\\".so\\"', + '-D NGRAPH_VERSION=\\"0.5.0\\"', + '-D NGRAPH_DEX_ONLY', + ], + visibility = ["//visibility:public"], + alwayslink = 1, +) + cc_library( name = "ngraph_core", srcs = glob([ @@ -21,8 +127,9 @@ cc_library( "src/ngraph/runtime/interpreter/*.cpp", 
"src/ngraph/runtime/interpreter/*.hpp", ]), - hdrs = glob(["src/ngraph/**/*.hpp"]), deps = [ + ":ngraph_headers", + ":ngraph_cpu_backend", "@eigen_archive//:eigen", "@nlohmann_json_lib", ], diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index 4d96ccf2f2..0647d9926a 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -7,15 +7,6 @@ load( "tf_cc_test", ) -cc_library( - name = "ngraph_libs_linux", - srcs = [ - "lib/libiomp5.so", - "lib/libmklml_intel.so", - ], - visibility = ["//visibility:public"], -) - cc_library( name = "ngraph_tf", srcs = [ @@ -58,7 +49,7 @@ cc_library( "-I external/ngraph_tf/src", "-I external/ngraph_tf/logging", "-I external/ngraph/src", - "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", + #"-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", ], alwayslink = 1, visibility = ["//visibility:public"], diff --git a/third_party/ngraph/tbb.BUILD b/third_party/ngraph/tbb.BUILD new file mode 100644 index 0000000000..c3e7f7fd35 --- /dev/null +++ b/third_party/ngraph/tbb.BUILD @@ -0,0 +1,52 @@ +licenses(["notice"]) # 3-Clause BSD + +exports_files(["LICENSE"]) + +genrule( + name = "build_tbb", + srcs = glob(["**"]) + [ + "@local_config_cc//:toolchain", + ], + cmd = """ + set -e + WORK_DIR=$$PWD + DEST_DIR=$$PWD/$(@D) + export PATH=$$(dirname $(AR)):$$PATH + export CXXFLAGS=$(CC_FLAGS) + export NM=$(NM) + export AR=$(AR) + cd $$(dirname $(location :Makefile)) + + #TBB's build needs some help to figure out what compiler it's using + if $$CXX --version | grep clang &> /dev/null; then + COMPILER_OPT="compiler=clang" + else + COMPILER_OPT="compiler=gcc" + fi + + # uses extra_inc=big_iron.inc to specify that static libraries are + # built. 
See https://software.intel.com/en-us/forums/intel-threading-building-blocks/topic/297792 + make tbb_build_prefix="build" \ + extra_inc=big_iron.inc \ + $$COMPILER_OPT; \ + + echo cp build/build_{release,debug}/*.a $$DEST_DIR + cp build/build_{release,debug}/*.a $$DEST_DIR + cd $$WORK_DIR + """, + outs = [ + "libtbb.a", + "libtbbmalloc.a", + ] +) + +cc_library( + name = "tbb", + hdrs = glob([ + "include/serial/**", + "include/tbb/**/**", + ]), + srcs = ["libtbb.a"], + includes = ["include"], + visibility = ["//visibility:public"], +) \ No newline at end of file -- cgit v1.2.3 From 38f811077dd52820eaa3d5c684f41142de01c7eb Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Thu, 23 Aug 2018 16:23:03 +0800 Subject: CLN: remove negative_to_zero argument --- .../contrib/losses/python/losses/loss_ops.py | 9 +++--- .../contrib/metrics/python/ops/metric_ops.py | 20 ++++++------- tensorflow/contrib/rate/rate.py | 4 +-- tensorflow/python/keras/engine/training_utils.py | 4 +-- tensorflow/python/keras/metrics.py | 2 +- tensorflow/python/ops/losses/losses_impl.py | 18 ++++++------ tensorflow/python/ops/math_ops.py | 5 +--- tensorflow/python/ops/math_ops_test.py | 13 --------- tensorflow/python/ops/metrics_impl.py | 33 +++++++++++----------- 9 files changed, 47 insertions(+), 61 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 29f7953c3b..8a0932c376 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -78,8 +78,9 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, num_present, - negative_to_zero=True, name="value") + return math_ops.div_no_nan(total_loss, + math_ops.maximum(num_present, 0), + name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -585,14 +586,12 @@ def mean_pairwise_squared_error(predictions, num_present_per_batch = _num_present(diffs, weights, per_batch=True) term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, - num_present_per_batch, - negative_to_zero=True, + math_ops.maximum(num_present_per_batch), name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) term2 = 2.0 * math_ops.div_no_nan(math_ops.square(sum_diff), math_ops.square(num_present_per_batch), - negative_to_zero=True, name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index d972e7da53..bfef0816aa 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -3188,12 +3188,12 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = math_ops.div_no_nan( - math_ops.reduce_sum(weighted_predictions), batch_count, - negative_to_zero=True, + math_ops.reduce_sum(weighted_predictions), + math_ops.maximum(batch_count, 0), name='batch_mean_prediction') delta_mean_prediction = math_ops.div_no_nan( - (batch_mean_prediction - mean_prediction) * batch_count, update_count, - negative_to_zero=True, + (batch_mean_prediction - mean_prediction) * batch_count, + math_ops.maximum(update_count, 0), name='delta_mean_prediction') update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) @@ -3202,12 +3202,12 @@ def streaming_covariance(predictions, # 
batch_mean_label is E[y_B] in the update equation batch_mean_label = math_ops.div_no_nan( - math_ops.reduce_sum(weighted_labels), batch_count, - negative_to_zero=True, + math_ops.reduce_sum(weighted_labels), + math_ops.maximum(batch_count, 0), name='batch_mean_label') delta_mean_label = math_ops.div_no_nan( - (batch_mean_label - mean_label) * batch_count, update_count, - negative_to_zero=True, + (batch_mean_label - mean_label) * batch_count, + math_ops.maximum(update_count, 0), name='delta_mean_label') update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation @@ -3871,8 +3871,8 @@ def cohen_kappa(labels, total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( math_ops.div_no_nan( - pe_row * pe_col, total, - negative_to_zero=True, + pe_row * pe_col, + math_ops.maximum(total, 0), name=None)) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 68f5a6e58a..489d5cce78 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -141,6 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return math_ops.div_no_nan(self.numer, self.denom, - negative_to_zero=True, + return math_ops.div_no_nan(self.numer, + math_op.maximum(self.denom, 0), name="safe_rate") diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 12ea75c5ea..eeca60dc57 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -607,8 +607,8 @@ def weighted_masked_objective(fn): score_array = math_ops.multiply(score_array, weights) score_array = math_ops.reduce_sum(score_array) weights = math_ops.reduce_sum(weights) - score_array = math_ops.div_no_nan(score_array, weights, - negative_to_zero=True) + 
score_array = math_ops.div_no_nan(score_array, + math_ops.maximum(weights, 0)) return K.mean(score_array) return weighted diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 6f4353f96a..b5d3138da2 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -455,7 +455,7 @@ class Mean(Metric): state_ops.assign_add(self.count, num_values) def result(self): - return math_ops.div_no_nan(self.total, self.count, negative_to_zero=True) + return math_ops.div_no_nan(self.total, math_ops.maximum(self.count, 0)) class MeanMetricWrapper(Mean): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 1e65aac115..a980a43f62 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -86,8 +86,9 @@ def _safe_mean(losses, num_present): then zero is returned. """ total_loss = math_ops.reduce_sum(losses) - return math_ops.div_no_nan(total_loss, num_present, - negative_to_zero=True, name="value") + return math_ops.div_no_nan(total_loss, + math_ops.maximum(num_present, 0), + name="value") def _num_present(losses, weights, per_batch=False): @@ -575,17 +576,18 @@ def mean_pairwise_squared_error( keepdims=True) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * math_ops.div_no_nan(sum_squares_diff_per_batch, - num_present_per_batch - 1, - negative_to_zero=True, - name="value") + term1 = 2.0 * math_ops.div_no_nan( + sum_squares_diff_per_batch, + math_ops.maximum(num_present_per_batch - 1, 0), + name="value") sum_diff = math_ops.reduce_sum( diffs, reduction_indices=reduction_indices, keepdims=True) term2 = 2.0 * math_ops.div_no_nan( math_ops.square(sum_diff), - math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), - negative_to_zero=True, + math_ops.maximum( + math_ops.multiply(num_present_per_batch, num_present_per_batch - 1), + 0), name="value") weighted_losses = 
math_ops.multiply(term1 - term2, weights) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a693b1ebac..67ea534639 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -1039,14 +1039,13 @@ def div(x, y, name=None): @tf_export("div_no_nan") -def div_no_nan(x, y, name=None, negative_to_zero=False): +def div_no_nan(x, y, name=None): """Computes an unsafe divide which returns 0 if the y is zero. Args: x: A `Tensor`. Must be one of the following types: `float32`, `float64`. y: A `Tensor` whose dtype is compatible with `x`. name: A name for the operation (optional). - negative_to_zero: If `True`, negative is treated as zero in denominator. Returns: The element-wise value of the x divided by y. """ @@ -1059,8 +1058,6 @@ def div_no_nan(x, y, name=None, negative_to_zero=False): if x_dtype != y_dtype: raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) - if negative_to_zero: - y = gen_math_ops.maximum(y, 0, name='negative_to_zero') return gen_math_ops.div_no_nan(x, y, name=name) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 6e1e5f37c8..6bd41020c5 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -487,19 +487,6 @@ class DivNoNanTest(test_util.TensorFlowTestCase): tf_result = math_ops.div_no_nan(nums, divs).eval() self.assertAllEqual(tf_result, np_result) - def testNegativeToZero(self): - for dtype in [np.float32, np.float64]: - nums = np.arange(-10, 10, .25, dtype=dtype).reshape(80, 1) - divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24) - - np_result = np.true_divide(nums, divs) - np_result[:, divs[0] <= 0] = 0 - - with self.cached_session(): - tf_result = math_ops.div_no_nan(nums, divs, - negative_to_zero=True).eval() - self.assertAllEqual(tf_result, np_result) - if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/metrics_impl.py 
b/tensorflow/python/ops/metrics_impl.py index 32f8fd3ed7..e449318020 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -379,12 +379,13 @@ def mean(values, update_count_op = state_ops.assign_add(count, num_values) compute_mean = lambda _, t, c: math_ops.div_no_nan( - t, c, negative_to_zero=True, name='value') + t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = math_ops.div_no_nan(update_total_op, update_count_op, - negative_to_zero=True, name='update_op') + update_op = math_ops.div_no_nan(update_total_op, + math_ops.maximum(update_count_op, 0), + name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -756,21 +757,21 @@ def auc(labels, """ dtp = tp[:num_thresholds - 1] - tp[1:] p = tp + fp - prec_slope = math_ops.div_no_nan(dtp, p[:num_thresholds - 1] - p[1:], - negative_to_zero=True, - name='prec_slope') + prec_slope = math_ops.div_no_nan( + dtp, + math_ops.maximum(p[:num_thresholds - 1] - p[1:], 0), + name='prec_slope') intercept = tp[1:] - math_ops.multiply(prec_slope, p[1:]) safe_p_ratio = array_ops.where( math_ops.logical_and(p[:num_thresholds - 1] > 0, p[1:] > 0), - math_ops.div_no_nan(p[:num_thresholds - 1], p[1:], - negative_to_zero=True, + math_ops.div_no_nan(p[:num_thresholds - 1], + math_ops.maximum(p[1:], 0), name='recall_relative_ratio'), array_ops.ones_like(p[1:])) return math_ops.reduce_sum( math_ops.div_no_nan( prec_slope * (dtp + intercept * math_ops.log(safe_p_ratio)), - tp[1:] + fn[1:], - negative_to_zero=True, + math_ops.maximum(tp[1:] + fn[1:], 0), name='pr_auc_increment'), name='interpolate_pr_auc') @@ -1052,7 +1053,7 @@ def mean_per_class_accuracy(labels, def compute_mean_accuracy(_, count, total): per_class_accuracy = math_ops.div_no_nan( - count, total, negative_to_zero=True, name=None) + count, math_ops.maximum(total, 0), name=None) mean_accuracy_v = 
math_ops.reduce_mean( per_class_accuracy, name='mean_accuracy') return mean_accuracy_v @@ -1060,8 +1061,8 @@ def mean_per_class_accuracy(labels, mean_accuracy_v = _aggregate_across_towers( metrics_collections, compute_mean_accuracy, count, total) - update_op = math_ops.div_no_nan(update_count_op, update_total_op, - negative_to_zero=True, + update_op = math_ops.div_no_nan(update_count_op, + math_ops.maximum(update_total_op, 0), name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) @@ -1372,13 +1373,13 @@ def mean_tensor(values, update_count_op = state_ops.assign_add(count, num_values) compute_mean = lambda _, t, c: math_ops.div_no_nan( - t, c, negative_to_zero=True, name='value') + t, math_ops.maximum(c, 0), name='value') mean_t = _aggregate_across_towers( metrics_collections, compute_mean, total, count) - update_op = math_ops.div_no_nan(update_total_op, update_count_op, - negative_to_zero=True, + update_op = math_ops.div_no_nan(update_total_op, + math_ops.maximum(update_count_op, 0), name='update_op') if updates_collections: ops.add_to_collections(updates_collections, update_op) -- cgit v1.2.3 From 407a64b773f15bfe67a2b5b1979134368464b6ff Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Thu, 23 Aug 2018 16:52:00 +0800 Subject: TST: revise test case and too long line --- .../python/estimator/canned/boosted_trees.py | 7 +- .../python/estimator/canned/boosted_trees_test.py | 125 +++++++++++++++------ 2 files changed, 96 insertions(+), 36 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index b1d5d60fb0..f2a5b9178b 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -213,8 +213,13 @@ def _generate_feature_name_mapping(sorted_feature_columns): feature_column_lib._VocabularyListCategoricalColumn): # pylint:disable=protected-access for value in 
categorical_column.vocabulary_list: names.append('{}:{}'.format(column.name, value)) + elif isinstance(categorical_column, + feature_column_lib._BucketizedColumn): # pylint:disable=protected-access + boundaries = [-np.inf] + list(categorical_column.boundaries) + [np.inf] + for pair in zip(boundaries[:-1], boundaries[1:]): + names.append('{}:{}'.format(column.name, pair)) else: - for num in categorical_column._num_buckets: # pylint:disable=protected-access + for num in range(categorical_column._num_buckets): # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) else: names.append(column.name) diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 24d3a3501e..7620f73425 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -564,13 +564,17 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): # Train for a few steps, and validate final checkpoint. 
est.train(input_fn, steps=num_steps) - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.833933, 0.606342, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.579010, 0.420990, 0.0], importances) @@ -599,7 +603,9 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(ValueError, 'empty serialized string'): est.experimental_feature_importances(normalize=True) - def _create_fake_checkpoint_with_tree_ensemble_proto(self, est, tree_ensemble_text): + def _create_fake_checkpoint_with_tree_ensemble_proto(self, + est, + tree_ensemble_text): with ops.Graph().as_default(): with ops.name_scope('boosted_trees') as name: tree_ensemble = boosted_trees_ops.TreeEnsemble(name=name) @@ -731,14 +737,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) 
+ # Gain sum for each features: + # = 1.0 * [3 + 1, 2, 2] + 1.0 * [1, 1, 0] self.assertAllClose([5.0, 3.0, 2.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) @@ -820,14 +833,21 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 0.6 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['f_0_bucketized', 'f_2_bucketized', 'f_1_bucketized'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['f_0_bucketized', + 'f_2_bucketized', + 'f_1_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) + # Gain sum for each features: + # = 0.4 * [12.5, 0, 5] + 0.6 * [0, 5, 0] + 1.0 * [0, 0, 0] self.assertAllClose([5.0, 3.0, 2.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.3, 0.2], importances) @@ -856,11 +876,15 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): tree_weights: 1.0 tree_weights: 1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) # Reverse order because feature importances are sorted by np.argsort(f)[::-1] - feature_names_expected = ['f_2_bucketized', 'f_1_bucketized', 'f_0_bucketized'] - feature_names, 
importances = est.experimental_feature_importances(normalize=False) + feature_names_expected = ['f_2_bucketized', + 'f_1_bucketized', + 'f_0_bucketized'] + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.0, 0.0, 0.0], importances) @@ -868,17 +892,20 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'all empty or contain only a root node'): est.experimental_feature_importances(normalize=True) - def TestFeatureImportancesNamesForCategoricalColumn(self): + def testFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) feature_indicator = feature_column.indicator_column(categorical) bucketized_col = feature_column.bucketized_column( feature_column.numeric_column( - 'an_uninformative_feature', dtype=dtypes.float32), + 'continuous', dtype=dtypes.float32), BUCKET_BOUNDARIES) + bucketized_indicator = feature_column.indicator_column(bucketized_col) est = boosted_trees.BoostedTreesRegressor( - feature_columns=[bucketized_col, feature_indicator], + feature_columns=[feature_indicator, + bucketized_col, + bucketized_indicator], n_batches_per_layer=1, n_trees=2, learning_rate=1.0, @@ -898,7 +925,7 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } nodes { bucketized_split { - feature_id: 3 + feature_id: 4 left_id: 3 right_id: 4 } @@ -930,36 +957,63 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): right_id: 2 } metadata { - gain: 3.0 + gain: 1.0 + } + } + nodes { + bucketized_split { + feature_id: 5 + left_id: 3 + right_id: 4 + } + metadata { + gain: 2.0 } } nodes { leaf { - scalar: -0.34 + scalar: -2.34 } } nodes { leaf { - scalar: 1.34 + scalar: 3.34 + } + } + nodes { + leaf { + scalar: 4.34 } } } tree_weights: 1.0 tree_weights: 1.0 """ - 
self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) - - feature_names_expected = ['categorical_indicator:good', - 'an_uninformative_feature_bucketized', - 'categorical_indicator:ok', - 'categorical_indicator:bad'] - feature_names, importances = est.experimental_feature_importances(normalize=False) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + feature_names_expected = ['categorical_indicator:ok', + 'continuous_bucketized_indicator:(-2.0, 0.5)', + 'continuous_bucketized_indicator:(-inf, -2.0)', + 'categorical_indicator:bad', + # Reverse order because feature importances + # are sorted by np.argsort(f)[::-1] + 'continuous_bucketized_indicator:(12.0, inf)', + 'continuous_bucketized_indicator:(0.5, 12.0)', + 'continuous_bucketized', + 'categorical_indicator:good'] + + feature_names, importances = est.experimental_feature_importances( + normalize=False) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([5.0, 3.0, 2.0, 0.0], importances) + # Gain sum for each features: + # = 1.0 * [5, 0, 2, 0, 0, 0, 0, 0] + 1.0 * [0, 2, 0, 1, 0, 0, 0, 0] + self.assertAllClose([5.0, 2.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0], importances) - feature_names, importances = est.experimental_feature_importances(normalize=True) + feature_names, importances = est.experimental_feature_importances( + normalize=True) self.assertAllEqual(feature_names_expected, feature_names) - self.assertAllClose([0.5, 0.3, 0.2, 0.0], importances) + self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances) def testNegativeFeatureImportances(self): est = boosted_trees.BoostedTreesClassifier( @@ -995,7 +1049,8 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): } tree_weights: -1.0 """ - self._create_fake_checkpoint_with_tree_ensemble_proto(est, tree_ensemble_text) + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) with self.assertRaisesRegexp(AssertionError, 
'non-negative'): est.experimental_feature_importances(normalize=False) -- cgit v1.2.3 From e357bcea4b10d5e5cbc3a4ba59385e832401ba8d Mon Sep 17 00:00:00 2001 From: Dao Zhang Date: Thu, 23 Aug 2018 20:11:10 +0800 Subject: merge_repeated option is confusing I have the same question with [WIP: Remove invalid merge_repeated option from CTC beam decoder](https://github.com/tensorflow/tensorflow/pull/15586), it's a pity I haven't seen any changes for so long. Generally I will use the default value of merge_repeated: True, but I found it's confusing, that is, I got the wrong answer, it has been explained well in [WIP: Remove invalid merge_repeated option from CTC beam decoder](https://github.com/tensorflow/tensorflow/pull/15586). And the top path in ctc_beam_search_decoder is similar with sequence in ctc_greedy_decoder, this is confusing, I have found the project [CRNN](https://github.com/Belval/CRNN/blob/master/CRNN/crnn.py)(line 167) and some other projects use the wrong settings. So I think it's better to give an explanation here, this has no conflict with the existing code. --- tensorflow/python/ops/ctc_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 908e793902..6bfe405b2b 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -242,11 +242,11 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, If `merge_repeated` is `True`, merge repeated classes in the output beams. This means that if consecutive entries in a beam are the same, - only the first of these is emitted. That is, when the top path - is `A B B B B`, the return value is: + only the first of these is emitted. That is, when the sequence is `A B B * B * B` (where '*' + is the blank label), the return value is: * `A B` if `merge_repeated = True`. - * `A B B B B` if `merge_repeated = False`. + * `A B B B` if `merge_repeated = False`.
Args: inputs: 3-D `float` `Tensor`, size -- cgit v1.2.3 From c7c152981cdf9494dce9efdeed04a9c3ae7a8e3d Mon Sep 17 00:00:00 2001 From: weidankong Date: Fri, 24 Aug 2018 11:23:26 -0700 Subject: Accumulated Gradient Normalization Optimizer --- tensorflow/contrib/opt/BUILD | 19 ++ tensorflow/contrib/opt/__init__.py | 3 + .../contrib/opt/python/training/agn_optimizer.py | 309 +++++++++++++++++++++ .../opt/python/training/agn_optimizer_test.py | 279 +++++++++++++++++++ 4 files changed, 610 insertions(+) create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer.py create mode 100644 tensorflow/contrib/opt/python/training/agn_optimizer_test.py (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 5319a8b655..642cda7845 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -16,6 +16,7 @@ py_library( "__init__.py", "python/training/adamax.py", "python/training/addsign.py", + "python/training/agn_optimizer.py", "python/training/drop_stale_gradient_optimizer.py", "python/training/elastic_average_optimizer.py", "python/training/external_optimizer.py", @@ -242,6 +243,24 @@ tf_py_test( ], ) +tf_py_test( + name = "agn_optimizer_test", + srcs = ["python/training/agn_optimizer_test.py"], + additional_deps = [ + ":opt_py", + "//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:array_ops", + "//tensorflow/python:variables", + "//tensorflow/python:framework", + "//tensorflow/python:platform", + "//tensorflow/python:training", + "//tensorflow/python:ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//third_party/py/numpy", + ], +) + tf_py_test( name = "elastic_average_optimizer_test", srcs = ["python/training/elastic_average_optimizer_test.py"], diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 781621dba0..b814a57680 100644 --- a/tensorflow/contrib/opt/__init__.py +++ 
b/tensorflow/contrib/opt/__init__.py @@ -21,6 +21,7 @@ from __future__ import print_function # pylint: disable=wildcard-import from tensorflow.contrib.opt.python.training.adamax import * from tensorflow.contrib.opt.python.training.addsign import * +from tensorflow.contrib.opt.python.training.agn_optimizer import * from tensorflow.contrib.opt.python.training.drop_stale_gradient_optimizer import * from tensorflow.contrib.opt.python.training.elastic_average_optimizer import * from tensorflow.contrib.opt.python.training.external_optimizer import * @@ -59,6 +60,8 @@ _allowed_symbols = [ 'VariableClippingOptimizer', 'MultitaskOptimizerWrapper', 'clip_gradients_by_global_norm', + 'AGNOptimizer', + 'AGNCustomGetter', 'ElasticAverageOptimizer', 'ElasticAverageCustomGetter', 'ModelAverageOptimizer', diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py new file mode 100644 index 0000000000..dc1f8d6347 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -0,0 +1,309 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import optimizer +from tensorflow.python.training import session_run_hook +from tensorflow.python.training import gradient_descent + + +GLOBAL_VARIABLE_NAME = 'global_center_variable' +GRAD_VARIABLE_NAME = 'grad_variable' + +class AGNCustomGetter(object): + """Custom_getter class is used to do: + 1. Change trainable variables to local collection and place them at worker + device + 2. Generate global variables(global center variables) + 3. Generate grad variables(gradients) which record the gradients sum + and place them at worker device + Notice that the class should be used with tf.replica_device_setter, + so that the global center variables and global step variable can be placed + at ps device. 
+ """ + def __init__(self, worker_device): + """ + Args: + worker_device: put the grad_variables on worker device + """ + self._worker_device = worker_device + self._global_map = {} + self._grad_map = {} + + def __call__(self, getter, name, trainable, collections, *args, **kwargs): + if trainable: + with ops.device(self._worker_device): + local_var = getter( + name, + trainable=True, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + *args, + **kwargs) + if kwargs['reuse'] == True: + return local_var + global_center_variable = getter( + name='%s/%s' % (GLOBAL_VARIABLE_NAME, name), + trainable=False, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + *args, + **kwargs) + + with ops.device(self._worker_device): + grad_variable = getter( + name='%s/%s' % (GRAD_VARIABLE_NAME, name), + trainable=False, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + *args, + **kwargs) + if kwargs['partitioner'] is None: + self._grad_map[local_var] = grad_variable + self._global_map[local_var] = global_center_variable + else: + v_list = list(local_var) + for i in range(len(v_list)): + self._grad_map[v_list[i]] = list(grad_variable)[i] + self._global_map[v_list[i]] = list(global_center_variable)[i] + return local_var + else: + return getter(name, + trainable=trainable, + collections=collections, + *args, + **kwargs) + +class AGNOptimizer(optimizer.Optimizer): + """Wrapper that implements the Accumulated GradientNormalization algorithm. + Reference: + Accumulated Gradient Normalization: Joeri Hermans ACML2017 + https://arxiv.org/abs/1710.02368 + """ + + def __init__(self, + optimizer, + num_worker, + custom_getter, + communication_period=10, + use_locking=True, + name='AGNOptimizer'): + """Construct a new AGN optimizer. + + Args: + optimizer: input optimizer, can be sgd/momentum/adam etc. + num_worker: The number of workers + custom_getter: The AGNCustomGetter + communication_period: An int point value to controls the frequency + of the communication between every worker and the ps. 
+ use_locking: If True use locks for update operations. + name: Optional name prefix for the operations created when applying + gradients. Defaults to "AGNOptimizer". + + """ + super(AGNOptimizer, self).__init__(use_locking, name) + self._opt = optimizer + self._num_worker = num_worker + self._period = communication_period + self._global_map = custom_getter._global_map + self._grad_map = custom_getter._grad_map + self._local_step = variable_scope.get_variable( + initializer=0, + trainable=False, + collections=[ops.GraphKeys.LOCAL_VARIABLES], + name='local_step') + self._opt._prepare() + + def compute_gradients(self, + loss, + var_list=None, + gate_gradients=optimizer.Optimizer.GATE_OP, + aggregation_method=None, + colocate_gradients_with_ops=False, + grad_loss=None): + """Compute gradients of `loss` for the variables in `var_list`. + Args: + loss: A Tensor containing the value to minimize. + var_list: Optional list or tuple of `tf.Variable` to update to minimize + `loss`. Defaults to the list of variables collected in the graph + under the key `GraphKey.TRAINABLE_VARIABLES`. + gate_gradients: How to gate the computation of gradients. Can be + `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. + aggregation_method: Specifies the method used to combine gradient terms. + Valid values are defined in the class `AggregationMethod`. + colocate_gradients_with_ops: If True, try colocating gradients with + the corresponding op. + grad_loss: Optional. A `Tensor` holding the gradient computed for `loss` + + Returns: + A list of (gradient, variable) pairs. Variable is always present, but + gradient can be `None`. 
+ """ + if not var_list: + var_list = variables.trainable_variables() + return self._opt.compute_gradients(loss, + var_list, + gate_gradients, + aggregation_method, + colocate_gradients_with_ops, + grad_loss) + + def _adjust_optimizer_variable_collection(self, opt_vars): + """ Move optimizer created variables to local collection + """ + g = ops.get_default_graph() + idx = 0 + for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): + var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + name = var.op.name + if name in opt_vars: + ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) + del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + else: + idx += 1 + + def apply_gradients(self, grads_and_vars, global_step=None, name=None): + """Apply gradients to global variables. + + This is the second part of `minimize()`. It returns an `Operation` that + applies gradients. + + Args: + grads_and_vars: List of (gradient, variable) pairs as returned by + `compute_gradients()`. + global_step: Optional `Variable` to increment by one after the + variables have been updated. + name: Optional name for the returned operation. Default to the + name passed to the `Optimizer` constructor. + + Returns: + An `Operation` that applies the specified gradients. If `global_step` + was not None, that operation also increments `global_step`. 
+ """ + local_vars = [v for g, v in grads_and_vars if g is not None] + grads = [g for g, v in grads_and_vars if g is not None] + # theta = theta - lr * grad + global_old = set(n.op.name for n in variables.global_variables()) + local_update_op = self._opt.apply_gradients(grads_and_vars) + global_new = set(n.op.name for n in variables.global_variables()) + + self._adjust_optimizer_variable_collection(global_new - global_old) + + # a = a + grad + update_ops = [] + update_ops.append(local_update_op) + grad_vars = [self._grad_map[var] for var in local_vars] + for g, grad_var in zip (grads, grad_vars): + update_ops.append(state_ops.assign_add(grad_var, g)) + + global_center_vars = [self._global_map[var] for var in local_vars] + + # update global variables. + def _Update_global_variables(): + global_norm = [] + # a = a / t + for g in grad_vars: + global_norm.append(state_ops.assign(g, g / self._period)) + # apply + with ops.control_dependencies(global_norm): + apply_global_op = self._opt.apply_gradients(zip(grad_vars, + global_center_vars)) + + # pull + with ops.control_dependencies([apply_global_op]): + update_ops = [] + if global_step: + with ops.colocate_with(global_step): + update_ops.append(state_ops.assign_add(global_step, 1)) + + for lvar in local_vars: + g_val = self._global_map[lvar].read_value() + update_ops.append(state_ops.assign(lvar, g_val)) + for grad_var in grad_vars: + update_ops.append(state_ops.assign(grad_var, + array_ops.zeros_like(grad_var))) + variable_update = control_flow_ops.group(*(update_ops)) + return variable_update + + local_update = state_ops.assign_add( + self._local_step, 1, name='local_step_update').op + + with ops.control_dependencies([local_update]): + condition = math_ops.equal( + math_ops.mod(self._local_step, self._period), 0) + with ops.control_dependencies(update_ops): + conditional_update = control_flow_ops.cond( + condition, _Update_global_variables, control_flow_ops.no_op) + return conditional_update + + def get_init_op(self, 
task_index): + """Returns the op to let all the local variables and local center + variables equal to the global center variables before the training begins + """ + init_ops = [] + local_vars = variables.trainable_variables() + global_center_vars = [self._global_map[var] for var in local_vars] + grad_vars = [self._grad_map[var] for var in local_vars] + if not (local_vars and global_center_vars and grad_vars): + raise ValueError('The lists of local_variables, global_center_variables,' + 'grad_center_variables should not be empty') + for lvar, gc_var in zip(local_vars, global_center_vars): + init_ops.append(state_ops.assign(gc_var, lvar)) + for g in grad_vars: + init_ops.append(state_ops.assign(g, array_ops.zeros_like(g))) + init_op = control_flow_ops.group(*(init_ops)) + return init_op + + def make_session_run_hook(self, is_chief, task_index): + """Creates a hook to handle AGNOptimizerHook ops such as initialization.""" + return _AGNOptimizerHook(self, is_chief, task_index) + + +class _AGNOptimizerHook(session_run_hook.SessionRunHook): + + def __init__(self, agn_optimizer, is_chief, task_index): + """Creates hook to handle AGNOptimizer initialization ops. + + Args: + agn_optimizer: `AGNOptimizer` which this hook will initialize. + is_chief: `Bool`, whether is this a chief replica or not. 
+ task_index: int, task_index of worker + """ + self._agn_optimizer = agn_optimizer + self._is_chief = is_chief + self._task_index = task_index + + def begin(self): + self._local_init_op = variables.local_variables_initializer() + self._global_init_op = None + if self._is_chief: + self._global_init_op = variables.global_variables_initializer() + self._variable_init_op = self._agn_optimizer.get_init_op(self._task_index) + + def after_create_session(self, session, coord): + """Run initialization ops""" + session.run(self._variable_init_op) diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py new file mode 100644 index 0000000000..091943de02 --- /dev/null +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -0,0 +1,279 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Tests for EAOptimizer.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import portpicker + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import device_setter +from tensorflow.python.training import momentum +from tensorflow.python.training import server_lib +from tensorflow.python.training import training +from tensorflow.python.training import training_util + +from tensorflow.contrib.opt.python.training.agn_optimizer import \ + AGNOptimizer, AGNCustomGetter, GLOBAL_VARIABLE_NAME + + +def create_local_cluster(num_workers, num_ps, protocol="grpc"): + """Create local GRPC servers and return them.""" + worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)] + ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)] + cluster_dict = { + "worker": ["localhost:%s" % port for port in worker_ports], + "ps": ["localhost:%s" % port for port in ps_ports] + } + cs = server_lib.ClusterSpec(cluster_dict) + + workers = [ + server_lib.Server( + cs, job_name="worker", protocol=protocol, task_index=ix, start=True) + for ix in range(num_workers) + ] + ps_servers = [ + server_lib.Server( + cs, job_name="ps", protocol=protocol, task_index=ix, start=True) + for ix in range(num_ps) + ] + + return cluster_dict, workers, ps_servers + + +# Creates the workers and return their sessions, graphs, train_ops. 
+# Cheif worker will update at last +def _get_workers(num_workers, period, workers, num_ps=1): + sessions = [] + graphs = [] + train_ops = [] + for worker_id in range(num_workers): + graph = ops.Graph() + is_chief = (worker_id == 0) + with graph.as_default(): + worker_device = "/job:worker/task:%d/cpu:0" % (worker_id) + ps_device = device_setter.replica_device_setter( + worker_device=worker_device, + ps_device="/job:ps/task:0/cpu:0", + ps_tasks=1) + agn_getter = AGNCustomGetter(worker_device=worker_device) + with variable_scope.variable_scope( + "", custom_getter=agn_getter), ops.device(ps_device): + global_step = training_util.get_or_create_global_step() + var_0 = variable_scope.get_variable(initializer=0.0, name="v0") + var_1 = variable_scope.get_variable(initializer=0.5, name="v1") + if num_ps > 1: + with variable_scope.variable_scope("", + partitioner=partitioned_variables.fixed_size_partitioner( + num_ps, axis=0), + custom_getter=agn_getter), ops.device(ps_device): + + partition_var = variable_scope.get_variable( + 'partition_var', + shape=[2, 4], + initializer=init_ops.zeros_initializer) + part_0 = list(partition_var)[0] + part_1 = list(partition_var)[1] + + with ops.device("/job:worker/task:" + str(worker_id)): + grads_0 = constant_op.constant(-1.0) + grads_1 = constant_op.constant(-1.0) + grads_part_0 = constant_op.constant([[-1., -1., -1., -1.]]) + grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]]) + + optimizer = \ + momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0) + opt = AGNOptimizer( + optimizer, + num_worker=num_workers, + communication_period=period, + custom_getter=agn_getter) + if num_ps == 1: + train_op = [ + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), + global_step) + ] + else: + train_op = [ + opt.apply_gradients(([grads_0, var_0], + [grads_1, var_1], + [grads_part_0, part_0], + [grads_part_1, part_1]), + global_step) + ] + hook = opt.make_session_run_hook(is_chief, worker_id) + # Creates MonitoredSession + sess 
= training.MonitoredTrainingSession( + workers[worker_id].target, hooks=[hook]) + + sessions.append(sess) + graphs.append(graph) + train_ops.append(train_op) + + return sessions, graphs, train_ops + + +class AGNOptimizerTest(test.TestCase): + + def _run(self, train_op, sess): + sess.run(train_op) + + def test1Workers2Period(self): + num_workers = 1 + communication_period = 4 + num_ps = 1 + _, workers, _ = create_local_cluster( + num_workers=num_workers, num_ps=num_ps) + + sessions, graphs, train_ops = _get_workers( + num_workers, communication_period, workers) + + var_0 = graphs[0].get_tensor_by_name("v0:0") + var_1 = graphs[0].get_tensor_by_name("v1:0") + global_step = training_util.get_global_step(graphs[0]) + var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") + var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + + # Verify the initialized value. + self.assertAllEqual(0.0, sessions[0].run(var_0)) + self.assertAllEqual(0.5, sessions[0].run(var_1)) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + self.assertAllEqual(0, sessions[0].run(global_step)) + # step 0 + sessions[0].run(train_ops[0]) + self.assertNear(0.1, sessions[0].run(var_0), 1e-6) + self.assertNear(0.6, sessions[0].run(var_1), 1e-6) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + self.assertAllEqual(0, sessions[0].run(global_step)) + + # 2 & 3 + sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + self.assertNear(0.3, sessions[0].run(var_0), 1e-6) + self.assertNear(0.8, sessions[0].run(var_1), 1e-6) + + # 4 + sessions[0].run(train_ops[0]) + # pull + self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g)) + self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g)) + self.assertNear(0.1, sessions[0].run(var_0), 1e-6) + self.assertNear(0.6, sessions[0].run(var_1), 1e-6) + + sessions[0].run(train_ops[0]) + 
sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + sessions[0].run(train_ops[0]) + self.assertAllEqual(sessions[0].run(var_0), sessions[0].run(var_0_g)) + self.assertAllEqual(sessions[0].run(var_1), sessions[0].run(var_1_g)) + self.assertNear(0.2, sessions[0].run(var_0), 1e-6) + self.assertNear(0.7, sessions[0].run(var_1), 1e-6) + + def test2Worker1Period(self): + num_workers = 2 + communication_period = 1 + num_ps = 2 + _, workers, _ = create_local_cluster( + num_workers=num_workers, num_ps=num_ps) + + sessions, graphs, train_ops = _get_workers( + num_workers, communication_period, workers, num_ps=2) + + var_0 = graphs[0].get_tensor_by_name("v0:0") + var_1 = graphs[0].get_tensor_by_name("v1:0") + + var_0_1 = graphs[1].get_tensor_by_name("v0:0") + var_1_1 = graphs[1].get_tensor_by_name("v1:0") + + var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") + var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + part_0_g = graphs[0].get_tensor_by_name( + GLOBAL_VARIABLE_NAME + "/partition_var/part_0:0") + part_1_g = graphs[0].get_tensor_by_name( + GLOBAL_VARIABLE_NAME + "/partition_var/part_1:0") + + # Verify the initialized value. 
+ self.assertAllEqual(0.0, sessions[0].run(var_0)) + self.assertAllEqual(0.5, sessions[0].run(var_1)) + self.assertAllEqual(0.0, sessions[1].run(var_0_1)) + self.assertAllEqual(0.5, sessions[1].run(var_1_1)) + self.assertAllEqual(0.0, sessions[0].run(var_0_g)) + self.assertAllEqual(0.5, sessions[0].run(var_1_g)) + + # verify each step + sessions[0].run(train_ops[0]) + self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], + sessions[0].run(part_1_g), + 1e-6) + + sessions[1].run(train_ops[1]) + self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], + sessions[0].run(part_1_g), + 1e-6) + + sessions[0].run(train_ops[0]) + sessions[1].run(train_ops[1]) + + sessions[0].run(train_ops[0]) + sessions[1].run(train_ops[1]) + self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6) + self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], + sessions[0].run(part_0_g), + 1e-6) + self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], + sessions[0].run(part_1_g), + 1e-6) + + def testAGNCustomGetter(self): + cluster_spec = server_lib.ClusterSpec({ + "ps": ["ps0:2222", "ps1:2222"], + "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] + }) + agn_getter = AGNCustomGetter(worker_device="/job:worker/task:0") + from tensorflow.python.training import device_setter + with ops.device( + device_setter.replica_device_setter(cluster=cluster_spec, + worker_device="/job:worker/task:0", + ps_device="/job:ps")), \ + variable_scope.variable_scope("", custom_getter=agn_getter): + v = variable_scope.get_variable(initializer=[1, 2], name="v") + w = variable_scope.get_variable(initializer=[2, 1], name="w") + v_g, w_g = agn_getter._global_map[v], agn_getter._global_map[w] + self.assertDeviceEqual("/job:worker/task:0", v.device) + 
self.assertDeviceEqual("job:ps/task:0", v_g.device) + self.assertDeviceEqual("/job:worker/task:0", w.device) + self.assertDeviceEqual("job:ps/task:1", w_g.device) + + +if __name__ == "__main__": + test.main() -- cgit v1.2.3 From 512f95d4b5e350fa0709aeef975730f22112b970 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Fri, 24 Aug 2018 11:34:10 -0700 Subject: [Intel MKL] Adding cc tests to the MKL public CI tests. --- tensorflow/tools/ci_build/linux/cpu/run_mkl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh index 2a9f295188..7be5f454ec 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh @@ -33,7 +33,7 @@ yes "" | $PYTHON_BIN_PATH configure.py # Setting KMP_BLOCKTIME to 0 lets OpenMP threads to sleep right after parallel execution # in an MKL primitive. This reduces the effects of an oversubscription of OpenMP threads # caused by executing multiple tests concurrently. -bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=py -k \ +bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=cc,py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only \ --config=mkl --test_env=KMP_BLOCKTIME=0 --config=opt --test_output=errors -- \ //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... 
-- cgit v1.2.3 From 44dc83c18dfb8fff5525422e6c08a468aca4fb65 Mon Sep 17 00:00:00 2001 From: weidankong Date: Fri, 24 Aug 2018 11:52:18 -0700 Subject: AGN: clear unused imports --- tensorflow/contrib/opt/python/training/agn_optimizer.py | 4 ---- tensorflow/contrib/opt/python/training/agn_optimizer_test.py | 2 -- 2 files changed, 6 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index dc1f8d6347..dd058bc26e 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -16,12 +16,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import logging_ops @@ -31,7 +28,6 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import optimizer from tensorflow.python.training import session_run_hook -from tensorflow.python.training import gradient_descent GLOBAL_VARIABLE_NAME = 'global_center_variable' diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 091943de02..4e2200fa1a 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -19,13 +19,11 @@ from __future__ import print_function import portpicker -from tensorflow.python.client import session from tensorflow.python.framework import 
constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import device_setter from tensorflow.python.training import momentum -- cgit v1.2.3 From f8ee9799e6a72d4fe24f9fad76d6e6b1b3a01af1 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sat, 25 Aug 2018 07:03:07 +0800 Subject: ENH: raise exception if unsupported features/columns is given --- .../python/estimator/canned/boosted_trees.py | 9 +- .../python/estimator/canned/boosted_trees_test.py | 97 ++++++++++++---------- 2 files changed, 63 insertions(+), 43 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/canned/boosted_trees.py b/tensorflow/python/estimator/canned/boosted_trees.py index f2a5b9178b..66784fad0c 100644 --- a/tensorflow/python/estimator/canned/boosted_trees.py +++ b/tensorflow/python/estimator/canned/boosted_trees.py @@ -204,6 +204,9 @@ def _generate_feature_name_mapping(sorted_feature_columns): Returns: feature_name_mapping: a list of feature names indexed by the feature ids. + + Raises: + ValueError: when unsupported features/columns are tried. 
""" names = [] for column in sorted_feature_columns: @@ -221,8 +224,12 @@ def _generate_feature_name_mapping(sorted_feature_columns): else: for num in range(categorical_column._num_buckets): # pylint:disable=protected-access names.append('{}:{}'.format(column.name, num)) - else: + elif isinstance(column, feature_column_lib._BucketizedColumn): names.append(column.name) + else: + raise ValueError( + 'For now, only bucketized_column and indicator_column is supported ' + 'but got: {}'.format(column)) return names diff --git a/tensorflow/python/estimator/canned/boosted_trees_test.py b/tensorflow/python/estimator/canned/boosted_trees_test.py index 7620f73425..14c05e024d 100644 --- a/tensorflow/python/estimator/canned/boosted_trees_test.py +++ b/tensorflow/python/estimator/canned/boosted_trees_test.py @@ -892,6 +892,49 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): 'all empty or contain only a root node'): est.experimental_feature_importances(normalize=True) + def testNegativeFeatureImportances(self): + est = boosted_trees.BoostedTreesClassifier( + feature_columns=self._feature_columns, + n_batches_per_layer=1, + n_trees=1, + max_depth=5) + + # In order to generate a negative feature importances, + # We assign an invalid value -1 to tree_weights here. 
+ tree_ensemble_text = """ + trees { + nodes { + bucketized_split { + feature_id: 1 + left_id: 1 + right_id: 2 + } + metadata { + gain: 5.0 + } + } + nodes { + leaf { + scalar: -0.34 + } + } + nodes { + leaf { + scalar: 1.34 + } + } + } + tree_weights: -1.0 + """ + self._create_fake_checkpoint_with_tree_ensemble_proto( + est, tree_ensemble_text) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=False) + + with self.assertRaisesRegexp(AssertionError, 'non-negative'): + est.experimental_feature_importances(normalize=True) + def testFeatureImportancesNamesForCategoricalColumn(self): categorical = feature_column.categorical_column_with_vocabulary_list( key='categorical', vocabulary_list=('bad', 'good', 'ok')) @@ -1015,48 +1058,18 @@ class BoostedTreesEstimatorTest(test_util.TensorFlowTestCase): self.assertAllEqual(feature_names_expected, feature_names) self.assertAllClose([0.5, 0.2, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0], importances) - def testNegativeFeatureImportances(self): - est = boosted_trees.BoostedTreesClassifier( - feature_columns=self._feature_columns, - n_batches_per_layer=1, - n_trees=1, - max_depth=5) - - # In order to generate a negative feature importances, - # We assign an invalid value -1 to tree_weights here. 
- tree_ensemble_text = """ - trees { - nodes { - bucketized_split { - feature_id: 1 - left_id: 1 - right_id: 2 - } - metadata { - gain: 5.0 - } - } - nodes { - leaf { - scalar: -0.34 - } - } - nodes { - leaf { - scalar: 1.34 - } - } - } - tree_weights: -1.0 - """ - self._create_fake_checkpoint_with_tree_ensemble_proto( - est, tree_ensemble_text) - - with self.assertRaisesRegexp(AssertionError, 'non-negative'): - est.experimental_feature_importances(normalize=False) - - with self.assertRaisesRegexp(AssertionError, 'non-negative'): - est.experimental_feature_importances(normalize=True) + def testFeatureImportancesNamesForUnsupportedColumn(self): + numeric_col = feature_column.numeric_column( + 'continuous', dtype=dtypes.float32) + + with self.assertRaisesRegexp(ValueError, + 'only bucketized_column and indicator_column'): + _ = boosted_trees.BoostedTreesRegressor( + feature_columns=[numeric_col], + n_batches_per_layer=1, + n_trees=2, + learning_rate=1.0, + max_depth=1) class ModelFnTests(test_util.TensorFlowTestCase): -- cgit v1.2.3 From 7e91ec68c7df088c306cc56cce621aee7ff53c94 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 24 Aug 2018 22:13:21 -0700 Subject: Added more unit tests and upgraded to the device-less bridge. 
--- WORKSPACE | 6 ++++++ tensorflow/workspace.bzl | 20 ++++++++++---------- third_party/ngraph/ngraph_tf.BUILD | 34 +++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 21 deletions(-) (limited to 'tensorflow') diff --git a/WORKSPACE b/WORKSPACE index 4af1a1e75f..15aa24f3c1 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -86,3 +86,9 @@ new_local_repository( build_file = "//third_party/ngraph:ngraph.BUILD", ) +new_local_repository( + name = "ngraph_tf", + path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf", + build_file = "//third_party/ngraph:ngraph_tf.BUILD", +) + diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 951cb8a89d..a5dc95d609 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -865,16 +865,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), ) - tf_http_archive( - name = "ngraph_tf", - urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - ], - sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", - strip_prefix = "ngraph-tf-0.3.0-rc1", - build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), - ) + # tf_http_archive( + # name = "ngraph_tf", + # urls = [ + # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", + # "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", + # ], + # sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", + # strip_prefix = "ngraph-tf-0.3.0-rc1", + # build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + # ) ############################################################################## # BIND DEFINITIONS diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index d0231e468e..f40d2057e8 100644 --- 
a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -10,26 +10,35 @@ load( cc_library( name = "ngraph_tf", srcs = [ + "src/ngraph_assign_clusters.h", + "src/ngraph_assign_clusters.cc", "src/ngraph_builder.h", "src/ngraph_builder.cc", - "src/ngraph_cluster.h", - "src/ngraph_cluster.cc", + "src/ngraph_capture_variables.h", + "src/ngraph_capture_variables.cc", + "src/ngraph_conversions.h", "src/ngraph_cluster_manager.h", "src/ngraph_cluster_manager.cc", - "src/ngraph_confirm_pass.cc", - "src/ngraph_device.cc", + "src/ngraph_deassign_clusters.h", + "src/ngraph_deassign_clusters.cc", "src/ngraph_encapsulate_op.cc", - "src/ngraph_encapsulate_pass.cc", + "src/ngraph_encapsulate_clusters.h", + "src/ngraph_encapsulate_clusters.cc", "src/ngraph_freshness_tracker.h", "src/ngraph_freshness_tracker.cc", - "src/ngraph_graph_rewrite_passes.cc", - "src/ngraph_liberate_pass.cc", - "src/ngraph_op_kernels.cc", - "src/ngraph_stub_ops.cc", + # "src/ngraph_liberate_pass.cc", + # "src/ngraph_op_kernels.cc", + # "src/ngraph_stub_ops.cc", + "src/ngraph_mark_for_clustering.h", + "src/ngraph_mark_for_clustering.cc", + "src/ngraph_rewrite_pass.cc", + "src/ngraph_rewrite_for_tracking.h", + "src/ngraph_rewrite_for_tracking.cc", + "src/ngraph_tracked_variable.cc", "src/ngraph_utils.h", "src/ngraph_utils.cc", - "src/ngraph_send_recv_ops.cc", - "src/ngraph_variable_ops.cc", + # "src/ngraph_send_recv_ops.cc", + # "src/ngraph_variable_ops.cc", "src/tf_graphcycles.cc", "logging/ngraph_log.h", "logging/ngraph_log.cc", @@ -60,6 +69,9 @@ tf_cc_test( size = "small", srcs = [ "test/tf_exec.cpp", + "test/conversions.cpp", + "test/padding.cpp", + "test/graph_rewrites/assign_clusters.cc", "test/main.cpp", ], deps = [ -- cgit v1.2.3 From 607004e583ecbd9fb788aaf9b360a8d85cf167ac Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 13:12:23 -0700 Subject: AGN: remove compute_gradient --- .../contrib/opt/python/training/agn_optimizer.py | 38 +--------------------- 1 file 
changed, 1 insertion(+), 37 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index dd058bc26e..f47ef5acc5 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -19,9 +19,7 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -134,40 +132,6 @@ class AGNOptimizer(optimizer.Optimizer): name='local_step') self._opt._prepare() - def compute_gradients(self, - loss, - var_list=None, - gate_gradients=optimizer.Optimizer.GATE_OP, - aggregation_method=None, - colocate_gradients_with_ops=False, - grad_loss=None): - """Compute gradients of `loss` for the variables in `var_list`. - Args: - loss: A Tensor containing the value to minimize. - var_list: Optional list or tuple of `tf.Variable` to update to minimize - `loss`. Defaults to the list of variables collected in the graph - under the key `GraphKey.TRAINABLE_VARIABLES`. - gate_gradients: How to gate the computation of gradients. Can be - `GATE_NONE`, `GATE_OP`, or `GATE_GRAPH`. - aggregation_method: Specifies the method used to combine gradient terms. - Valid values are defined in the class `AggregationMethod`. - colocate_gradients_with_ops: If True, try colocating gradients with - the corresponding op. - grad_loss: Optional. A `Tensor` holding the gradient computed for `loss` - - Returns: - A list of (gradient, variable) pairs. Variable is always present, but - gradient can be `None`. 
- """ - if not var_list: - var_list = variables.trainable_variables() - return self._opt.compute_gradients(loss, - var_list, - gate_gradients, - aggregation_method, - colocate_gradients_with_ops, - grad_loss) - def _adjust_optimizer_variable_collection(self, opt_vars): """ Move optimizer created variables to local collection """ @@ -268,7 +232,7 @@ class AGNOptimizer(optimizer.Optimizer): raise ValueError('The lists of local_variables, global_center_variables,' 'grad_center_variables should not be empty') for lvar, gc_var in zip(local_vars, global_center_vars): - init_ops.append(state_ops.assign(gc_var, lvar)) + init_ops.append(state_ops.assign(lvar, gc_var)) for g in grad_vars: init_ops.append(state_ops.assign(g, array_ops.zeros_like(g))) init_op = control_flow_ops.group(*(init_ops)) -- cgit v1.2.3 From 8d226fe074d18aadf98a869755e7d432341ba882 Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 15:59:54 -0700 Subject: AGN: use variable_creator_scope to move variables from GLOBAL_VARIABLES to LOCAL VARIABLES --- tensorflow/contrib/opt/python/training/agn_optimizer.py | 15 ++++++++++----- .../contrib/opt/python/training/agn_optimizer_test.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index f47ef5acc5..8f415c75b9 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -166,12 +166,17 @@ class AGNOptimizer(optimizer.Optimizer): """ local_vars = [v for g, v in grads_and_vars if g is not None] grads = [g for g, v in grads_and_vars if g is not None] + def _variable_creator(next_creator, collections, **kwargs): + if not collections: + collections = [ops.GraphKeys.LOCAL_VARIABLES] + elif ops.GraphKeys.GLOBAL_VARIABLES in collections: + collections = list(collections) + 
collections.append(ops.GraphKeys.LOCAL_VARIABLES) + collections.remove(ops.GraphKeys.GLOBAL_VARIABLES) + return next_creator(collections=collections, **kwargs) # theta = theta - lr * grad - global_old = set(n.op.name for n in variables.global_variables()) - local_update_op = self._opt.apply_gradients(grads_and_vars) - global_new = set(n.op.name for n in variables.global_variables()) - - self._adjust_optimizer_variable_collection(global_new - global_old) + with variable_scope.variable_creator_scope(_variable_creator): + local_update_op = self._opt.apply_gradients(grads_and_vars) # a = a + grad update_ops = [] diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 4e2200fa1a..a2302d2f11 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -23,10 +23,11 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variables from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test from tensorflow.python.training import device_setter -from tensorflow.python.training import momentum +from tensorflow.python.training import adam from tensorflow.python.training import server_lib from tensorflow.python.training import training from tensorflow.python.training import training_util @@ -100,7 +101,7 @@ def _get_workers(num_workers, period, workers, num_ps=1): grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]]) optimizer = \ - momentum.MomentumOptimizer(learning_rate=0.1, momentum=0.0) + adam.AdamOptimizer(learning_rate=0.1, beta1=0.0, beta2=0.0) opt = AGNOptimizer( optimizer, num_worker=num_workers, @@ -152,6 +153,13 @@ class AGNOptimizerTest(test.TestCase): var_0_g = 
graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + # verify adam/beta variables not in global collection + with graphs[0].as_default(): + for ele in variables.global_variables(): + self.assertTrue(ele.op.name.find('beta') < 0) + if ele.op.name.find('global_center_variable') < 0: + self.assertTrue(ele.op.name.find('Adam') < 0) + # Verify the initialized value. self.assertAllEqual(0.0, sessions[0].run(var_0)) self.assertAllEqual(0.5, sessions[0].run(var_1)) -- cgit v1.2.3 From 540ca4a8755a3670920b49647860d085df834a00 Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 27 Aug 2018 17:03:47 -0700 Subject: AGN: fix Sanity test --- .../contrib/opt/python/training/agn_optimizer.py | 19 ++--------- .../opt/python/training/agn_optimizer_test.py | 37 +++++++++++----------- 2 files changed, 21 insertions(+), 35 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer.py b/tensorflow/contrib/opt/python/training/agn_optimizer.py index 8f415c75b9..9fb5be56e6 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer.py @@ -19,7 +19,6 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -132,20 +131,6 @@ class AGNOptimizer(optimizer.Optimizer): name='local_step') self._opt._prepare() - def _adjust_optimizer_variable_collection(self, opt_vars): - """ Move optimizer created variables to local collection - """ - g = ops.get_default_graph() - idx = 0 - for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): - var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] - 
name = var.op.name - if name in opt_vars: - ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) - del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] - else: - idx += 1 - def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Apply gradients to global variables. @@ -182,7 +167,7 @@ class AGNOptimizer(optimizer.Optimizer): update_ops = [] update_ops.append(local_update_op) grad_vars = [self._grad_map[var] for var in local_vars] - for g, grad_var in zip (grads, grad_vars): + for g, grad_var in zip(grads, grad_vars): update_ops.append(state_ops.assign_add(grad_var, g)) global_center_vars = [self._global_map[var] for var in local_vars] @@ -215,7 +200,7 @@ class AGNOptimizer(optimizer.Optimizer): return variable_update local_update = state_ops.assign_add( - self._local_step, 1, name='local_step_update').op + self._local_step, 1, name='local_step_update').op with ops.control_dependencies([local_update]): condition = math_ops.equal( diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index a2302d2f11..28732c2a1d 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -72,9 +72,9 @@ def _get_workers(num_workers, period, workers, num_ps=1): with graph.as_default(): worker_device = "/job:worker/task:%d/cpu:0" % (worker_id) ps_device = device_setter.replica_device_setter( - worker_device=worker_device, - ps_device="/job:ps/task:0/cpu:0", - ps_tasks=1) + worker_device=worker_device, + ps_device="/job:ps/task:0/cpu:0", + ps_tasks=1) agn_getter = AGNCustomGetter(worker_device=worker_device) with variable_scope.variable_scope( "", custom_getter=agn_getter), ops.device(ps_device): @@ -82,7 +82,8 @@ def _get_workers(num_workers, period, workers, num_ps=1): var_0 = variable_scope.get_variable(initializer=0.0, name="v0") var_1 = variable_scope.get_variable(initializer=0.5, name="v1") 
if num_ps > 1: - with variable_scope.variable_scope("", + with variable_scope.variable_scope( + "", partitioner=partitioned_variables.fixed_size_partitioner( num_ps, axis=0), custom_getter=agn_getter), ops.device(ps_device): @@ -109,12 +110,12 @@ def _get_workers(num_workers, period, workers, num_ps=1): custom_getter=agn_getter) if num_ps == 1: train_op = [ - opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), global_step) ] else: train_op = [ - opt.apply_gradients(([grads_0, var_0], + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1], [grads_part_0, part_0], [grads_part_1, part_1]), @@ -232,20 +233,20 @@ class AGNOptimizerTest(test.TestCase): sessions[0].run(train_ops[0]) self.assertNear(0.1, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.1, 0.1, 0.1, 0.1], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) sessions[1].run(train_ops[1]) self.assertNear(0.2, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.2, 0.2, 0.2, 0.2], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) sessions[0].run(train_ops[0]) sessions[1].run(train_ops[1]) @@ -254,11 +255,11 @@ class AGNOptimizerTest(test.TestCase): sessions[1].run(train_ops[1]) self.assertNear(0.6, sessions[0].run(var_0_g), 1e-6) self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], - sessions[0].run(part_0_g), - 1e-6) + sessions[0].run(part_0_g), + 1e-6) self.assertNDArrayNear([0.6, 0.6, 0.6, 0.6], - sessions[0].run(part_1_g), - 1e-6) + sessions[0].run(part_1_g), + 1e-6) def testAGNCustomGetter(self): cluster_spec = server_lib.ClusterSpec({ -- cgit v1.2.3 From ccb1af57af2532dfee1af73899d1970ac7a263e4 Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Tue, 28 
Aug 2018 12:33:41 +0900 Subject: update golden & pylint --- tensorflow/python/kernel_tests/extract_volume_patches_op_test.py | 1 + tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py index 215474f6db..64757a3e07 100644 --- a/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py +++ b/tensorflow/python/kernel_tests/extract_volume_patches_op_test.py @@ -54,6 +54,7 @@ class ExtractVolumePatches(test.TestCase): name="im2col_3d") self.assertAllClose(patches, out_tensor.eval()) + # pylint: disable=bad-whitespace def testKsize1x1x1Stride1x1x1(self): """Verifies that for 1x1x1 kernel the output equals the input.""" image = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6]) + 1 diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 4f19627691..ba928eba9e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1060,6 +1060,10 @@ tf_module { name: "extract_image_patches" argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "extract_volume_patches" + argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "eye" argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\'], " -- cgit v1.2.3 From eafc3914b0356e013b888fb103d20a76faf5ee5c Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Tue, 28 Aug 2018 20:49:09 +0900 Subject: change golden/v2/tensorflow.pbtxt Running the API compatibility test only checks for pbtxt files under 
directory v1. Manually added extract_volume_patches under v2 as extract_image_patches is registered under v2 as well. --- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 5eb42b4db3..f7e63978da 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1060,6 +1060,10 @@ tf_module { name: "extract_image_patches" argspec: "args=[\'images\', \'ksizes\', \'strides\', \'rates\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "extract_volume_patches" + argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "eye" argspec: "args=[\'num_rows\', \'num_columns\', \'batch_shape\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \"\", \'None\'], " -- cgit v1.2.3 From 40aee739c3d5c7aee63020f36b83aded09044efb Mon Sep 17 00:00:00 2001 From: weidankong Date: Tue, 28 Aug 2018 10:09:13 -0700 Subject: AGN: fix sanity failure --- tensorflow/contrib/opt/python/training/agn_optimizer_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py index 28732c2a1d..fc291f829f 100644 --- a/tensorflow/contrib/opt/python/training/agn_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/agn_optimizer_test.py @@ -111,15 +111,15 @@ def _get_workers(num_workers, period, workers, num_ps=1): if num_ps == 1: train_op = [ opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), - global_step) + global_step) ] else: train_op = [ opt.apply_gradients(([grads_0, var_0], - [grads_1, var_1], - 
[grads_part_0, part_0], - [grads_part_1, part_1]), - global_step) + [grads_1, var_1], + [grads_part_0, part_0], + [grads_part_1, part_1]), + global_step) ] hook = opt.make_session_run_hook(is_chief, worker_id) # Creates MonitoredSession -- cgit v1.2.3 From bb45e28b207f9a0d56f1b4a0d372b267e216ad04 Mon Sep 17 00:00:00 2001 From: Naurril Date: Wed, 29 Aug 2018 22:45:38 +0800 Subject: Code formatted --- tensorflow/core/common_runtime/parallel_concat_optimizer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc index 0f853ae52a..6af4ca4d96 100644 --- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -51,7 +51,7 @@ class ParallelConcatRemovePass : public GraphOptimizationPass { for (Node* n : matches) { AttrSlice n_attrs = n->attrs(); auto base_make_node = [n, &n_attrs](const string& op, - const string& name) { + const string& name) { NodeBuilder node_builder(name, op); node_builder.Device(n->requested_device()); string colo; -- cgit v1.2.3 From 1b166c7e6f30bf7179f31764b3615e63025a7472 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Fri, 20 Jul 2018 19:03:55 +0000 Subject: Rename CUDA GPU ID to platform GPU ID Rename CUDA GPU ID to platform GPU ID so the notion is applicable on both CUDA and ROCm platform. 
--- .../contrib/tensorrt/convert/convert_graph.cc | 8 +- .../contrib/tensorrt/kernels/trt_engine_op.cc | 13 +- .../core/common_runtime/gpu/gpu_bfc_allocator.cc | 11 +- .../core/common_runtime/gpu/gpu_bfc_allocator.h | 6 +- .../common_runtime/gpu/gpu_bfc_allocator_test.cc | 30 +-- .../common_runtime/gpu/gpu_cudamalloc_allocator.cc | 5 +- .../common_runtime/gpu/gpu_cudamalloc_allocator.h | 2 +- .../core/common_runtime/gpu/gpu_debug_allocator.cc | 10 +- .../core/common_runtime/gpu/gpu_debug_allocator.h | 4 +- .../common_runtime/gpu/gpu_debug_allocator_test.cc | 59 +++--- tensorflow/core/common_runtime/gpu/gpu_device.cc | 224 +++++++++++---------- tensorflow/core/common_runtime/gpu/gpu_device.h | 22 +- .../core/common_runtime/gpu/gpu_device_test.cc | 19 +- tensorflow/core/common_runtime/gpu/gpu_id.h | 32 +-- .../core/common_runtime/gpu/gpu_id_manager.cc | 38 ++-- .../core/common_runtime/gpu/gpu_id_manager.h | 12 +- .../core/common_runtime/gpu/gpu_id_manager_test.cc | 32 +-- tensorflow/core/common_runtime/gpu/gpu_id_utils.h | 37 ++-- .../core/common_runtime/gpu/gpu_process_state.cc | 15 +- .../core/grappler/clusters/single_machine.cc | 6 +- tensorflow/core/grappler/clusters/utils.cc | 13 +- tensorflow/core/grappler/clusters/utils.h | 2 +- tensorflow/core/grappler/clusters/utils_test.cc | 22 +- tensorflow/core/grappler/costs/utils.cc | 8 +- tensorflow/core/protobuf/config.proto | 2 +- 25 files changed, 333 insertions(+), 299 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b019c99882..f29f4d6deb 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -780,12 +780,12 @@ std::pair GetDeviceAndAllocator( // If device is not set, use the first found GPU device for the conversion. 
for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) { TfGpuId tf_gpu_id(tf_gpu_id_value); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (s.ok()) { VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device " - << cuda_gpu_id.value(); - cuda_device_id = cuda_gpu_id.value(); + << platform_gpu_id.value(); + cuda_device_id = platform_gpu_id.value(); GPUOptions gpu_options; // If the TF to Cuda gpu id mapping exist, the device and corresponding // allocator must have been initialized already, so the diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 2b42d81f47..88cf8d5980 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -565,21 +565,22 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources( new TRTInt8Calibrator(device_buffers_, batch_size, name())); const string label(name()); auto segment_graph = &segment_graph_; - const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id; - if (cuda_gpu_id < 0) { + const int platform_gpu_id = + ctx->device()->tensorflow_gpu_device_info()->gpu_id; + if (platform_gpu_id < 0) { LOG(ERROR) << "Can't get gpu_device_info from context->device()"; return tensorflow::errors::InvalidArgument( "Context->device doesn't contain device info!"); } const int64 workspace_size_bytes = workspace_size_; cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes, - cuda_gpu_id, workspace_size_bytes]() { - VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id + platform_gpu_id, workspace_size_bytes]() { + VLOG(0) << "Starting calibration thread on device " << platform_gpu_id << ", Calibration Resource @ " << cres; - auto err = cudaSetDevice(cuda_gpu_id); + auto err = 
cudaSetDevice(platform_gpu_id); if (err != cudaSuccess) { // TODO(aaroey): should return error here. - LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id + LOG(ERROR) << "Couldn't set cuda device to " << platform_gpu_id << " in calibration thread"; } // ConvertGraphDefToEngine() will try to build the engine. This thread diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc index 2d4c8d0201..c8db384b64 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc @@ -22,16 +22,17 @@ limitations under the License. namespace tensorflow { -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, - const string& name) - : GPUBFCAllocator(cuda_gpu_id, total_memory, GPUOptions(), name) {} +GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id, + size_t total_memory, const string& name) + : GPUBFCAllocator(platform_gpu_id, total_memory, GPUOptions(), name) {} -GPUBFCAllocator::GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, +GPUBFCAllocator::GPUBFCAllocator(PlatformGpuId platform_gpu_id, + size_t total_memory, const GPUOptions& gpu_options, const string& name) : BFCAllocator( new GPUMemAllocator( - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(), + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(), gpu_options.per_process_gpu_memory_fraction() > 1.0 || gpu_options.experimental().use_unified_memory()), total_memory, gpu_options.allow_growth(), name) {} diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h index f1cc2eace1..435ffb4959 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.h @@ -35,11 +35,11 @@ namespace tensorflow { // algorithm. 
class GPUBFCAllocator : public BFCAllocator { public: - // 'cuda_gpu_id' refers to the ID of the GPU device within + // 'platform_gpu_id' refers to the ID of the GPU device within // the process and must reference a valid ID in the process. - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory, const string& name); - GPUBFCAllocator(CudaGpuId cuda_gpu_id, size_t total_memory, + GPUBFCAllocator(PlatformGpuId platform_gpu_id, size_t total_memory, const GPUOptions& gpu_options, const string& name); virtual ~GPUBFCAllocator() {} diff --git a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc index 67caeb3495..518ccba580 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_bfc_allocator_test.cc @@ -46,7 +46,7 @@ static void CheckStats(Allocator* a, int64 num_allocs, int64 bytes_in_use, } TEST(GPUBFCAllocatorTest, NoDups) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); // Allocate a lot of raw pointers @@ -75,7 +75,7 @@ TEST(GPUBFCAllocatorTest, NoDups) { } TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); // Allocate 256 raw pointers of sizes between 100 bytes and about // a meg random::PhiloxRandom philox(123, 17); @@ -133,7 +133,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocations) { } TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); CheckStats(&a, 0, 0, 0, 0); float* first_ptr = a.Allocate(1024); @@ -168,18 +168,18 @@ TEST(GPUBFCAllocatorTest, ExerciseCoalescing) { } TEST(GPUBFCAllocatorTest, AllocateZeroBufSize) 
{ - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); float* ptr = a.Allocate(0); EXPECT_EQ(nullptr, ptr); } TEST(GPUBFCAllocatorTest, TracksSizes) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, a.AllocatedSize(t1)); @@ -188,7 +188,7 @@ TEST(GPUBFCAllocatorTest, AllocatedVsRequested) { TEST(GPUBFCAllocatorTest, TestCustomMemoryLimit) { // Configure a 1MiB byte limit - GPUBFCAllocator a(CudaGpuId(0), 1 << 20, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 20, "GPU_0_bfc"); float* first_ptr = a.Allocate(1 << 6); float* second_ptr = a.Allocate(1 << 20); @@ -203,7 +203,7 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { options.set_allow_growth(true); // Max of 2GiB, but starts out small. - GPUBFCAllocator a(CudaGpuId(0), 1LL << 31, options, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1LL << 31, options, "GPU_0_bfc"); // Allocate 10 raw pointers of sizes between 100 bytes and about // 64 megs. 
@@ -264,8 +264,8 @@ TEST(GPUBFCAllocatorTest, AllocationsAndDeallocationsWithGrowth) { } TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { - GPUBFCAllocator a(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); - GPUBFCAllocator b(CudaGpuId(0), 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc"); + GPUBFCAllocator b(PlatformGpuId(0), 1UL << 60, "GPU_0_bfc"); void* amem = a.AllocateRaw(1, 1); void* bmem = b.AllocateRaw(1, 1 << 30); a.DeallocateRaw(amem); @@ -273,7 +273,7 @@ TEST(GPUBFCAllocatorTest, DISABLED_AllocatorReceivesZeroMemory) { } static void BM_Allocation(int iters) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 524288, 512, 1048576, 10485760, 104857600, @@ -289,7 +289,7 @@ static void BM_Allocation(int iters) { BENCHMARK(BM_Allocation); static void BM_AllocationThreaded(int iters, int num_threads) { - GPUBFCAllocator a(CudaGpuId(0), 1uLL << 33, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1uLL << 33, "GPU_0_bfc"); thread::ThreadPool pool(Env::Default(), "test", num_threads); std::atomic_int_fast32_t count(iters); mutex done_lock; @@ -325,7 +325,7 @@ BENCHMARK(BM_AllocationThreaded)->Arg(1)->Arg(4)->Arg(16); // A more complex benchmark that defers deallocation of an object for // "delay" allocations. static void BM_AllocationDelayed(int iters, int delay) { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); // Exercise a few different allocation sizes std::vector sizes = {256, 4096, 16384, 4096, 512, 1024, 1024}; int size_index = 0; @@ -363,7 +363,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { // only methods inside this class can access private members of BFCAllocator. 
void TestBinDebugInfo() { - GPUBFCAllocator a(CudaGpuId(0), 1 << 30, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 << 30, "GPU_0_bfc"); std::vector initial_ptrs; std::vector initial_ptrs_allocated_sizes; @@ -441,7 +441,7 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test { } void TestLog2FloorNonZeroSlow() { - GPUBFCAllocator a(CudaGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); + GPUBFCAllocator a(PlatformGpuId(0), 1 /* total_memory */, "GPU_0_bfc"); EXPECT_EQ(-1, a.Log2FloorNonZeroSlow(0)); EXPECT_EQ(0, a.Log2FloorNonZeroSlow(1)); EXPECT_EQ(1, a.Log2FloorNonZeroSlow(2)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc index 934a57a5fb..553a5628ad 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.cc @@ -28,9 +28,10 @@ limitations under the License. namespace tensorflow { GPUcudaMallocAllocator::GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUcudaMallocAllocator::~GPUcudaMallocAllocator() { delete base_allocator_; } diff --git a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h index 856fdc34b4..8f38cc5a18 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_cudamalloc_allocator.h @@ -32,7 +32,7 @@ namespace tensorflow { class GPUcudaMallocAllocator : public VisitableAllocator { public: explicit GPUcudaMallocAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUcudaMallocAllocator() override; string Name() 
override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index e4c834b30d..badb021aa5 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -74,9 +74,10 @@ void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { // GPUDebugAllocator // ----------------------------------------------------------------------------- GPUDebugAllocator::GPUDebugAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; } @@ -159,9 +160,10 @@ bool GPUDebugAllocator::CheckFooter(void* ptr) { // GPUNanResetAllocator // ----------------------------------------------------------------------------- GPUNanResetAllocator::GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id) + PlatformGpuId platform_gpu_id) : base_allocator_(allocator) { - stream_exec_ = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + stream_exec_ = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); } GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; } diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h index 0f9b72040c..9e007ed8c1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h @@ -34,7 +34,7 @@ namespace tensorflow { class GPUDebugAllocator : public VisitableAllocator { public: explicit GPUDebugAllocator(VisitableAllocator* allocator, - 
CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUDebugAllocator() override; string Name() override { return "gpu_debug"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; @@ -66,7 +66,7 @@ class GPUDebugAllocator : public VisitableAllocator { class GPUNanResetAllocator : public VisitableAllocator { public: explicit GPUNanResetAllocator(VisitableAllocator* allocator, - CudaGpuId cuda_gpu_id); + PlatformGpuId platform_gpu_id); ~GPUNanResetAllocator() override; string Name() override { return "gpu_nan_reset"; } void* AllocateRaw(size_t alignment, size_t num_bytes) override; diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc index 236a0afa0b..bc3e3a8c35 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator_test.cc @@ -34,10 +34,11 @@ namespace tensorflow { namespace { TEST(GPUDebugAllocatorTest, OverwriteDetection_None) { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); for (int s : {8}) { std::vector cpu_array(s); @@ -58,11 +59,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Header) { for (int s : {8, 211}) { EXPECT_DEATH( { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); auto stream_exec = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + 
GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(s); memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64)); @@ -91,11 +92,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { for (int s : {8, 22}) { EXPECT_DEATH( { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); auto stream_exec = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(s); memset(&cpu_array[0], 0, cpu_array.size() * sizeof(int64)); @@ -121,10 +122,11 @@ TEST(GPUDebugAllocatorTest, OverwriteDetection_Footer) { } TEST(GPUDebugAllocatorTest, ResetToNan) { - const CudaGpuId cuda_gpu_id(0); - GPUNanResetAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + const PlatformGpuId platform_gpu_id(0); + GPUNanResetAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(1024); std::vector cpu_array_result(1024); @@ -161,13 +163,14 @@ TEST(GPUDebugAllocatorTest, ResetToNan) { } TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { - const CudaGpuId cuda_gpu_id(0); + const PlatformGpuId platform_gpu_id(0); // NaN reset must be the outer-most allocator. 
GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id), - cuda_gpu_id); - auto stream_exec = GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id), + platform_gpu_id); + auto stream_exec = + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); std::vector cpu_array(1024); std::vector cpu_array_result(1024); @@ -204,18 +207,18 @@ TEST(GPUDebugAllocatorTest, ResetToNanWithHeaderFooter) { } TEST(GPUDebugAllocatorTest, TracksSizes) { - const CudaGpuId cuda_gpu_id(0); - GPUDebugAllocator a(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id); + const PlatformGpuId platform_gpu_id(0); + GPUDebugAllocator a(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id); EXPECT_EQ(true, a.TracksAllocationSizes()); } TEST(GPUDebugAllocatorTest, AllocatedVsRequested) { - const CudaGpuId cuda_gpu_id(0); + const PlatformGpuId platform_gpu_id(0); GPUNanResetAllocator a( - new GPUDebugAllocator(new GPUBFCAllocator(cuda_gpu_id, 1 << 30, ""), - cuda_gpu_id), - cuda_gpu_id); + new GPUDebugAllocator(new GPUBFCAllocator(platform_gpu_id, 1 << 30, ""), + platform_gpu_id), + platform_gpu_id); float* t1 = a.Allocate(1); EXPECT_EQ(4, a.RequestedSize(t1)); EXPECT_EQ(256, a.AllocatedSize(t1)); diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 2763ac0d4a..4bf23bc017 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -105,9 +105,9 @@ class EigenCudaStreamDevice : public ::Eigen::StreamInterface { reinterpret_cast(scratch + Eigen::kCudaScratchSize); stream_ = cuda_stream; allocator_ = alloc; - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - device_prop_ = &Eigen::m_deviceProperties[cuda_gpu_id.value()]; + PlatformGpuId 
platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); + device_prop_ = &Eigen::m_deviceProperties[platform_gpu_id.value()]; } const cudaStream_t& stream() const override { return *stream_; } @@ -332,9 +332,10 @@ Status BaseGPUDevice::Init(const SessionOptions& options) { gpu_device_info_->stream = streams_[0]->compute; gpu_device_info_->default_context = device_contexts_[0]; gpu_device_info_->event_mgr = em_.get(); - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id)); - gpu_device_info_->gpu_id = cuda_gpu_id.value(); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id)); + gpu_device_info_->gpu_id = platform_gpu_id.value(); set_tensorflow_gpu_device_info(gpu_device_info_); // Whether and how the GPU device uses its own threadpool. @@ -690,9 +691,9 @@ class ConcretePerOpGpuDevice : public PerOpGpuDevice { Eigen::GpuDevice device_; }; -// Parse 'visible_device_list' into a list of CUDA GPU ids. +// Parse 'visible_device_list' into a list of platform GPU ids. Status ParseVisibleDeviceList(const string& visible_device_list, - std::vector* visible_gpu_order) { + std::vector* visible_gpu_order) { visible_gpu_order->clear(); se::Platform* gpu_manager = GPUMachineManager(); @@ -707,26 +708,28 @@ Status ParseVisibleDeviceList(const string& visible_device_list, } else { const std::vector order_str = str_util::Split(visible_device_list, ','); - for (const string& cuda_gpu_id_str : order_str) { - int32 cuda_gpu_id; - if (!strings::safe_strto32(cuda_gpu_id_str, &cuda_gpu_id)) { + for (const string& platform_gpu_id_str : order_str) { + int32 platform_gpu_id; + if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) { return errors::InvalidArgument( "Could not parse entry in 'visible_device_list': '", - cuda_gpu_id_str, "'. visible_device_list = ", visible_device_list); + platform_gpu_id_str, "'. 
visible_device_list = ", + visible_device_list); } - if (cuda_gpu_id < 0 || cuda_gpu_id >= gpu_manager->VisibleDeviceCount()) { + if (platform_gpu_id < 0 || + platform_gpu_id >= gpu_manager->VisibleDeviceCount()) { return errors::InvalidArgument( - "'visible_device_list' listed an invalid GPU id '", cuda_gpu_id, + "'visible_device_list' listed an invalid GPU id '", platform_gpu_id, "' but visible device count is ", gpu_manager->VisibleDeviceCount()); } - visible_gpu_order->push_back(CudaGpuId(cuda_gpu_id)); + visible_gpu_order->push_back(PlatformGpuId(platform_gpu_id)); } } // Validate no repeats. - std::set visible_device_set(visible_gpu_order->begin(), - visible_gpu_order->end()); + std::set visible_device_set(visible_gpu_order->begin(), + visible_gpu_order->end()); if (visible_device_set.size() != visible_gpu_order->size()) { return errors::InvalidArgument( "visible_device_list contained a duplicate entry: ", @@ -737,8 +740,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list, Status VerifyVirtualDeviceSettings( const size_t num_gpus_to_use, const GPUOptions& gpu_options, - const std::vector& visible_gpu_order, - const std::vector& valid_cuda_gpu_ids) { + const std::vector& visible_gpu_order, + const std::vector& valid_platform_gpu_ids) { const auto& virtual_devices = gpu_options.experimental().virtual_devices(); CHECK(!virtual_devices.empty()); if (gpu_options.per_process_gpu_memory_fraction() > 0) { @@ -760,11 +763,11 @@ Status VerifyVirtualDeviceSettings( " #GPUs in visible_device_list: ", visible_gpu_order.size(), " virtual_devices.size(): ", virtual_devices.size()); } - if (valid_cuda_gpu_ids.size() != virtual_devices.size()) { + if (valid_platform_gpu_ids.size() != virtual_devices.size()) { return errors::Unknown( "The number of valid GPUs doesn't match the number of elements in " "the virtual_devices list.", - " #valid GPUs: ", valid_cuda_gpu_ids.size(), + " #valid GPUs: ", valid_platform_gpu_ids.size(), " virtual_devices.size(): ", 
virtual_devices.size()); } return Status::OK(); @@ -806,18 +809,18 @@ int64 MinSystemMemory(int64 available_memory) { } // Get the memory limit for the virtual device being created on GPU with -// 'cuda_gpu_id', when that virtual device is the only virtual device being +// 'platform_gpu_id', when that virtual device is the only virtual device being // created on that GPU. Status SingleVirtualDeviceMemoryLimit(const GPUOptions& gpu_options, - CudaGpuId cuda_gpu_id, + PlatformGpuId platform_gpu_id, int64* memory_limit) { int64 total_memory = 0; int64 available_memory = 0; se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); if (!se->DeviceMemoryUsage(&available_memory, &total_memory)) { return errors::Unknown("Failed to query available memory for GPU ", - cuda_gpu_id.value()); + platform_gpu_id.value()); } int64 allocated_memory = 0; @@ -916,8 +919,8 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, num_gpus_to_use = iter->second; } const auto& gpu_options = options.config.gpu_options(); - std::vector visible_gpu_order; - std::vector valid_cuda_gpu_ids; + std::vector visible_gpu_order; + std::vector valid_platform_gpu_ids; // If we aren't going to use any GPUs, don't initialize them. 
// We don't want to call ParseVisibleDeviceList if num_gpus_to_use is 0, // because it treats an empty gpu_options.visible_device_list as 'all GPUs are @@ -926,12 +929,12 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, TF_RETURN_IF_ERROR(ParseVisibleDeviceList(gpu_options.visible_device_list(), &visible_gpu_order)); TF_RETURN_IF_ERROR( - GetValidDeviceIds(visible_gpu_order, &valid_cuda_gpu_ids)); + GetValidDeviceIds(visible_gpu_order, &valid_platform_gpu_ids)); } - if (num_gpus_to_use > valid_cuda_gpu_ids.size()) { - num_gpus_to_use = valid_cuda_gpu_ids.size(); + if (num_gpus_to_use > valid_platform_gpu_ids.size()) { + num_gpus_to_use = valid_platform_gpu_ids.size(); } - if (!valid_cuda_gpu_ids.empty()) { + if (!valid_platform_gpu_ids.empty()) { // Save the original device. int original_device = 0; cudaError_t err = cudaGetDevice(&original_device); @@ -941,17 +944,18 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, } // Force to implicitly initialize CUDA runtime on each valid GPU before // CreateGPUDevice(). - for (CudaGpuId cuda_gpu_id : valid_cuda_gpu_ids) { - err = cudaSetDevice(cuda_gpu_id.value()); + for (PlatformGpuId platform_gpu_id : valid_platform_gpu_ids) { + err = cudaSetDevice(platform_gpu_id.value()); if (err != cudaSuccess) { - return errors::Internal("cudaSetDevice() on GPU:", cuda_gpu_id.value(), - " failed. Status: ", cudaGetErrorString(err)); + return errors::Internal("cudaSetDevice() on GPU:", + platform_gpu_id.value(), " failed. Status: ", + cudaGetErrorString(err)); } err = cudaFree(nullptr); if (err != cudaSuccess) { - return errors::Internal( - "CUDA runtime implicit initialization on GPU:", cuda_gpu_id.value(), - " failed. Status: ", cudaGetErrorString(err)); + return errors::Internal("CUDA runtime implicit initialization on GPU:", + platform_gpu_id.value(), " failed. Status: ", + cudaGetErrorString(err)); } } // Reset to the original device. 
@@ -977,10 +981,10 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, LOG(INFO) << line_buf; for (int i = 0; i < visible_gpu_order.size(); ++i) { line_buf = strings::StrCat(visible_gpu_order[i].value(), ": "); - CudaGpuId cuda_id_i = visible_gpu_order[i]; + PlatformGpuId gpu_id_i = visible_gpu_order[i]; for (int j = 0; j < visible_gpu_order.size(); ++j) { - CudaGpuId cuda_id_j = visible_gpu_order[j]; - if (im.directed_links.find({cuda_id_i, cuda_id_j}) != + PlatformGpuId gpu_id_j = visible_gpu_order[j]; + if (im.directed_links.find({gpu_id_i, gpu_id_j}) != im.directed_links.end()) { line_buf.append("Y "); } else { @@ -993,22 +997,23 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, const auto& virtual_devices = gpu_options.experimental().virtual_devices(); if (!virtual_devices.empty()) { - TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings( - num_gpus_to_use, gpu_options, visible_gpu_order, valid_cuda_gpu_ids)); + TF_RETURN_IF_ERROR(VerifyVirtualDeviceSettings(num_gpus_to_use, gpu_options, + visible_gpu_order, + valid_platform_gpu_ids)); // We've verified that num_gpus_to_use >= virtual_devices.size(). 
num_gpus_to_use = virtual_devices.size(); CHECK(gpu_options.visible_device_list().empty() || - valid_cuda_gpu_ids == visible_gpu_order); + valid_platform_gpu_ids == visible_gpu_order); } int next_tf_gpu_id = 0; std::vector memory_limit_bytes; for (int i = 0; i < num_gpus_to_use; ++i) { - const CudaGpuId cuda_gpu_id = valid_cuda_gpu_ids[i]; + const PlatformGpuId platform_gpu_id = valid_platform_gpu_ids[i]; if (virtual_devices.empty() || virtual_devices.Get(i).memory_limit_mb_size() == 0) { int64 single_virtual_device_memory_limit = 0; TF_RETURN_IF_ERROR(SingleVirtualDeviceMemoryLimit( - gpu_options, cuda_gpu_id, &single_virtual_device_memory_limit)); + gpu_options, platform_gpu_id, &single_virtual_device_memory_limit)); memory_limit_bytes.push_back(single_virtual_device_memory_limit); } else { const auto& memory_limit_mb = virtual_devices.Get(i).memory_limit_mb(); @@ -1021,7 +1026,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, TfGpuId tf_gpu_id(next_tf_gpu_id); ++next_tf_gpu_id; TF_RETURN_IF_ERROR( - GpuIdManager::InsertTfCudaGpuIdPair(tf_gpu_id, cuda_gpu_id)); + GpuIdManager::InsertTfPlatformGpuIdPair(tf_gpu_id, platform_gpu_id)); } } const int num_tf_gpus = next_tf_gpu_id; @@ -1046,7 +1051,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, return Status::OK(); } -static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id, +static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id, const se::DeviceDescription& desc) { int cc_major; int cc_minor; @@ -1055,9 +1060,8 @@ static string GetShortDeviceDescription(CudaGpuId cuda_gpu_id, cc_minor = 0; } // LINT.IfChange - return strings::StrCat("device: ", cuda_gpu_id.value(), - ", name: ", desc.name(), - ", pci bus id: ", desc.pci_bus_id(), + return strings::StrCat("device: ", platform_gpu_id.value(), ", name: ", + desc.name(), ", pci bus id: ", desc.pci_bus_id(), ", compute capability: ", cc_major, ".", cc_minor); // 
LINT.ThenChange(//tensorflow/python/platform/test.py) } @@ -1072,12 +1076,13 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, const string device_name = strings::StrCat(name_prefix, "/device:GPU:", tf_gpu_id.value()); GpuIdUtil::CheckValidTfGpuId(tf_gpu_id); - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); int numa_node = dev_locality.numa_node(); se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); const se::DeviceDescription& desc = se->GetDeviceDescription(); GPUProcessState* process_state = GPUProcessState::singleton(); Allocator* gpu_allocator = process_state->GetGPUAllocator( @@ -1098,11 +1103,11 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, // TODO(laigd): report error if memory_limit doesn't match stats.bytes_limit. 
BaseGPUDevice* gpu_device = CreateGPUDevice( options, device_name, static_cast(stats.bytes_limit), dev_locality, - tf_gpu_id, GetShortDeviceDescription(cuda_gpu_id, desc), gpu_allocator, - ProcessState::singleton()->GetCPUAllocator(numa_node)); + tf_gpu_id, GetShortDeviceDescription(platform_gpu_id, desc), + gpu_allocator, ProcessState::singleton()->GetCPUAllocator(numa_node)); LOG(INFO) << "Created TensorFlow device (" << device_name << " with " << (stats.bytes_limit >> 20) << " MB memory) -> physical GPU (" - << GetShortDeviceDescription(cuda_gpu_id, desc) << ")"; + << GetShortDeviceDescription(platform_gpu_id, desc) << ")"; TF_RETURN_IF_ERROR(gpu_device->Init(options)); devices->push_back(gpu_device); @@ -1110,18 +1115,21 @@ Status BaseGPUDeviceFactory::CreateGPUDevice(const SessionOptions& options, } namespace { -std::unique_ptr, bool>> +std::unique_ptr, bool>> GetPeerAccessMap(se::Platform* platform, - const std::vector& visible_gpu_order) { - std::unique_ptr, bool>> map( - new std::map, bool>); - for (CudaGpuId cuda_gpu_i : visible_gpu_order) { - for (CudaGpuId cuda_gpu_j : visible_gpu_order) { + const std::vector& visible_gpu_order) { + std::unique_ptr, bool>> map( + new std::map, bool>); + for (PlatformGpuId platform_gpu_i : visible_gpu_order) { + for (PlatformGpuId platform_gpu_j : visible_gpu_order) { se::StreamExecutor* from = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i) + .ValueOrDie(); se::StreamExecutor* to = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie(); - (*map)[{cuda_gpu_i, cuda_gpu_j}] = from->CanEnablePeerAccessTo(to); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j) + .ValueOrDie(); + (*map)[{platform_gpu_i, platform_gpu_j}] = + from->CanEnablePeerAccessTo(to); } } @@ -1131,19 +1139,19 @@ GetPeerAccessMap(se::Platform* platform, } // namespace Status BaseGPUDeviceFactory::GetInterconnectMaps( - const std::vector& 
visible_gpu_order, se::Platform* gpu_manager, - std::vector* maps) { + const std::vector& visible_gpu_order, + se::Platform* gpu_manager, std::vector* maps) { // The default interconnect map is obtained from the StreamExecutor. auto access_map = GetPeerAccessMap(gpu_manager, visible_gpu_order); maps->resize(1); InterconnectMap& imap = maps->at(0); imap.name = "StreamExecutor"; imap.strength = InterconnectMap::kStreamExecutorStrength; - for (CudaGpuId cuda_id_i : visible_gpu_order) { - for (CudaGpuId cuda_id_j : visible_gpu_order) { - if (cuda_id_i == cuda_id_j) continue; - if ((*access_map)[{cuda_id_i, cuda_id_j}]) { - imap.directed_links.insert({cuda_id_i, cuda_id_j}); + for (PlatformGpuId gpu_id_i : visible_gpu_order) { + for (PlatformGpuId gpu_id_j : visible_gpu_order) { + if (gpu_id_i == gpu_id_j) continue; + if ((*access_map)[{gpu_id_i, gpu_id_j}]) { + imap.directed_links.insert({gpu_id_i, gpu_id_j}); } } } @@ -1158,13 +1166,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( all_tf_gpu_ids.push_back(TfGpuId(i)); } for (TfGpuId tf_gpu_id : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); // Get GPU bus_id from its reported NUMA affinity. Because GPUs are // virtualized in some environments, we can't just use the GPU id. // NUMA locales are indexed from 0, buses are indexed from 1. se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(cuda_gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(); const se::DeviceDescription& desc = se->GetDeviceDescription(); int numa_node = desc.numa_node(); if (numa_node < 0) { @@ -1174,7 +1183,8 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( // may run into trouble later with data transfer operations. The // trouble may manifest as slower than expected performance, or // outright failures. 
- LOG(INFO) << "Could not identify NUMA node of CUDA gpu id " << cuda_gpu_id + LOG(INFO) << "Could not identify NUMA node of platform GPU id " + << platform_gpu_id << ", defaulting to 0. Your kernel may not have been built " << "with NUMA support."; numa_node = 0; @@ -1187,10 +1197,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( LocalLinks* links = dev_locality.mutable_links(); for (const InterconnectMap& imap : interconnects) { for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { - CudaGpuId cuda_gpu_dst; + PlatformGpuId platform_gpu_dst; TF_RETURN_IF_ERROR( - GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst)); - if (imap.directed_links.find({cuda_gpu_id, cuda_gpu_dst}) != + GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst)); + if (imap.directed_links.find({platform_gpu_id, platform_gpu_dst}) != imap.directed_links.end()) { InterconnectLink* ilink = links->add_link(); ilink->set_device_id(tf_gpu_dst.value()); @@ -1204,10 +1214,10 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( // add high strength links to the others. 
for (TfGpuId tf_gpu_dst : all_tf_gpu_ids) { if (tf_gpu_id == tf_gpu_dst) continue; - CudaGpuId cuda_gpu_dst; + PlatformGpuId platform_gpu_dst; TF_RETURN_IF_ERROR( - GpuIdManager::TfToCudaGpuId(tf_gpu_dst, &cuda_gpu_dst)); - if (cuda_gpu_id == cuda_gpu_dst) { + GpuIdManager::TfToPlatformGpuId(tf_gpu_dst, &platform_gpu_dst)); + if (platform_gpu_id == platform_gpu_dst) { InterconnectLink* ilink = links->add_link(); ilink->set_device_id(tf_gpu_dst.value()); ilink->set_type("SAME_DEVICE"); @@ -1216,9 +1226,9 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( } (*localities)[tf_gpu_id] = dev_locality; - VLOG(1) << "GPUDevice CudaGpuId " << cuda_gpu_id << " TfGpuId " << tf_gpu_id - << " on bus " << dev_locality.bus_id() << " numa: " << numa_node - << " pci: " << desc.pci_bus_id() + VLOG(1) << "GPUDevice PlatformGpuId " << platform_gpu_id << " TfGpuId " + << tf_gpu_id << " on bus " << dev_locality.bus_id() + << " numa: " << numa_node << " pci: " << desc.pci_bus_id() << " DeviceLocality: " << dev_locality.DebugString(); } return Status::OK(); @@ -1226,14 +1236,14 @@ Status BaseGPUDeviceFactory::GetDeviceLocalities( static int GetDefaultMinGPUMultiprocessorCount( se::Platform* gpu_manager, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { static const int kDefaultMinGPUMultiprocessorCount = 8; // Find the highest multi-processor count across all visible GPUs. 
int max_count = -1; for (int i = 0; i < visible_gpu_order.size(); ++i) { auto exec_status = - GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_order[i]); + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_order[i]); if (!exec_status.ok()) { continue; } @@ -1252,7 +1262,7 @@ static int GetDefaultMinGPUMultiprocessorCount( static int GetMinGPUMultiprocessorCount( se::Platform* gpu_manager, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { const char* tf_min_gpu_core_count = getenv("TF_MIN_GPU_MULTIPROCESSOR_COUNT"); if (tf_min_gpu_core_count == nullptr || @@ -1330,18 +1340,20 @@ std::vector GetSupportedCudaComputeCapabilities() { } Status EnablePeerAccess(se::Platform* platform, - const std::vector& visible_gpu_order) { + const std::vector& visible_gpu_order) { int possible_peer_count = 0; int enabled_peer_count = 0; for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId cuda_gpu_i = visible_gpu_order[i]; + const PlatformGpuId platform_gpu_i = visible_gpu_order[i]; for (int j = 0; j < visible_gpu_order.size(); ++j) { - const CudaGpuId cuda_gpu_j = visible_gpu_order[j]; + const PlatformGpuId platform_gpu_j = visible_gpu_order[j]; // We have already validated that ExecutorForDevice() calls return OK. 
se::StreamExecutor* from = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_i).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_i) + .ValueOrDie(); se::StreamExecutor* to = - GpuIdUtil::ExecutorForCudaGpuId(platform, cuda_gpu_j).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(platform, platform_gpu_j) + .ValueOrDie(); if (from->CanEnablePeerAccessTo(to)) { ++possible_peer_count; @@ -1349,7 +1361,8 @@ Status EnablePeerAccess(se::Platform* platform, if (!status.ok()) { LOG(WARNING) << "Unable to enable peer access between device ordinals " - << cuda_gpu_i << " and " << cuda_gpu_j << ", status: " << status; + << platform_gpu_i << " and " << platform_gpu_j + << ", status: " << status; } else { ++enabled_peer_count; } @@ -1372,22 +1385,23 @@ Status EnablePeerAccess(se::Platform* platform, } // namespace Status BaseGPUDeviceFactory::GetValidDeviceIds( - const std::vector& visible_gpu_order, - std::vector* ids) { + const std::vector& visible_gpu_order, + std::vector* ids) { se::Platform* gpu_manager = GPUMachineManager(); bool new_gpu_found = false; for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId cuda_gpu_id = visible_gpu_order[i]; + const PlatformGpuId visible_gpu_id = visible_gpu_order[i]; - // Only perform this once per visible cuda gpu id. - if (visible_gpu_initialized_[cuda_gpu_id.value()]) { + // Only perform this once per visible platform gpu id. + if (visible_gpu_initialized_[visible_gpu_id.value()]) { continue; } - visible_gpu_initialized_[cuda_gpu_id.value()] = true; + visible_gpu_initialized_[visible_gpu_id.value()] = true; new_gpu_found = true; - auto executor = GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, cuda_gpu_id); + auto executor = + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id); if (!executor.ok()) { return executor.status(); } @@ -1435,9 +1449,9 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( // Filter out devices that don't have the right capability or power. 
for (int i = 0; i < visible_gpu_order.size(); ++i) { - const CudaGpuId visible_gpu_id = visible_gpu_order[i]; + const PlatformGpuId visible_gpu_id = visible_gpu_order[i]; auto exec_status = - GpuIdUtil::ExecutorForCudaGpuId(gpu_manager, visible_gpu_id); + GpuIdUtil::ExecutorForPlatformGpuId(gpu_manager, visible_gpu_id); if (!exec_status.ok()) { LOG(INFO) << "Ignoring visible gpu device " << visible_gpu_id << " whose executor is in invalid state: " @@ -1486,7 +1500,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( if (!ids->empty()) { std::vector raw_ids(ids->size()); std::transform(ids->begin(), ids->end(), raw_ids.begin(), - [](CudaGpuId id) -> int { return id.value(); }); + [](PlatformGpuId id) -> int { return id.value(); }); LOG(INFO) << "Adding visible gpu devices: " << str_util::Join(raw_ids, ", "); } diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 56d03d7a8c..684cc0c1de 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -89,12 +89,12 @@ class BaseGPUDevice : public LocalDevice { void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, DeviceContext* dc, Allocator* allocator) override; - // Returns the CUDA GPU id of this device within the native driver system; + // Returns the platform GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. 
int gpu_id() const { - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id)); - return cuda_gpu_id.value(); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id)); + return platform_gpu_id.value(); } // The executor that provides control for the device; e.g., for CUDA this @@ -168,14 +168,14 @@ class BaseGPUDeviceFactory : public DeviceFactory { int32 strength; static const int kSameDeviceStrength; static const int kStreamExecutorStrength; - std::set> directed_links; + std::set> directed_links; }; protected: // Populates *maps with interconnect maps for all local direct access // pathways between GPUs. virtual Status GetInterconnectMaps( - const std::vector& visible_gpu_order, + const std::vector& visible_gpu_order, se::Platform* gpu_manager, std::vector* maps); struct TfGpuIdHash { @@ -207,16 +207,16 @@ class BaseGPUDeviceFactory : public DeviceFactory { Allocator* gpu_allocator, Allocator* cpu_allocator) = 0; - // Returns into 'ids' the list of valid CUDA GPU ids, in the order that + // Returns into 'ids' the list of valid platform GPU ids, in the order that // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc, // based upon 'visible_gpu_order' which was generated by parsing // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU // ids. - Status GetValidDeviceIds(const std::vector& visible_gpu_order, - std::vector* ids); + Status GetValidDeviceIds(const std::vector& visible_gpu_order, + std::vector* ids); - // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id - // has been initialized by the process. + // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU + // platform_gpu_id has been initialized by the process. 
std::unordered_map visible_gpu_initialized_; }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc index daf59f0560..36294094e9 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc @@ -30,18 +30,21 @@ namespace tensorflow { namespace { const char* kDeviceNamePrefix = "/job:localhost/replica:0/task:0"; -int64 GetTotalGPUMemory(CudaGpuId gpu_id) { +int64 GetTotalGPUMemory(PlatformGpuId gpu_id) { se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id) + .ValueOrDie(); int64 total_memory, available_memory; CHECK(se->DeviceMemoryUsage(&available_memory, &total_memory)); return total_memory; } -Status GetComputeCapability(CudaGpuId gpu_id, int* cc_major, int* cc_minor) { +Status GetComputeCapability(PlatformGpuId gpu_id, int* cc_major, + int* cc_minor) { se::StreamExecutor* se = - GpuIdUtil::ExecutorForCudaGpuId(GPUMachineManager(), gpu_id).ValueOrDie(); + GpuIdUtil::ExecutorForPlatformGpuId(GPUMachineManager(), gpu_id) + .ValueOrDie(); if (!se->GetDeviceDescription().cuda_compute_capability(cc_major, cc_minor)) { *cc_major = 0; *cc_minor = 0; @@ -223,7 +226,7 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) { // error. TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) { int cc_major, cc_minor; - TF_ASSERT_OK(GetComputeCapability(CudaGpuId(0), &cc_major, &cc_minor)); + TF_ASSERT_OK(GetComputeCapability(PlatformGpuId(0), &cc_major, &cc_minor)); // Exit early while running on Pascal or later GPUs. if (cc_major >= 6) { return; @@ -244,10 +247,10 @@ TEST_F(GPUDeviceTest, UnifiedMemoryUnavailableOnPrePascalGpus) { // more memory than what is available on the device. 
TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) { static constexpr double kGpuMemoryFraction = 1.2; - static constexpr CudaGpuId kCudaGpuId(0); + static constexpr PlatformGpuId kPlatformGpuId(0); int cc_major, cc_minor; - TF_ASSERT_OK(GetComputeCapability(kCudaGpuId, &cc_major, &cc_minor)); + TF_ASSERT_OK(GetComputeCapability(kPlatformGpuId, &cc_major, &cc_minor)); // Exit early if running on pre-Pascal GPUs. if (cc_major < 6) { LOG(INFO) @@ -262,7 +265,7 @@ TEST_F(GPUDeviceTest, UnifiedMemoryAllocation) { ASSERT_EQ(1, devices.size()); int64 memory_limit = devices[0]->attributes().memory_limit(); - ASSERT_EQ(memory_limit, static_cast(GetTotalGPUMemory(kCudaGpuId) * + ASSERT_EQ(memory_limit, static_cast(GetTotalGPUMemory(kPlatformGpuId) * kGpuMemoryFraction)); AllocatorAttributes allocator_attributes = AllocatorAttributes(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_id.h b/tensorflow/core/common_runtime/gpu/gpu_id.h index 2a6caea296..f0d9022821 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id.h @@ -25,10 +25,10 @@ namespace tensorflow { // physical machine, it can be filtered by CUDA environment variable // CUDA_VISIBLE_DEVICES. Note that this id is not visible to Tensorflow, but // result after filtering by CUDA_VISIBLE_DEVICES is visible to TF and is -// called CUDA GPU id as below. See +// called platform GPU id as below. See // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars // for more details. -// - CUDA GPU id (also called *visible* GPU id in +// - *platform* GPU id (also called *visible* GPU id in // third_party/tensorflow/core/protobuf/config.proto): this is the id that is // visible to Tensorflow after filtering by CUDA_VISIBLE_DEVICES, and is // generated by the CUDA GPU driver. It starts from 0 and is used for CUDA API @@ -39,14 +39,14 @@ namespace tensorflow { // field of the device name "/device:GPU:", and is also the identifier of // a BaseGPUDevice. 
Note that the configuration allows us to create multiple // BaseGPUDevice per GPU hardware in order to use multi CUDA streams on the -// hardware, so the mapping between TF GPU id and CUDA GPU id is not a 1:1 +// hardware, so the mapping between TF GPU id and platform GPU id is not a 1:1 // mapping, see the example below. // // For example, assuming that in the machine we have GPU device with index 0, 1, // 2 and 3 (physical GPU id). Setting "CUDA_VISIBLE_DEVICES=1,2,3" will create -// the following mapping between CUDA GPU id and physical GPU id: +// the following mapping between platform GPU id and physical GPU id: // -// CUDA GPU id -> physical GPU id +// platform GPU id -> physical GPU id // 0 -> 1 // 1 -> 2 // 2 -> 3 @@ -56,32 +56,32 @@ namespace tensorflow { // // Assuming we configure the Session to create one BaseGPUDevice per GPU // hardware, then setting GPUOptions::visible_device_list to "2,0" will create -// the following mappting between TF GPU id and CUDA GPU id: +// the following mappting between TF GPU id and platform GPU id: // -// TF GPU id -> CUDA GPU ID +// TF GPU id -> platform GPU ID // 0 (i.e. /device:GPU:0) -> 2 // 1 (i.e. /device:GPU:1) -> 0 // -// Note that CUDA GPU id 1 is filtered out by GPUOptions::visible_device_list, -// so it won't be used by the TF process. +// Note that platform GPU id 1 is filtered out by +// GPUOptions::visible_device_list, so it won't be used by the TF process. // // On the other hand, if we configure it to create 2 BaseGPUDevice per GPU // hardware, then setting GPUOptions::visible_device_list to "2,0" will create -// the following mappting between TF GPU id and CUDA GPU id: +// the following mappting between TF GPU id and platform GPU id: // -// TF GPU id -> CUDA GPU ID +// TF GPU id -> platform GPU ID // 0 (i.e. /device:GPU:0) -> 2 // 1 (i.e. /device:GPU:1) -> 2 // 2 (i.e. /device:GPU:2) -> 0 // 3 (i.e. 
/device:GPU:3) -> 0 // -// We create strong-typed integer classes for both TF GPU id and CUDA GPU id to -// minimize programming errors and improve code readability. Except for the +// We create strong-typed integer classes for both TF GPU id and platform GPU id +// to minimize programming errors and improve code readability. Except for the // StreamExecutor interface (as we don't change its API), whenever we need a -// TF GPU id (or CUDA GPU id) we should use TfGpuId (or CudaGpuId) instead of a -// raw integer. +// TF GPU id (or platform GPU id) we should use TfGpuId (or PlatformGpuId) +// instead of a raw integer. TF_LIB_GTL_DEFINE_INT_TYPE(TfGpuId, int32); -TF_LIB_GTL_DEFINE_INT_TYPE(CudaGpuId, int32); +TF_LIB_GTL_DEFINE_INT_TYPE(PlatformGpuId, int32); } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index b5099dc8ef..2b40730119 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -26,26 +26,27 @@ limitations under the License. namespace tensorflow { namespace { -// Manages the map between TfGpuId and CUDA GPU id. -class TfToCudaGpuIdMap { +// Manages the map between TfGpuId and platform GPU id. 
+class TfToPlatformGpuIdMap { public: - static TfToCudaGpuIdMap* singleton() { - static auto* id_map = new TfToCudaGpuIdMap; + static TfToPlatformGpuIdMap* singleton() { + static auto* id_map = new TfToPlatformGpuIdMap; return id_map; } - Status Insert(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) LOCKS_EXCLUDED(mu_) { + Status Insert(TfGpuId tf_gpu_id, PlatformGpuId platform_gpu_id) + LOCKS_EXCLUDED(mu_) { std::pair result; { mutex_lock lock(mu_); - result = id_map_.insert({tf_gpu_id.value(), cuda_gpu_id.value()}); + result = id_map_.insert({tf_gpu_id.value(), platform_gpu_id.value()}); } - if (!result.second && cuda_gpu_id.value() != result.first->second) { + if (!result.second && platform_gpu_id.value() != result.first->second) { return errors::AlreadyExists( "TensorFlow device (GPU:", tf_gpu_id.value(), ") is being mapped to " "multiple CUDA devices (", - cuda_gpu_id.value(), " now, and ", result.first->second, + platform_gpu_id.value(), " now, and ", result.first->second, " previously), which is not supported. 
" "This may be the result of providing different GPU configurations " "(ConfigProto.gpu_options, for example different visible_device_list)" @@ -56,17 +57,17 @@ class TfToCudaGpuIdMap { return Status::OK(); } - bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + bool Find(TfGpuId tf_gpu_id, PlatformGpuId* platform_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); auto result = id_map_.find(tf_gpu_id.value()); if (result == id_map_.end()) return false; - *cuda_gpu_id = result->second; + *platform_gpu_id = result->second; return true; } private: - TfToCudaGpuIdMap() = default; + TfToPlatformGpuIdMap() = default; void TestOnlyReset() LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); @@ -78,17 +79,18 @@ class TfToCudaGpuIdMap { IdMapType id_map_ GUARDED_BY(mu_); friend class ::tensorflow::GpuIdManager; - TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); + TF_DISALLOW_COPY_AND_ASSIGN(TfToPlatformGpuIdMap); }; } // namespace -Status GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, - CudaGpuId cuda_gpu_id) { - return TfToCudaGpuIdMap::singleton()->Insert(tf_gpu_id, cuda_gpu_id); +Status GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id, + PlatformGpuId platform_gpu_id) { + return TfToPlatformGpuIdMap::singleton()->Insert(tf_gpu_id, platform_gpu_id); } -Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { - if (TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { +Status GpuIdManager::TfToPlatformGpuId(TfGpuId tf_gpu_id, + PlatformGpuId* platform_gpu_id) { + if (TfToPlatformGpuIdMap::singleton()->Find(tf_gpu_id, platform_gpu_id)) { return Status::OK(); } return errors::NotFound("TensorFlow device GPU:", tf_gpu_id.value(), @@ -96,7 +98,7 @@ Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { } void GpuIdManager::TestOnlyReset() { - TfToCudaGpuIdMap::singleton()->TestOnlyReset(); + TfToPlatformGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git 
a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 491d92ccdd..62df4310c4 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -21,15 +21,17 @@ limitations under the License. namespace tensorflow { -// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// Class that maintains a map from TfGpuId to PlatformGpuId, and manages the // translation between them. class GpuIdManager { public: - // Adds a mapping from tf_gpu_id to cuda_gpu_id. - static Status InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + // Adds a mapping from tf_gpu_id to platform_gpu_id. + static Status InsertTfPlatformGpuIdPair(TfGpuId tf_gpu_id, + PlatformGpuId platform_gpu_id); - // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. - static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Gets the platform_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToPlatformGpuId(TfGpuId tf_gpu_id, + PlatformGpuId* platform_gpu_id); // Clears the map. Used in unit tests only. static void TestOnlyReset(); diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc index a663ec7051..8bf3c6a308 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager_test.cc @@ -22,38 +22,38 @@ limitations under the License. 
namespace tensorflow { namespace { -CudaGpuId TfToCudaGpuId(TfGpuId tf) { - CudaGpuId cuda; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf, &cuda)); - return cuda; +PlatformGpuId TfToPlatformGpuId(TfGpuId tf) { + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf, &platform_gpu_id)); + return platform_gpu_id; } TEST(GpuIdManagerTest, Basics) { TfGpuId key_0(0); - CudaGpuId value_0(0); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0)); - EXPECT_EQ(value_0, TfToCudaGpuId(key_0)); + PlatformGpuId value_0(0); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0)); + EXPECT_EQ(value_0, TfToPlatformGpuId(key_0)); // Multiple calls to map the same value is ok. - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_0, value_0)); - EXPECT_EQ(value_0, TfToCudaGpuId(key_0)); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_0, value_0)); + EXPECT_EQ(value_0, TfToPlatformGpuId(key_0)); // Map a different TfGpuId to a different value. TfGpuId key_1(3); - CudaGpuId value_1(2); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_1, value_1)); - EXPECT_EQ(value_1, TfToCudaGpuId(key_1)); + PlatformGpuId value_1(2); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_1, value_1)); + EXPECT_EQ(value_1, TfToPlatformGpuId(key_1)); // Mapping a different TfGpuId to the same value is ok. TfGpuId key_2(10); - TF_ASSERT_OK(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_1)); - EXPECT_EQ(value_1, TfToCudaGpuId(key_2)); + TF_ASSERT_OK(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_1)); + EXPECT_EQ(value_1, TfToPlatformGpuId(key_2)); // Mapping the same TfGpuId to a different value. - ASSERT_FALSE(GpuIdManager::InsertTfCudaGpuIdPair(key_2, value_0).ok()); + ASSERT_FALSE(GpuIdManager::InsertTfPlatformGpuIdPair(key_2, value_0).ok()); // Getting a nonexistent mapping. 
- ASSERT_FALSE(GpuIdManager::TfToCudaGpuId(TfGpuId(100), &value_0).ok()); + ASSERT_FALSE(GpuIdManager::TfToPlatformGpuId(TfGpuId(100), &value_0).ok()); } } // namespace diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h index b9c66b3328..b1f10fb1dc 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_utils.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_utils.h @@ -24,34 +24,37 @@ limitations under the License. namespace tensorflow { -// Utility methods for translation between Tensorflow GPU ids and CUDA GPU ids. +// Utility methods for translation between Tensorflow GPU ids and platform GPU +// ids. class GpuIdUtil { public: // Convenient methods for getting the associated executor given a TfGpuId or - // CudaGpuId. - static se::port::StatusOr ExecutorForCudaGpuId( - se::Platform* gpu_manager, CudaGpuId cuda_gpu_id) { - return gpu_manager->ExecutorForDevice(cuda_gpu_id.value()); + // PlatformGpuId. + static se::port::StatusOr ExecutorForPlatformGpuId( + se::Platform* gpu_manager, PlatformGpuId platform_gpu_id) { + return gpu_manager->ExecutorForDevice(platform_gpu_id.value()); } - static se::port::StatusOr ExecutorForCudaGpuId( - CudaGpuId cuda_gpu_id) { - return ExecutorForCudaGpuId(GPUMachineManager(), cuda_gpu_id); + static se::port::StatusOr ExecutorForPlatformGpuId( + PlatformGpuId platform_gpu_id) { + return ExecutorForPlatformGpuId(GPUMachineManager(), platform_gpu_id); } static se::port::StatusOr ExecutorForTfGpuId( TfGpuId tf_gpu_id) { - CudaGpuId cuda_gpu_id; - TF_RETURN_IF_ERROR(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); - return ExecutorForCudaGpuId(cuda_gpu_id); + PlatformGpuId platform_gpu_id; + TF_RETURN_IF_ERROR( + GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); + return ExecutorForPlatformGpuId(platform_gpu_id); } - // Verify that the cuda_gpu_id associated with a TfGpuId is legitimate. 
+ // Verify that the platform_gpu_id associated with a TfGpuId is legitimate. static void CheckValidTfGpuId(TfGpuId tf_gpu_id) { - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); const int visible_device_count = GPUMachineManager()->VisibleDeviceCount(); - CHECK_LT(cuda_gpu_id.value(), visible_device_count) - << "cuda_gpu_id is outside discovered device range." - << " TF GPU id: " << tf_gpu_id << " CUDA GPU id: " << cuda_gpu_id + CHECK_LT(platform_gpu_id.value(), visible_device_count) + << "platform_gpu_id is outside discovered device range." + << " TF GPU id: " << tf_gpu_id + << " platform GPU id: " << platform_gpu_id << " visible device count: " << visible_device_count; } }; diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc index b18688174d..a5b46382f1 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc @@ -106,22 +106,23 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, return nullptr; } - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id)); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id)); gpu_allocator = - new GPUBFCAllocator(cuda_gpu_id, total_bytes, options, + new GPUBFCAllocator(platform_gpu_id, total_bytes, options, strings::StrCat("GPU_", tf_gpu_id.value(), "_bfc")); // If true, checks for memory overwrites by writing // distinctive patterns on both ends of allocated memory. 
if (useCudaMemoryGuardAllocator()) { - gpu_allocator = new GPUDebugAllocator(gpu_allocator, cuda_gpu_id); - gpu_allocator = new GPUNanResetAllocator(gpu_allocator, cuda_gpu_id); + gpu_allocator = new GPUDebugAllocator(gpu_allocator, platform_gpu_id); + gpu_allocator = new GPUNanResetAllocator(gpu_allocator, platform_gpu_id); } else if (useCudaMallocAllocator()) { // If true, passes all allocation requests through to cudaMalloc // useful for doing memory debugging with tools like cuda-memcheck // **WARNING** probably will not work in a multi-gpu scenario - gpu_allocator = new GPUcudaMallocAllocator(gpu_allocator, cuda_gpu_id); + gpu_allocator = + new GPUcudaMallocAllocator(gpu_allocator, platform_gpu_id); } gpu_allocators_[tf_gpu_id.value()] = gpu_allocator; @@ -138,7 +139,7 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options, if (process_state_->ProcessState::FLAGS_brain_gpu_record_mem_types) { ProcessState::MemDesc md; md.loc = ProcessState::MemDesc::GPU; - md.dev_index = cuda_gpu_id.value(); + md.dev_index = platform_gpu_id.value(); md.gpu_registered = false; md.nic_registered = true; if (static_cast(gpu_al_.size()) <= tf_gpu_id.value()) { diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index b97603c890..e4f6bf7c86 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -93,13 +93,13 @@ Status SingleMachine::Provision() { strings::StrCat("Not able to parse GPU device name: ", dev.name())); } TfGpuId tf_gpu_id(parsed.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { return errors::Unavailable("Unknown TF GPU device with id ", tf_gpu_id.value(), ": ", s.ToString()); } - attr = GetLocalGPUInfo(cuda_gpu_id); + attr = 
GetLocalGPUInfo(platform_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index a7519725a5..567e7c075e 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -70,13 +70,14 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { +DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + cudaError_t error = + cudaGetDeviceProperties(&properties, platform_gpu_id.value()); if (error != cudaSuccess) { device.set_type("UNKNOWN"); LOG(ERROR) << "Failed to get device properties, error code: " << error; @@ -122,15 +123,15 @@ DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { } else if (device.type == "GPU") { if (device.has_id) { TfGpuId tf_gpu_id(device.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { LOG(ERROR) << s; return unknown; } - return GetLocalGPUInfo(cuda_gpu_id); + return GetLocalGPUInfo(platform_gpu_id); } else { - return GetLocalGPUInfo(CudaGpuId(0)); + return GetLocalGPUInfo(PlatformGpuId(0)); } } return unknown; diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index ca15c48006..f0a342b728 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -28,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU 
attached to the server on // which grappler is running. -DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); +DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc index 74218adbac..3863d62980 100644 --- a/tensorflow/core/grappler/clusters/utils_test.cc +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -31,22 +31,22 @@ TEST(UtilsTest, GetLocalGPUInfo) { LOG(INFO) << "CUDA is enabled."; DeviceProperties properties; - // Invalid CUDA GPU ID. - properties = GetLocalGPUInfo(CudaGpuId(100)); + // Invalid platform GPU ID. + properties = GetLocalGPUInfo(PlatformGpuId(100)); EXPECT_EQ("UNKNOWN", properties.type()); - // Succeed when a valid CUDA GPU id was inserted. - properties = GetLocalGPUInfo(CudaGpuId(0)); + // Succeed when a valid platform GPU id was inserted. + properties = GetLocalGPUInfo(PlatformGpuId(0)); EXPECT_EQ("GPU", properties.type()); EXPECT_EQ("NVIDIA", properties.vendor()); #else LOG(INFO) << "CUDA is not enabled."; DeviceProperties properties; - properties = GetLocalGPUInfo(CudaGpuId(0)); + properties = GetLocalGPUInfo(PlatformGpuId(0)); EXPECT_EQ("GPU", properties.type()); - properties = GetLocalGPUInfo(CudaGpuId(100)); + properties = GetLocalGPUInfo(PlatformGpuId(100)); EXPECT_EQ("GPU", properties.type()); #endif } @@ -74,20 +74,20 @@ TEST(UtilsTest, GetDeviceInfo) { EXPECT_EQ("NVIDIA", properties.vendor()); #endif - // TF to CUDA GPU id mapping entry doesn't exist. + // TF to platform GPU id mapping entry doesn't exist. device.has_id = true; device.id = 0; properties = GetDeviceInfo(device); EXPECT_EQ("UNKNOWN", properties.type()); #if GOOGLE_CUDA - // Invalid CUDA GPU id. - GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + // Invalid platform GPU id. 
+ GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100)); properties = GetDeviceInfo(device); EXPECT_EQ("UNKNOWN", properties.type()); - // Valid CUDA GPU id. - GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + // Valid platform GPU id. + GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(1), PlatformGpuId(0)); device.id = 1; properties = GetDeviceInfo(device); EXPECT_EQ("GPU", properties.type()); diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index aad00ce039..7691f25327 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -209,13 +209,13 @@ DeviceProperties GetDeviceInfo(const string& device_str) { if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { TfGpuId tf_gpu_id(parsed.id); - CudaGpuId cuda_gpu_id; - Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + PlatformGpuId platform_gpu_id; + Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id); if (!s.ok()) { // We are probably running simulation without linking cuda libraries. - cuda_gpu_id = CudaGpuId(parsed.id); + platform_gpu_id = PlatformGpuId(parsed.id); } - return GetLocalGPUInfo(cuda_gpu_id); + return GetLocalGPUInfo(platform_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index da3a99565e..c68504a272 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -68,7 +68,7 @@ message GPUOptions { // after the process starts. Users are required to use vendor // specific mechanisms (e.g., CUDA_VISIBLE_DEVICES) to control the // physical to visible device mapping prior to invoking TensorFlow. - // 2. In the code, the ids in this list are also called "CUDA GPU id"s, + // 2. 
In the code, the ids in this list are also called "platform GPU id"s, // and the 'virtual' ids of GPU devices (i.e. the ids in the device // name "/device:GPU:") are also called "TF GPU id"s. Please // refer to third_party/tensorflow/core/common_runtime/gpu/gpu_id.h -- cgit v1.2.3 From 204ef67242ce7fbba067b631c4d6c4bcd64288c2 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sat, 1 Sep 2018 21:06:52 +0800 Subject: CLN: remove print method, and append error msg to exception --- tensorflow/python/framework/test_util.py | 30 ++++++++++++++------------- tensorflow/python/framework/test_util_test.py | 8 +++++++ 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index b5388ad0b2..6d03e956da 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1329,35 +1329,36 @@ class TensorFlowTestCase(googletest.TestCase): self.assertEqual( a.shape, b.shape, "Shape mismatch: expected %s, got %s." % (a.shape, b.shape)) + msgs = [msg] if not np.allclose(a, b, rtol=rtol, atol=atol): - # Prints more details than np.testing.assert_allclose. + # Add more details than np.testing.assert_allclose. # # NOTE: numpy.allclose (and numpy.testing.assert_allclose) # checks whether two arrays are element-wise equal within a # tolerance. The relative difference (rtol * abs(b)) and the # absolute difference atol are added together to compare against # the absolute difference between a and b. Here, we want to - # print out which elements violate such conditions. + # tell user which elements violate such conditions. 
cond = np.logical_or( np.abs(a - b) > atol + rtol * np.abs(b), np.isnan(a) != np.isnan(b)) if a.ndim: x = a[np.where(cond)] y = b[np.where(cond)] - print("not close where = ", np.where(cond)) + msgs.append("not close where = {}".format(np.where(cond))) else: # np.where is broken for scalars x, y = a, b - print("not close lhs = ", x) - print("not close rhs = ", y) - print("not close dif = ", np.abs(x - y)) - print("not close tol = ", atol + rtol * np.abs(y)) - print("dtype = %s, shape = %s" % (a.dtype, a.shape)) + msgs.append("not close lhs = {}".format(x)) + msgs.append("not close rhs = {}".format(y)) + msgs.append("not close dif = {}".format(np.abs(x - y))) + msgs.append("not close tol = {}".format(atol + rtol * np.abs(y))) + msgs.append("dtype = {}, shape = {}".format(a.dtype, a.shape)) # TODO(xpan): There seems to be a bug: # tensorflow/compiler/tests:binary_ops_test pass with float32 # nan even though the equal_nan is False by default internally. np.testing.assert_allclose( - a, b, rtol=rtol, atol=atol, err_msg=msg, equal_nan=True) + a, b, rtol=rtol, atol=atol, err_msg="\n".join(msgs), equal_nan=True) def _assertAllCloseRecursive(self, a, @@ -1539,19 +1540,20 @@ class TensorFlowTestCase(googletest.TestCase): np.float16, np.float32, np.float64, dtypes.bfloat16.as_numpy_dtype ]): same = np.logical_or(same, np.logical_and(np.isnan(a), np.isnan(b))) + msgs = [msg] if not np.all(same): - # Prints more details than np.testing.assert_array_equal. + # Add more details than np.testing.assert_array_equal. 
diff = np.logical_not(same) if a.ndim: x = a[np.where(diff)] y = b[np.where(diff)] - print("not equal where = ", np.where(diff)) + msgs.append("not equal where = {}".format(np.where(diff))) else: # np.where is broken for scalars x, y = a, b - print("not equal lhs = ", x) - print("not equal rhs = ", y) - np.testing.assert_array_equal(a, b, err_msg=msg) + msgs.append("not equal lhs = {}".format(x)) + msgs.append("not equal rhs = {}".format(y)) + np.testing.assert_array_equal(a, b, err_msg="\n".join(msgs)) def assertAllGreater(self, a, comparison_target): """Assert element values are all greater than a target value. diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index a0939f98b2..c9b5d46f98 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -270,6 +270,11 @@ class TestUtilTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(AssertionError, r"Not equal to tolerance"): self.assertAllClose(7, 7 + 1e-5) + @test_util.run_in_graph_and_eager_modes + def testAllCloseList(self): + with self.assertRaisesRegexp(AssertionError, r"not close dif"): + self.assertAllClose([0], [1]) + @test_util.run_in_graph_and_eager_modes def testAllCloseDictToNonDict(self): with self.assertRaisesRegexp(ValueError, r"Can't compare dict to non-dict"): @@ -455,6 +460,9 @@ class TestUtilTest(test_util.TensorFlowTestCase): self.assertAllEqual([120] * 3, k) self.assertAllEqual([20] * 3, j) + with self.assertRaisesRegexp(AssertionError, r"not equal lhs"): + self.assertAllEqual([0] * 3, k) + @test_util.run_in_graph_and_eager_modes def testAssertNotAllClose(self): # Test with arrays -- cgit v1.2.3 From 89979f42e827d9eb5c349259a5aa2ec32d38c86a Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 1 Sep 2018 16:07:46 +0000 Subject: Fix MPI build failure caused by StringPiece -> absl::string_view This fix tries to fix the MPI build failure caused by StringPiece -> 
absl::string_view. Signed-off-by: Yong Tang --- tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc | 4 ++-- tensorflow/contrib/mpi/mpi_rendezvous_mgr.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc index 6a7f5efecd..e195cca647 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc @@ -136,7 +136,7 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync( MPIRendezvousMgr* mgr = reinterpret_cast(this->rendezvous_mgr_); - mgr->QueueRequest(parsed.FullKey().ToString(), step_id_, + mgr->QueueRequest(string(parsed.FullKey()), step_id_, std::move(request_call), rendezvous_call); } @@ -258,7 +258,7 @@ void MPIRendezvousMgr::AddRequest(RecvTensorRequest request, std::function res = std::bind( send_cb, status, send_args, recv_args, val, is_dead, mpi_send_call); - SendQueueEntry req(parsed.FullKey().ToString().c_str(), std::move(res)); + SendQueueEntry req(string(parsed.FullKey()), std::move(res)); this->QueueSendRequest(req); diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h index 5596601ddb..90140fcab3 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h @@ -71,7 +71,7 @@ class MPISendTensorCall { void Init(const Rendezvous::ParsedKey& parsed, const int64 step_id, const bool is_dead) { - mRes_.set_key(parsed.FullKey().ToString()); + mRes_.set_key(string(parsed.FullKey())); mRes_.set_step_id(step_id); mRes_.mutable_response()->set_is_dead(is_dead); mRes_.mutable_response()->set_send_start_micros( -- cgit v1.2.3 From 39e324505c380c9d449dc31d34629a9d470c765f Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Tue, 4 Sep 2018 15:01:22 +0800 Subject: Add //tensorflow:install_headers target Used to prepare all the header files so they can easily be installed into /usr/include when packaging TF. 
Signed-off-by: Jason Zaman --- tensorflow/BUILD | 28 ++++++++++++++++++++++++++++ tensorflow/cc/BUILD | 28 ++++++++++++++++++++++++++-- tensorflow/core/BUILD | 19 ++++++++++++++++--- third_party/eigen3/BUILD | 10 ++-------- 4 files changed, 72 insertions(+), 13 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 661cba5ff0..768d4107d8 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -617,3 +617,31 @@ py_library( visibility = ["//visibility:public"], deps = ["//tensorflow/python:no_contrib"], ) + +genrule( + name = "install_headers", + srcs = [ + "//tensorflow/c:headers", + "//tensorflow/c/eager:headers", + "//tensorflow/cc:headers", + "//tensorflow/core:headers", + ], + outs = ["include"], + cmd = """ + mkdir $@ + for f in $(SRCS); do + d="$${f%/*}" + d="$${d#bazel-out*genfiles/}" + d="$${d#*external/eigen_archive/}" + + if [[ $${d} == *local_config_* ]]; then + continue + fi + + mkdir -p "$@/$${d}" + cp "$${f}" "$@/$${d}/" + done + """, + tags = ["manual"], + visibility = ["//visibility:public"], +) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index f56521dac0..b587e63227 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -10,11 +10,12 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", - "tf_cc_test", + "cc_library_with_android_deps", "tf_cc_binary", + "tf_cc_test", "tf_copts", "tf_gen_op_wrappers_cc", - "cc_library_with_android_deps", + "transitive_hdrs", ) cc_library( @@ -716,3 +717,26 @@ tf_cc_test( "//tensorflow/core:testlib", ], ) + +transitive_hdrs( + name = "headers", + visibility = ["//tensorflow:__subpackages__"], + deps = [ + ":cc_ops", + ":client_session", + ":coordinator", + ":gradient_checker", + ":gradients", + ":ops", + ":queue_runner", + ":remote_fused_graph_ops", + ":scope", + "//tensorflow/cc/profiler", + "//tensorflow/cc/saved_model:constants", + "//tensorflow/cc/saved_model:loader", + "//tensorflow/cc/saved_model:reader", + 
"//tensorflow/cc/saved_model:signature_constants", + "//tensorflow/cc/saved_model:tag_constants", + "//tensorflow/cc/tools:freeze_saved_model", + ], +) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5c314f359c..d5d4aad541 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -85,11 +85,12 @@ load( "tf_cc_tests", "tf_copts", "tf_cuda_library", + "tf_features_nomodules_if_android", "tf_gen_op_libs", "tf_generate_proto_text_sources", "tf_genrule_cmd_append_to_srcs", "tf_opts_nortti_if_android", - "tf_features_nomodules_if_android", + "transitive_hdrs", ) load("//tensorflow:tensorflow.bzl", "tf_cc_test_mkl") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") @@ -120,16 +121,16 @@ load( "tf_additional_libdevice_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_mpi_lib_defines", - "tf_additional_proto_hdrs", "tf_additional_proto_compiler_hdrs", + "tf_additional_proto_hdrs", "tf_additional_proto_srcs", "tf_additional_test_deps", "tf_additional_test_srcs", "tf_additional_verbs_lib_defines", "tf_jspb_proto_library", "tf_kernel_tests_linkstatic", - "tf_lib_proto_parsing_deps", "tf_lib_proto_compiler_deps", + "tf_lib_proto_parsing_deps", "tf_nano_proto_library", "tf_platform_hdrs", "tf_platform_srcs", @@ -4691,6 +4692,18 @@ cc_library( ] + tf_additional_libdevice_deps(), ) +transitive_hdrs( + name = "headers", + visibility = ["//tensorflow:__subpackages__"], + deps = [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:stream_executor", + ], +) + # ----------------------------------------------------------------------------- # Google-internal targets go here (must be at the end). 
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 203991b50f..f072f2545a 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -66,19 +66,13 @@ genrule( outs = ["include"], cmd = """ mkdir $@ - for f in $(locations @eigen_archive//:eigen_header_files) ; do + for f in $(SRCS); do d="$${f%/*}" d="$${d#*external/eigen_archive/}" mkdir -p "$@/$${d}" cp "$${f}" "$@/$${d}/" done - - for f in $(locations :eigen_third_party_header_files) ; do - d="$${f%/*}" - - mkdir -p "$@/$${d}" - cp "$${f}" "$@/$${d}/" - done """, + tags = ["manual"], ) -- cgit v1.2.3 From a0da587dddb7ec2bd703e15882b68085cfd7933e Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Fri, 7 Sep 2018 06:48:27 +0900 Subject: fix documentation errors --- .../base_api/api_def_ExtractVolumePatches.pbtxt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt index 3499ade368..3c8a455983 100644 --- a/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ExtractVolumePatches.pbtxt @@ -1,32 +1,32 @@ op { graph_op_name: "ExtractVolumePatches" in_arg { - name: "images" + name: "input" description: < Date: Fri, 7 Sep 2018 08:16:48 +0900 Subject: fix argument name --- tensorflow/core/ops/array_ops.cc | 2 +- tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 2 +- tensorflow/tools/api/golden/v2/tensorflow.pbtxt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 6c8369200a..44908fe875 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -2553,7 +2553,7 @@ REGISTER_OP("ExtractImagePatches") // as the second parameter of all GetWindowedOutputSizeVerbose calls instead // of ksize_*. 
REGISTER_OP("ExtractVolumePatches") - .Input("images: T") + .Input("input: T") .Output("patches: T") .Attr("ksizes: list(int) >= 5") .Attr("strides: list(int) >= 5") diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index ba928eba9e..eafcc208cc 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1062,7 +1062,7 @@ tf_module { } member_method { name: "extract_volume_patches" - argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "eye" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index f7e63978da..cd06ee5763 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1062,7 +1062,7 @@ tf_module { } member_method { name: "extract_volume_patches" - argspec: "args=[\'images\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'input\', \'ksizes\', \'strides\', \'padding\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "eye" -- cgit v1.2.3 From f5eb30c29d5d34145252e49ac3f9bda067abafe8 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Fri, 7 Sep 2018 09:26:44 +0200 Subject: edited flat_map description and removed typo The examples in interleave are quite helpful. 
I just added a reference to this example --- tensorflow/python/data/ops/dataset_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 2c1aa22116..8242c7309d 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1007,7 +1007,7 @@ class Dataset(object): return ParallelMapDataset(self, map_func, num_parallel_calls) def flat_map(self, map_func): - """Maps `map_func` across this dataset and flattens the result. + """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave' Args: map_func: A function mapping a nested structure of tensors (having shapes @@ -1043,7 +1043,7 @@ class Dataset(object): elements are produced. `cycle_length` controls the number of input elements that are processed concurrently. If you set `cycle_length` to 1, this transformation will handle one input element at a time, and will produce - identical results = to `tf.data.Dataset.flat_map`. In general, + identical results to `tf.data.Dataset.flat_map`. In general, this transformation will apply `map_func` to `cycle_length` input elements, open iterators on the returned `Dataset` objects, and cycle through them producing `block_length` consecutive elements from each iterator, and -- cgit v1.2.3 From a11cb4cb1500f35266667d9f72b0a0534f2d1581 Mon Sep 17 00:00:00 2001 From: BY Shen Date: Fri, 7 Sep 2018 22:20:37 +0800 Subject: Fix a bug in TF_LITE_ENSURE_OK. 
--- tensorflow/contrib/lite/context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index b23183b743..58977b5c47 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -148,7 +148,7 @@ void TfLiteIntArrayFree(TfLiteIntArray* v); #define TF_LITE_ENSURE_OK(context, status) \ do { \ if ((status) != kTfLiteOk) { \ - return status; \ + return kTfLiteError; \ } \ } while (0) -- cgit v1.2.3 From 3445242ac138d4d5aa9b346e17cd47ebf23770a5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 7 Sep 2018 23:39:53 +0000 Subject: Fix int64 failure on GPU for TensorArray This fix tries to address the issue raised in 22054 where int64 on GPU results in colocation errors. This fix enables int64 on GPU with TensorArray. This fix fixes 22054. Signed-off-by: Yong Tang --- tensorflow/core/kernels/tensor_array_ops.cc | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index 2ec2651c04..82a7735c6d 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -259,6 +259,7 @@ REGISTER_KERNEL_BUILDER(Name("TensorArrayV3").Device(DEVICE_CPU), TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -487,6 +488,7 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -576,6 +578,7 @@ TF_CALL_ALL_TYPES(REGISTER_READ) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef 
REGISTER_GPU @@ -777,6 +780,7 @@ REGISTER_GATHER_AND_PACK(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -998,6 +1002,7 @@ REGISTER_CONCAT(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1218,6 +1223,7 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_AND_UNPACK); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA @@ -1388,6 +1394,7 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); +TF_CALL_int64(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA -- cgit v1.2.3 From 81677d2f20664c7f76598c20f2a01d62465999b4 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 7 Sep 2018 23:42:20 +0000 Subject: Add needed specifications for Split on GPU. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/split_lib_gpu.cu.cc | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 393818730b..8623e47e41 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -54,6 +54,7 @@ void SplitCustom::operator()( TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); TF_CALL_complex128(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); TF_CALL_bfloat16(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS @@ -245,6 +246,7 @@ struct SplitVOpGPULaunch { TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #define REGISTER_GPU_KERNEL(T) \ @@ -254,6 +256,7 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); +TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL -- cgit v1.2.3 From bd1fd82712706592b9a6d34a6bac1b1f438eb00f Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 7 Sep 2018 19:16:04 -0700 Subject: Updated the ngraph-tf and ngraph releases. 
--- WORKSPACE | 12 ------------ tensorflow/workspace.bzl | 40 ++++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 32 deletions(-) (limited to 'tensorflow') diff --git a/WORKSPACE b/WORKSPACE index 15aa24f3c1..f1d0ed565d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -80,15 +80,3 @@ new_http_archive( ], ) -new_local_repository( - name = "ngraph", - path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph", - build_file = "//third_party/ngraph:ngraph.BUILD", -) - -new_local_repository( - name = "ngraph_tf", - path = "/nfs/site/home/avijitch/workspace/tf-upstream/ngraph-tf", - build_file = "//third_party/ngraph:ngraph_tf.BUILD", -) - diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 0ff695d9f8..79b3df1e51 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -841,16 +841,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:tbb.BUILD"), ) - # tf_http_archive( - # name = "ngraph", - # urls = [ - # "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - # "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", - # ], - # sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", - # strip_prefix = "ngraph-0.5.0", - # build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), - # ) + tf_http_archive( + name = "ngraph", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", + "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", + ], + sha256 = "", + strip_prefix = "ngraph-0.7.0", + build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + ) tf_http_archive( name = "nlohmann_json_lib", @@ -863,16 +863,16 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), ) - # tf_http_archive( - # name = "ngraph_tf", - # urls = [ - # 
"https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - # "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", - # ], - # sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", - # strip_prefix = "ngraph-tf-0.3.0-rc1", - # build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), - # ) + tf_http_archive( + name = "ngraph_tf", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz", + "https://github.com/NervanaSystems/ngraph-tf/archive/v0.5.0.tar.gz", + ], + sha256 = "23b4566d8e40d6f1f236b0ffe3905dd964ae42ca54bacff67f24abcefd443afb", + strip_prefix = "ngraph-tf-0.5.0", + build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + ) ############################################################################## # BIND DEFINITIONS -- cgit v1.2.3 From 2032512ba1de376baadfa9f3983e3edbc67a6731 Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Fri, 7 Sep 2018 19:21:19 -0700 Subject: Updated the sha256 for ngraph --- tensorflow/workspace.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 79b3df1e51..9a82c724b7 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -847,7 +847,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", "https://github.com/NervanaSystems/ngraph/archive/v0.7.0.tar.gz", ], - sha256 = "", + sha256 = "34434b6d5993ac5233538c84f498840db7ac91df82e225c379ee7c8f6de644a5", strip_prefix = "ngraph-0.7.0", build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), ) -- cgit v1.2.3 From aec495d6acdbdfac97ce91dd0782eb88e307c055 Mon Sep 17 00:00:00 2001 From: pengwa Date: Sat, 8 Sep 2018 11:20:23 +0800 Subject: add more ValueError description in dynamic_rnn document --- tensorflow/python/ops/rnn.py | 4 ++++ 1 file 
changed, 4 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 5c00d929bf..4f3d8c2318 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -709,6 +709,10 @@ def _dynamic_rnn_loop(cell, Raises: ValueError: If the input depth cannot be inferred via shape inference from the inputs. + ValueError: If time is not the same for all the elements in the + input. + ValueError: If batch_size is not the same for all the elements + in the input. """ state = initial_state assert isinstance(parallel_iterations, int), "parallel_iterations must be int" -- cgit v1.2.3 From f40c960fff788b6770b9b4015734e54604f7481b Mon Sep 17 00:00:00 2001 From: Jonathan Homer Date: Sat, 8 Sep 2018 13:52:04 +0100 Subject: Changed PWD to pwd for bash examples Shell command PWD should be lowercase pwd for it work correct. Obvious typo corrected. --- tensorflow/tools/dockerfiles/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md index d64db35afb..5996573cf1 100644 --- a/tensorflow/tools/dockerfiles/README.md +++ b/tensorflow/tools/dockerfiles/README.md @@ -34,13 +34,13 @@ documentation](https://docs.docker.com/engine/reference/run/). # User permissions (-u) are required if you use (-v). 
# CPU-based images -$ docker run -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf +$ docker run -u $(id -u):$(id -g) -v $(pwd):/my-devel -it tf # GPU-based images (set up nvidia-docker2 first) -$ docker run --runtime=nvidia -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf +$ docker run --runtime=nvidia -u $(id -u):$(id -g) -v $(pwd):/my-devel -it tf # Images with Jupyter run on port 8888, and needs a volume for notebooks -$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(PWD):/notebooks -it tf +$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(pwd):/notebooks -it tf ``` These images do not come with the TensorFlow source code -- but the development -- cgit v1.2.3 From c50f1da063a7b6365542d923c4014e84515fe955 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 8 Sep 2018 23:43:35 +0000 Subject: Fix broken link in rnn_colorbot The README.md inside rnn_colorbot is broken, this fix fixes the link. Signed-off-by: Yong Tang --- tensorflow/contrib/eager/python/examples/rnn_colorbot/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/README.md b/tensorflow/contrib/eager/python/examples/rnn_colorbot/README.md index fabd7b3e20..750bbc66f3 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/README.md +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/README.md @@ -23,4 +23,4 @@ Attribution-ShareAlike License and is available at https://en.wikipedia.org/wiki/List_of_colors:_N-Z This example was adapted from - https://github.com/random-forests/tensorflow-workshop/tree/master/extras/colorbot + https://github.com/random-forests/tensorflow-workshop/tree/master/archive/extras/colorbot -- cgit v1.2.3 From 542fb58cf5f66899479602c70659d59897249101 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 9 Sep 2018 18:42:36 +0000 Subject: Fix np.float -> np.floating change While running core_rnn_cell_test: ``` bazel test -s --verbose_failures --config=opt 
//tensorflow/contrib/rnn:core_rnn_cell_test ``` Noticed the following warning: ``` FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`. ``` This fix fixes the above warning. Signed-off-by: Yong Tang --- tensorflow/python/framework/test_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 4bece9e25e..cd23b3923e 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1655,7 +1655,7 @@ class TensorFlowTestCase(googletest.TestCase): if any of the elements do not fall in the specified range. """ target = self._GetNdArray(target) - if not (np.issubdtype(target.dtype, np.float) or + if not (np.issubdtype(target.dtype, np.floating) or np.issubdtype(target.dtype, np.integer)): raise AssertionError( "The value of %s does not have an ordered numeric type, instead it " -- cgit v1.2.3 From ea0d499693c4609a8be55add3163971f93b8f2be Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 10 Sep 2018 01:41:54 +0000 Subject: Fix python 3 GPU test failures Signed-off-by: Yong Tang --- tensorflow/core/kernels/split_lib_gpu.cu.cc | 2 -- tensorflow/core/kernels/tensor_array_ops.cc | 4 ---- 2 files changed, 6 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 8623e47e41..a4a59dbcbc 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -246,7 +246,6 @@ struct SplitVOpGPULaunch { TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); -TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL #define REGISTER_GPU_KERNEL(T) 
\ @@ -256,7 +255,6 @@ TF_CALL_bfloat16(REGISTER_GPU_KERNEL); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNEL); TF_CALL_complex64(REGISTER_GPU_KERNEL); TF_CALL_complex128(REGISTER_GPU_KERNEL); -TF_CALL_int64(REGISTER_GPU_KERNEL); TF_CALL_bfloat16(REGISTER_GPU_KERNEL); #undef REGISTER_GPU_KERNEL diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index 82a7735c6d..58f1a36a90 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -488,7 +488,6 @@ TF_CALL_ALL_TYPES(REGISTER_WRITE); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -780,7 +779,6 @@ REGISTER_GATHER_AND_PACK(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1002,7 +1000,6 @@ REGISTER_CONCAT(qint32); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); REGISTER_GPU(bfloat16); #undef REGISTER_GPU @@ -1394,7 +1391,6 @@ TF_CALL_ALL_TYPES(REGISTER_SPLIT); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); TF_CALL_complex64(REGISTER_GPU); TF_CALL_complex128(REGISTER_GPU); -TF_CALL_int64(REGISTER_GPU); #undef REGISTER_GPU #endif // GOOGLE_CUDA -- cgit v1.2.3 From cfddd182f71147eaf5ee8dc50113de3c0e622655 Mon Sep 17 00:00:00 2001 From: pengwa Date: Mon, 10 Sep 2018 18:51:42 +0800 Subject: fix comments for _dynamic_rnn_loop and LSTMCell::call --- tensorflow/python/ops/rnn.py | 2 +- tensorflow/python/ops/rnn_cell_impl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 4f3d8c2318..259aca5a81 100644 --- a/tensorflow/python/ops/rnn.py +++ 
b/tensorflow/python/ops/rnn.py @@ -709,7 +709,7 @@ def _dynamic_rnn_loop(cell, Raises: ValueError: If the input depth cannot be inferred via shape inference from the inputs. - ValueError: If time is not the same for all the elements in the + ValueError: If time_step is not the same for all the elements in the input. ValueError: If batch_size is not the same for all the elements in the input. diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index c11c9ccaae..3e19183ff5 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -954,7 +954,7 @@ class LSTMCell(LayerRNNCell): """Run one step of LSTM. Args: - inputs: input Tensor, 2D, `[batch, num_units]. + inputs: input Tensor, must be 2-D, `[batch, input_size]`. state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, [batch, state_size]`. If `state_is_tuple` is True, this must be a tuple of state Tensors, both `2-D`, with column sizes `c_state` and -- cgit v1.2.3 From 4b0d12bb8c62a44e895ebd515c0145d1c18e9191 Mon Sep 17 00:00:00 2001 From: pengwa Date: Mon, 10 Sep 2018 18:54:52 +0800 Subject: minor format --- tensorflow/python/ops/rnn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 259aca5a81..dcc17db632 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -711,8 +711,8 @@ def _dynamic_rnn_loop(cell, from the inputs. ValueError: If time_step is not the same for all the elements in the input. - ValueError: If batch_size is not the same for all the elements - in the input. + ValueError: If batch_size is not the same for all the elements in the + input. 
""" state = initial_state assert isinstance(parallel_iterations, int), "parallel_iterations must be int" -- cgit v1.2.3 From 470305c95c6b607e87ca476e5a109e5993f3cf6f Mon Sep 17 00:00:00 2001 From: Peng Yu Date: Mon, 10 Sep 2018 15:24:22 -0400 Subject: Use random_seed for the process input --- tensorflow/contrib/tensor_forest/kernels/stats_ops.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc index f80a34ece6..fe2c91c104 100644 --- a/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc +++ b/tensorflow/contrib/tensor_forest/kernels/stats_ops.cc @@ -246,7 +246,8 @@ class ProcessInputOp : public OpKernel { const Tensor& input_weights = context->input(7); const Tensor& leaf_ids_tensor = context->input(8); - std::unique_ptr data_set(new TensorDataSet(input_spec_, 0)); + std::unique_ptr data_set( + new TensorDataSet(input_spec_, random_seed_)); data_set->set_input_tensors(input_data, sparse_input_indices, sparse_input_values, sparse_input_shape); -- cgit v1.2.3 From 6bbe31c5f5d42f646cb5080d955e9ee91bdb6d93 Mon Sep 17 00:00:00 2001 From: pengwa Date: Tue, 11 Sep 2018 09:05:12 +0800 Subject: fix typos --- tensorflow/python/ops/rnn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index dcc17db632..5a3a5cc225 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -710,9 +710,9 @@ def _dynamic_rnn_loop(cell, ValueError: If the input depth cannot be inferred via shape inference from the inputs. ValueError: If time_step is not the same for all the elements in the - input. + inputs. ValueError: If batch_size is not the same for all the elements in the - input. + inputs. 
""" state = initial_state assert isinstance(parallel_iterations, int), "parallel_iterations must be int" -- cgit v1.2.3 From c807662d69dd1ca8bda7c34a642b812b38a4720b Mon Sep 17 00:00:00 2001 From: Smokrow Date: Tue, 11 Sep 2018 10:35:27 +0200 Subject: added example for flat_map --- tensorflow/python/data/ops/dataset_ops.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 8242c7309d..14a1e3d803 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1007,8 +1007,20 @@ class Dataset(object): return ParallelMapDataset(self, map_func, num_parallel_calls) def flat_map(self, map_func): - """Maps `map_func` across this dataset and flattens the result. Will produce identical results to 'tf.data.Dataset.interleave' + """Maps `map_func` across this dataset and flattens the result. + + Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. + Use `flat_map` if you want to make sure, that the order of your dataset stays the same. + For example: + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. '[...]' represents a tensor. + a = {[1,2,3,4,5], [6,7,8,9], [10]} + + a.flat_map(lambda x: Dataset.from_tensors(x)) == + {[1,2,3,4,5,6,7,8,9,10]} + ``` Args: map_func: A function mapping a nested structure of tensors (having shapes and types defined by `self.output_shapes` and `self.output_types`) to a -- cgit v1.2.3 From 9fd56039064871a736bb7cff398b2a8e08454bee Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 11 Sep 2018 05:34:31 -0700 Subject: Fix a typo in cudnn_convolution_rewriter. 
PiperOrigin-RevId: 212436340 --- tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc index 3d1266355b..228379a248 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc @@ -263,7 +263,7 @@ MatchBackwardInput(HloInstruction* conv) { !(window_util::HasBaseDilation(conv->window()) && (reverse_filter->IsConstant() || is_1x1_filter))) { VLOG(1) << "Can't match to backwards convolution. Either filter is not " - "kReverse, or it's not a base-dialted conv with a 1x1 or " + "kReverse, or it's not a base-dilated conv with a 1x1 or " "constant filter."; return no_match_result; } -- cgit v1.2.3 From 87d440506547d5c549261922c268aa55badf0bc4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 11 Sep 2018 06:09:38 -0700 Subject: Fix 31 ClangTidy - Readability findings in //tensorflow/compiler/xla/. 
* redundant string conversion * using decl 'Eq' is unused * using decl 'HasSubstr' is unused * redundant StrCat calls * please use StrAppend instead of StrCat when appending to an existing string (4 times) * parameters of type 'absl::Span<...>' should be taken by value (23 times) PiperOrigin-RevId: 212439742 --- tensorflow/compiler/xla/client/xla_builder.cc | 2 +- tensorflow/compiler/xla/reference_util.cc | 47 +++++++++----------- tensorflow/compiler/xla/reference_util.h | 50 ++++++++++------------ .../xla/service/gpu/while_transformer_test.cc | 3 -- .../compiler/xla/service/hlo_graph_dumper.cc | 5 +-- .../compiler/xla/tests/reduce_window_test.cc | 8 ++-- 6 files changed, 49 insertions(+), 66 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 4e1ff9e5c0..8951e93ee6 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -2419,7 +2419,7 @@ StatusOr XlaBuilder::AddInstruction(HloInstructionProto&& instr, instr.set_id(handle); instr.set_opcode(HloOpcodeString(opcode)); if (instr.name().empty()) { - instr.set_name(StrCat(instr.opcode())); + instr.set_name(instr.opcode()); } for (const auto& operand : operands) { if (operand.builder_ == nullptr) { diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index 05325367f5..ceb5e74db7 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -186,11 +186,10 @@ ReferenceUtil::SeparableConvArray4D(const Array4D& input, /* static */ std::unique_ptr> ReferenceUtil::ReduceWindow1DGeneric( - const absl::Span& operand, float init, + absl::Span operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding) { + absl::Span window, absl::Span stride, + absl::Span> padding) { std::vector 
dim_lengths{static_cast(operand.size())}; std::vector window_counts(window.size(), 0); std::vector pad_low(window.size(), 0); @@ -218,10 +217,9 @@ ReferenceUtil::ReduceWindow1DGeneric( } /* static */ std::unique_ptr> -ReferenceUtil::ReduceWindow1DAdd(const absl::Span& operand, - float init, - const absl::Span& window, - const absl::Span& stride, +ReferenceUtil::ReduceWindow1DAdd(absl::Span operand, float init, + absl::Span window, + absl::Span stride, Padding padding) { const auto add_reduce = [](float arg1, float arg2) { return arg1 + arg2; }; std::vector dim_lengths{static_cast(operand.size())}; @@ -234,9 +232,8 @@ ReferenceUtil::ReduceWindow1DAdd(const absl::Span& operand, ReferenceUtil::ReduceWindow2DGeneric( const Array2D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding) { + absl::Span window, absl::Span stride, + absl::Span> padding) { std::vector dim_lengths{operand.height(), operand.width()}; std::vector window_counts(window.size(), 0); @@ -273,9 +270,8 @@ ReferenceUtil::ReduceWindow2DGeneric( } /* static */ std::unique_ptr> ReferenceUtil::ReduceWindow2DAdd( - const Array2D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding) { + const Array2D& operand, float init, absl::Span window, + absl::Span stride, Padding padding) { const auto add_reduce = [](float arg1, float arg2) { return arg1 + arg2; }; std::vector dim_lengths{operand.height(), operand.width()}; return ReduceWindow2DGeneric( @@ -284,9 +280,8 @@ ReferenceUtil::ReduceWindow2DGeneric( } /* static */ std::unique_ptr> ReferenceUtil::ReduceWindow3DAdd( - const Array3D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding) { + const Array3D& operand, float init, absl::Span window, + absl::Span stride, Padding padding) { std::vector dim_lengths{operand.n1(), operand.n2(), operand.n3()}; auto padding_both = 
xla::MakePadding(dim_lengths, window, stride, padding); @@ -332,8 +327,8 @@ ReferenceUtil::ReduceWindow2DGeneric( ReferenceUtil::ReduceWindow4DGeneric( const Array4D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, Padding padding) { + absl::Span window, absl::Span stride, + Padding padding) { std::vector dim_lengths{operand.n1(), operand.n2(), operand.n3(), operand.n4()}; return ReduceWindow4DGeneric( @@ -345,9 +340,8 @@ ReferenceUtil::ReduceWindow4DGeneric( ReferenceUtil::ReduceWindow4DGeneric( const Array4D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding) { + absl::Span window, absl::Span stride, + absl::Span> padding) { std::vector dim_lengths{operand.n1(), operand.n2(), operand.n3(), operand.n4()}; @@ -399,9 +393,8 @@ ReferenceUtil::ReduceWindow4DGeneric( } /* static */ std::unique_ptr> ReferenceUtil::ReduceWindow4DAdd( - const Array4D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding) { + const Array4D& operand, float init, absl::Span window, + absl::Span stride, Padding padding) { const auto add_reduce = [](float arg1, float arg2) { return arg1 + arg2; }; return ReduceWindow4DGeneric(operand, init, add_reduce, window, stride, padding); @@ -425,8 +418,8 @@ ReferenceUtil::ReduceWindow4DGeneric( ReferenceUtil::SelectAndScatter4DGePlus(const Array4D& operand, const Array4D& source, float init, - const absl::Span& window, - const absl::Span& stride, + absl::Span window, + absl::Span stride, bool same_padding) { Padding padding = same_padding ? 
Padding::kSame : Padding::kValid; auto result = absl::make_unique>(operand.n1(), operand.n2(), diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 9ce098029d..8654fbb9b5 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -177,47 +177,41 @@ class ReferenceUtil { // Windowed reductions with Add as the function to apply. static std::unique_ptr> ReduceWindow1DAdd( - const absl::Span& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding); + absl::Span operand, float init, + absl::Span window, absl::Span stride, + Padding padding); static std::unique_ptr> ReduceWindow2DAdd( - const Array2D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding); + const Array2D& operand, float init, absl::Span window, + absl::Span stride, Padding padding); static std::unique_ptr> ReduceWindow3DAdd( - const Array3D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding); + const Array3D& operand, float init, absl::Span window, + absl::Span stride, Padding padding); static std::unique_ptr> ReduceWindow4DAdd( - const Array4D& operand, float init, - const absl::Span& window, - const absl::Span& stride, Padding padding); + const Array4D& operand, float init, absl::Span window, + absl::Span stride, Padding padding); // Windowed reductions with a generic reduce function. 
static std::unique_ptr> ReduceWindow1DGeneric( - const absl::Span& operand, float init, + absl::Span operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding); + absl::Span window, absl::Span stride, + absl::Span> padding); static std::unique_ptr> ReduceWindow2DGeneric( const Array2D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding); + absl::Span window, absl::Span stride, + absl::Span> padding); static std::unique_ptr> ReduceWindow4DGeneric( const Array4D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, Padding padding); + absl::Span window, absl::Span stride, + Padding padding); // With arbitrary padding. static std::unique_ptr> ReduceWindow4DGeneric( const Array4D& operand, float init, const std::function& reduce_func, - const absl::Span& window, - const absl::Span& stride, - const absl::Span>& padding); + absl::Span window, absl::Span stride, + absl::Span> padding); // Batch normalize data. static std::unique_ptr> BatchNorm4D( @@ -230,8 +224,8 @@ class ReferenceUtil { // TODO(b/74533103) Switch tests to evaluator and remove this implementation. static std::unique_ptr> SelectAndScatter4DGePlus( const Array4D& operand, const Array4D& source, float init, - const absl::Span& window, - const absl::Span& stride, bool same_padding); + absl::Span window, absl::Span stride, + bool same_padding); // Concatenates the lhs and rhs arrays along the concatenate_dimension. // E.g. 
if concatenate_dimension is 0, the "n1"/height dimension is @@ -332,8 +326,8 @@ class ReferenceUtil { // Slices with index clamping template - static std::vector ClampSlice1D(const absl::Span& input, - int64 start, int64 size) { + static std::vector ClampSlice1D(absl::Span input, int64 start, + int64 size) { start = std::min(std::max(0, start), input.size() - size); std::vector result; for (int64 i = 0; i < size; ++i) { diff --git a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc index 40183de96e..9a61f8ac5a 100644 --- a/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc +++ b/tensorflow/compiler/xla/service/gpu/while_transformer_test.cc @@ -26,9 +26,6 @@ limitations under the License. namespace xla { namespace { -using ::testing::Eq; -using ::testing::HasSubstr; - class WhileTransformerTest : public HloTestBase { protected: WhileTransformerTest() diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index d52f4e5a61..4826bff19e 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -469,9 +469,8 @@ stylesheet=< string graph_label = StrCat(label_, "
Computation ", computation_->name()); if (computation_->IsFusionComputation()) { - StrAppend(&graph_label, - StrCat(" (in fusion instruction ", - computation_->FusionInstruction()->name(), ")")); + StrAppend(&graph_label, " (in fusion instruction ", + computation_->FusionInstruction()->name(), ")"); } if (profile_ != nullptr) { auto cycles = profile_->total_cycles_executed(*computation_); diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index d5de9650f1..63491a90bf 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -588,7 +588,7 @@ string R4ReduceWindowTestDataToString( // Test names are not allowed to contain the '-' character. std::replace(str.begin(), str.end(), '-', 'n'); if (::testing::get<1>(data.param)) { - str = absl::StrCat(str, "_bfloat16"); + absl::StrAppend(&str, "_bfloat16"); } return str; } @@ -980,7 +980,7 @@ string R3ReduceWindowTestDataToString( param.layout[0], "_", param.layout[1], "_", param.layout[2], "__reducer_", param.reducer == kAdd ? "add" : "max"); if (::testing::get<1>(data.param)) { - str = absl::StrCat(str, "_bfloat16"); + absl::StrAppend(&str, "_bfloat16"); } return str; } @@ -1121,7 +1121,7 @@ string R2ReduceWindowTestDataToString( param.layout[1], // "__reducer_", param.reducer == kAdd ? "add" : "max"); if (::testing::get<1>(data.param)) { - str = absl::StrCat(str, "_bfloat16"); + absl::StrAppend(&str, "_bfloat16"); } return str; } @@ -1322,7 +1322,7 @@ string R1ReduceWindowTestDataToString( "__pad_high_", absl::StrJoin(param.pad_high, "x"), "__reducer_", param.reducer == kAdd ? 
"add" : "max"); if (::testing::get<1>(data.param)) { - str = absl::StrCat(str, "_bfloat16"); + absl::StrAppend(&str, "_bfloat16"); } return str; } -- cgit v1.2.3 From 9ac00398d1c0e5f3f2e76dec15fa6646f5027633 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Tue, 11 Sep 2018 17:26:16 +0200 Subject: Update of flat_map Rework based on Marks review --- tensorflow/python/data/ops/dataset_ops.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 14a1e3d803..2fc41a3b98 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -1009,16 +1009,18 @@ class Dataset(object): def flat_map(self, map_func): """Maps `map_func` across this dataset and flattens the result. - Will produce similar results to `tf.data.Dataset.interleave(cycle_length=1)`. + `tf.data.Dataset.interleave()` is a generalization of `flat_map`, since + `flat_map` produces a similar outputs as `tf.data.Dataset.interleave(cycle_length=1)` + Use `flat_map` if you want to make sure, that the order of your dataset stays the same. - For example: + For example, to implement unbatch: ```python # NOTE: The following examples use `{ ... }` to represent the # contents of a dataset. '[...]' represents a tensor. a = {[1,2,3,4,5], [6,7,8,9], [10]} - a.flat_map(lambda x: Dataset.from_tensors(x)) == + a.flat_map(lambda x: Dataset.from_tensor_slices(x)) == {[1,2,3,4,5,6,7,8,9,10]} ``` Args: -- cgit v1.2.3 From de5ddd51e32c4630e63c0cb3e960c69f9ac77662 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Sep 2018 09:10:11 -0700 Subject: Add more description for a common use case of SequenceExample. 
PiperOrigin-RevId: 212462406 --- tensorflow/core/example/example.proto | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/example/example.proto b/tensorflow/core/example/example.proto index e7142a4ef9..e36e51d8d5 100644 --- a/tensorflow/core/example/example.proto +++ b/tensorflow/core/example/example.proto @@ -199,7 +199,13 @@ message Example { // to determine if all features within the FeatureList must // have the same size. The same holds for this FeatureList across multiple // examples. -// +// - For sequence modeling, e.g.: +// http://colah.github.io/posts/2015-08-Understanding-LSTMs/ +// https://github.com/tensorflow/nmt +// the feature lists represent a sequence of frames. +// In this scenario, all FeatureLists in a SequenceExample have the same +// number of Feature messages, so that the ith element in each FeatureList +// is part of the ith frame (or time step). // Examples of conformant and non-conformant examples' FeatureLists: // // Conformant FeatureLists: -- cgit v1.2.3 From 847b38406a28546991b62193278ee87910cd3d74 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 11 Sep 2018 09:31:42 -0700 Subject: TFTS: Fix an input statistics race condition The fix is straightforward enough, although the triggering circumstances are still a bit mysterious. The unit test did fail with ubsan prior to this CL, so I'm going to leave it at that for now. 
PiperOrigin-RevId: 212465732 --- .../contrib/timeseries/python/timeseries/estimators_test.py | 9 +++++++++ tensorflow/contrib/timeseries/python/timeseries/math_utils.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py index 461fe22210..83260fc59a 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators_test.py @@ -216,6 +216,15 @@ class TimeSeriesRegressorTest(test.TestCase): exogenous_feature_columns=exogenous_feature_columns) self._fit_restore_fit_test_template(_estimator_fn, dtype=dtype) + def test_structural_ensemble_numpy_input(self): + numpy_data = {"times": numpy.arange(50), + "values": numpy.random.normal(size=[50])} + estimators.StructuralEnsembleRegressor( + num_features=1, periodicities=[], model_dir=self.get_temp_dir(), + config=_SeedRunConfig()).train( + input_pipeline.WholeDatasetInputFn( + input_pipeline.NumpyReader(numpy_data)), + steps=1) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 9b593fecbb..03da2b82e5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -896,8 +896,8 @@ class InputStatisticsFromMiniBatch(object): statistics.total_observation_count, math_ops.cast( gen_math_ops.round( - math_ops.cast(auxiliary_variables.max_time_seen - - statistics.start_time + 1, self._dtype) / + math_ops.cast(max_time_seen_assign - + start_time_update + 1, self._dtype) / inter_observation_duration_estimate), dtypes.int64)) per_chunk_stat_updates = control_flow_ops.group( overall_feature_mean_update, overall_feature_var_update, -- cgit v1.2.3 From 
ac60b46e2c5962fd8099a4406c1788d826ad3c0d Mon Sep 17 00:00:00 2001 From: Yanan Cao Date: Tue, 11 Sep 2018 09:33:04 -0700 Subject: Automated rollback of commit 45965cfd8b54fb113275ffdaced5366e28aa3553 PiperOrigin-RevId: 212465918 --- tensorflow/compiler/jit/BUILD | 6 - .../compiler/jit/encapsulate_subgraphs_pass.cc | 17 - .../compiler/jit/encapsulate_subgraphs_pass.h | 6 - .../jit/encapsulate_xla_computations_pass.cc | 360 --------------------- .../jit/encapsulate_xla_computations_pass.h | 61 ---- .../jit/encapsulate_xla_computations_pass_test.cc | 346 -------------------- .../jit/jit_compilation_pass_registration.cc | 7 - tensorflow/compiler/jit/ops/xla_ops.cc | 19 -- tensorflow/compiler/tf2xla/BUILD | 1 - tensorflow/compiler/tf2xla/cc/BUILD | 4 +- tensorflow/compiler/tf2xla/test_util.cc | 8 - tensorflow/compiler/tf2xla/test_util.h | 16 - .../core/common_runtime/graph_execution_state.cc | 4 - .../core/grappler/optimizers/meta_optimizer.cc | 23 -- 14 files changed, 1 insertion(+), 877 deletions(-) delete mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc delete mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass.h delete mode 100644 tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc (limited to 'tensorflow') diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 352f63bc98..a989f15a1c 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -362,7 +362,6 @@ cc_library( "deadness_analysis.cc", "deadness_analysis_internal.h", "encapsulate_subgraphs_pass.cc", - "encapsulate_xla_computations_pass.cc", "mark_for_compilation_pass.cc", "mark_for_compilation_pass_test_helper.cc", "partially_decluster_pass.cc", @@ -371,7 +370,6 @@ cc_library( "build_xla_launch_ops_pass.h", "deadness_analysis.h", "encapsulate_subgraphs_pass.h", - "encapsulate_xla_computations_pass.h", "mark_for_compilation_pass.h", "mark_for_compilation_pass_test_helper.h", "partially_decluster_pass.h", @@ 
-398,7 +396,6 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels:bounds_check", "@com_google_absl//absl/algorithm:container", - "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], ) @@ -477,7 +474,6 @@ tf_cc_test( size = "small", srcs = [ "encapsulate_subgraphs_pass_test.cc", - "encapsulate_xla_computations_pass_test.cc", "mark_for_compilation_pass_test.cc", "partially_decluster_pass_test.cc", ], @@ -493,9 +489,7 @@ tf_cc_test( "//tensorflow/cc:resource_variable_ops", "//tensorflow/cc:sendrecv_ops", "//tensorflow/compiler/jit/kernels:xla_launch_op", - "//tensorflow/compiler/tf2xla:test_util", "//tensorflow/compiler/tf2xla:xla_compiler", - "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index e0632ff7e4..ae7a22f451 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include -#include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/jit/graphcycles/graphcycles.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" @@ -59,22 +58,6 @@ const char* const kXlaNumResourceArgsAttr = "_XlaNumResourceArgs"; const char* const kXlaHostTransferSequencerAttr = "_xla_host_transfer_sequencer"; -void SortControlInputs(GraphDef* gdef) { - int64 num_nodes = gdef->node_size(); - for (int64 i = 0; i < num_nodes; ++i) { - NodeDef* node = gdef->mutable_node(i); - // Stable sort control inputs and leave the order of data inputs unchanged. 
- std::stable_sort(node->mutable_input()->begin(), - node->mutable_input()->end(), - [](const string& a, const string& b) { - bool a_is_control = absl::StartsWith(a, "^"); - bool b_is_control = absl::StartsWith(b, "^"); - return (!a_is_control && b_is_control) || - (a_is_control && b_is_control && a < b); - }); - } -} - namespace { bool AreAllParentsGuaranteedConst( diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h index 90354a801a..926589546f 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.h @@ -102,12 +102,6 @@ extern const char* const kXlaNumConstantArgsAttr; // Name of the attribute containing the number of resource variable arguments. extern const char* const kXlaNumResourceArgsAttr; -// Sorts each node's control inputs by their names. This guarantees that for two -// structually equivalent GraphDefs, we get the same traversal ordering on -// node's control input fields. -// TODO(hpucha): Move the utilities to a more appropriate place. -void SortControlInputs(GraphDef* gdef); - class EncapsulateSubgraphsPass : public GraphOptimizationPass { public: Status Run(const GraphOptimizationPassOptions& options) override; diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc deleted file mode 100644 index 97ef8cd3cb..0000000000 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc +++ /dev/null @@ -1,360 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h" - -#include "absl/memory/memory.h" -#include "absl/strings/str_cat.h" -#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" -#include "tensorflow/compiler/tf2xla/dump_graph.h" -#include "tensorflow/compiler/xla/status_macros.h" -#include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/lib/gtl/flatset.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" -#include "tensorflow/core/lib/strings/str_util.h" -#include "tensorflow/core/platform/fingerprint.h" - -namespace tensorflow { - -const char* const EncapsulateXlaComputationsPass::kXlaClusterAttr = - "_xla_compile_id"; - -namespace { - -const char* const kXlaClusterOutput = "XlaClusterOutput"; - -// Checks if a graph node is marked to be a guaranteed constant. -bool is_guaranteed_constant(const Node& n) { - bool guaranteed_constant = false; - if (!GetNodeAttr(n.attrs(), "_is_guaranteed_constant", &guaranteed_constant) - .ok()) { - return false; - } - return guaranteed_constant; -} - -// Finds the `index` of an _Arg or _Retval node. -Status GetIndexAttr(const Node& n, int num_args, int* index) { - TF_RETURN_IF_ERROR(GetNodeAttr(n.attrs(), "index", index)); - if (*index < 0 || *index >= num_args) { - return errors::InvalidArgument("Invalid ", n.type_string(), " number ", - *index); - } - return Status::OK(); -} - -// Returns the data type of the destination of an edge. 
-DataType EdgeType(const Edge* edge) { - return edge->dst()->input_type(edge->dst_input()); -} - -// Adds the control inputs of `node` to `*deps`. -void AddControlInputs(const Node& node, gtl::FlatSet* deps) { - for (const Edge* edge : node.in_edges()) { - if (edge->IsControlEdge()) { - deps->insert(edge->src()); - } - } -} - -// Adds the control outputs of `node` to `*deps`. -void AddControlOutputs(const Node& node, gtl::FlatSet* deps) { - for (const Edge* edge : node.out_edges()) { - if (edge->IsControlEdge()) { - deps->insert(edge->dst()); - } - } -} - -// Rewrite function to be passed to EncapsulateSubgraphsInFunctions that sorts -// the arguments into the order expected by XlaLaunch computations: -// 1) arguments -// 2) resource variable arguments -// See the documentation of EncapsulateSubgraphsInFunctions for the meaning -// of the arguments. -// -// TODO(b/113166435): Ordering constraints on XlaLaunch op can be relaxed. -Status RewriteSubgraph(const std::vector& arg_source_tensors, - std::unique_ptr* graph_ptr, - std::vector* input_permutation, - std::vector* output_permutation, - NodeDef* call_def) { - Graph* graph = graph_ptr->get(); - const int num_args = input_permutation->size(); - const int num_retvals = output_permutation->size(); - - std::vector args; - std::vector retvals; - args.reserve(num_args); - retvals.reserve(num_retvals); - for (Node* n : graph->nodes()) { - if (n->type_string() == "_Arg") { - // Check if this is a guaranteed constant. - if (is_guaranteed_constant(*n)) { - return errors::InvalidArgument( - "Guaranteed constants are not supported (", n->name(), ")"); - } - args.push_back(n); - } else if (n->type_string() == "_Retval") { - retvals.push_back(n); - } - } - - if (std::find(args.begin(), args.end(), nullptr) != args.end()) { - return errors::InvalidArgument("Missing or non-consecutive arguments"); - } - - // Reorders the arguments. 
- std::sort(args.begin(), args.end(), [&](Node* a, Node* b) { - // Non-resources appear before resources - bool a_is_resource = (a->output_type(0) == DT_RESOURCE); - bool b_is_resource = (b->output_type(0) == DT_RESOURCE); - // Uses the name as a tiebreaker so the output is deterministic. - StringPiece a_name(a->name()); - StringPiece b_name(b->name()); - return std::tie(a_is_resource, a_name) < std::tie(b_is_resource, b_name); - }); - - // Sorts the retvals by name so the order is deterministic. - std::sort(retvals.begin(), retvals.end(), - [](Node* a, Node* b) { return a->name() < b->name(); }); - - // Computes the permutation to produce the correct argument order, and update - // the argument indices. - int variable_start_index = num_args; - for (int i = 0; i < num_args; ++i) { - int index; - TF_RETURN_IF_ERROR(GetIndexAttr(*args[i], num_args, &index)); - if (args[i]->output_type(0) == DT_RESOURCE && - variable_start_index == num_args) { - variable_start_index = i; - } - (*input_permutation)[index] = i; - args[i]->AddAttr("index", i); - } - VLOG(4) << "variable_start_index: " << variable_start_index; - - // Computes the permutation to produce the correct retval order, and update - // the argument indices. - for (int i = 0; i < num_retvals; ++i) { - int index; - TF_RETURN_IF_ERROR(GetIndexAttr(*retvals[i], num_retvals, &index)); - (*output_permutation)[index] = i; - retvals[i]->AddAttr("index", i); - } - - AddNodeAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, call_def->name(), - call_def); - AddNodeAttr("_variable_start_index", variable_start_index, call_def); - - // Uniquify the function name. - GraphDef gdef; - graph->ToGraphDef(&gdef); - - // Before serialization, sort each node's control inputs to achieve - // determinism. Sorting control inputs could help (but not necessarily) create - // a deterministic serialization and fingerprint. Other sources of - // nondeterminism include unstable node ordering. 
- SortControlInputs(&gdef); - // Fingerprint the function. - // Nondeterminism in serialization would not lead to incorrect results, but - // may cause spurious cache misses. DeterministicSerialization is a - // best-effort deterministic serialization. - string serialized; - TF_RET_CHECK(SerializeToStringDeterministic(gdef, &serialized)); - uint64 fingerprint = Fingerprint64(serialized); - LOG(INFO) << "Subgraph fingerprint:" << fingerprint; - call_def->set_op(absl::StrCat(call_def->op(), "_", fingerprint)); - return Status::OK(); -} - -} // namespace - -/*static*/ Status EncapsulateXlaComputationsPass::Encapsulate( - std::unique_ptr* graph, FunctionLibraryDefinition* flib_def) { - // Check for undeclared outputs before Encapsulation, so we can give a better - // error message. - // TODO(phawkins): merge this with the encapsulation code to avoid the extra - // O(n) pass over the edges. - for (const Edge* e : (*graph)->edges()) { - if (!e->IsControlEdge() && - e->src()->attrs().Find(kXlaClusterAttr) != nullptr && - e->dst()->attrs().Find(kXlaClusterAttr) == nullptr && - e->dst()->type_string() != kXlaClusterOutput) { - return errors::InvalidArgument( - "Undeclared output of XLA computation. A common cause of this error " - "is variable initializers that depend on the XLA computation. Edge: ", - e->src()->name(), ":", e->src_output(), " -> ", e->dst()->name(), ":", - e->dst_input()); - } - } - - auto output = absl::make_unique((*graph)->op_registry()); - TF_RETURN_WITH_CONTEXT_IF_ERROR( - EncapsulateSubgraphsInFunctions( - kXlaClusterAttr, "", **graph, RewriteSubgraph, - /*reuse_existing_functions=*/true, &output, flib_def), - "EncapsulateXlaComputationsPass failed"); - graph->swap(output); - return Status::OK(); -} - -/*static*/ Status EncapsulateXlaComputationsPass::BuildXlaLaunchOps( - Graph* graph) { - // Finds all of the XlaLaunch function calls, to avoid mutating the graph - // while iterating. 
- std::vector launch_nodes; - for (Node* n : graph->nodes()) { - string name; - if (GetNodeAttr(n->attrs(), kXlaClusterAttr, &name).ok()) { - launch_nodes.push_back(n); - } - } - - // Replaces each launch function call together with its neighboring - // XlaClusterOutput nodes with a XlaLaunch node. - for (Node* launch : launch_nodes) { - int variable_start_index; - TF_RETURN_IF_ERROR(GetNodeAttr(launch->attrs(), "_variable_start_index", - &variable_start_index)); - - std::vector in_edges; - TF_RETURN_IF_ERROR(launch->input_edges(&in_edges)); - - const int num_inputs = in_edges.size(); - const int num_variables = num_inputs - variable_start_index; - const int num_args = variable_start_index; - - VLOG(4) << "Launch node '" << launch->name() << "'" - << " input edges: " << in_edges.size() << " num_args: " << num_args - << " num_variables: " << num_variables; - - std::vector nodes_to_remove = {launch}; - - // Data and control inputs to the new XlaLaunch node. - std::vector> data_inputs(num_inputs); - gtl::FlatSet control_inputs; - DataTypeVector arg_types(num_args); - - AddControlInputs(*launch, &control_inputs); - - for (int i = 0; i < num_args; ++i) { - const Edge* edge = in_edges[i]; - data_inputs[i] = {edge->src(), edge->src_output()}; - arg_types[i] = EdgeType(edge); - } - - // Appends the variable inputs. - for (int i = 0; i < num_variables; ++i) { - int pos = variable_start_index + i; - const Edge* edge = in_edges[pos]; - data_inputs[pos] = {edge->src(), edge->src_output()}; - } - - // Outputs. 
- const int num_outputs = launch->output_types().size(); - gtl::FlatSet control_outputs; - std::vector>> data_outputs(num_outputs); - DataTypeVector output_types(num_outputs); - - for (const Edge* le : launch->out_edges()) { - if (le->IsControlEdge()) { - control_outputs.insert(le->dst()); - } else { - TF_RET_CHECK(le->src_output() < num_outputs); - Node* output_node = le->dst(); - - TF_RET_CHECK(output_node->type_string() == kXlaClusterOutput) - << le->DebugString(); - nodes_to_remove.push_back(output_node); - - for (const Edge* oe : output_node->out_edges()) { - TF_RET_CHECK(!oe->IsControlEdge()); - data_outputs[le->src_output()].push_back( - {oe->dst(), oe->dst_input()}); - } - output_types[le->src_output()] = output_node->input_type(0); - - AddControlOutputs(*output_node, &control_outputs); - } - } - - NodeDef def; - def.set_name(launch->name()); - - // Target the XLA CPU/GPU backends. - VLOG(2) << "Replacing with XlaLaunch"; - def.set_op("XlaLaunch"); - AddNodeAttr("Tconstants", DataTypeVector{}, &def); - AddNodeAttr("Targs", arg_types, &def); - AddNodeAttr("Nresources", num_variables, &def); - AddNodeAttr("Tresults", output_types, &def); - NameAttrList function; - function.set_name(launch->type_string()); - AddNodeAttr("function", function, &def); - - for (Node* node : nodes_to_remove) { - VLOG(2) << "Deleting node " << node->DebugString(); - // Ensure that we do not attempt to add control edges to nodes that are - // deleted. 
- control_inputs.erase(node); - control_outputs.erase(node); - graph->RemoveNode(node); - } - - Status status; - Node* xla_launch = graph->AddNode(def, &status); - if (!status.ok()) { - return status; - } - for (int i = 0; i < data_inputs.size(); ++i) { - graph->AddEdge(data_inputs[i].first, data_inputs[i].second, xla_launch, - i); - } - for (Node* n : control_inputs) { - graph->AddControlEdge(n, xla_launch); - } - for (int i = 0; i < data_outputs.size(); ++i) { - for (const auto& successor : data_outputs[i]) { - graph->AddEdge(xla_launch, i, successor.first, successor.second); - } - } - for (Node* n : control_outputs) { - graph->AddControlEdge(xla_launch, n); - } - } - return Status::OK(); -} - -Status EncapsulateXlaComputationsPass::Run( - const GraphOptimizationPassOptions& options) { - VLOG(1) << "EncapsulateXlaComputations(): " - << dump_graph::DumpGraphToFile("encapsulate_xla_computations_before", - **options.graph, options.flib_def); - - TF_RETURN_IF_ERROR(Encapsulate(options.graph, options.flib_def)); - VLOG(1) << "EncapsulateXlaComputations() half-way: " - << dump_graph::DumpGraphToFile("encapsulate_xla_computations_halfway", - **options.graph, options.flib_def); - - TF_RETURN_IF_ERROR(BuildXlaLaunchOps(options.graph->get())); - VLOG(1) << "EncapsulateXlaComputations() finished: " - << dump_graph::DumpGraphToFile("encapsulate_xla_computations_after", - **options.graph, options.flib_def); - return Status::OK(); -} - -} // namespace tensorflow diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h deleted file mode 100644 index c8bb4dc114..0000000000 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.h +++ /dev/null @@ -1,61 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// Rewrites computations generated by the xla.compile() Python code into -// XlaLaunch nodes. -// -// xla.compile() does two main things: -// a) marks operators that make up a XLA computation with the attribute -// _xla_compile_id=XYZ, where XYZ is a unique key. -// b) adds XlaClusterOutput nodes to represent outputs of the computation. -// These nodes are not marked with the _xla_compile_id attribute. - -#ifndef TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_ -#define TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_ - -#include "tensorflow/core/common_runtime/optimization_registry.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/platform/env.h" - -namespace tensorflow { - -// Encapsulates nodes marked with the _xla_compile_id attribute into -// XlaLaunch operators. -class EncapsulateXlaComputationsPass : public GraphOptimizationPass { - public: - static const char* const kXlaClusterAttr; // _xla_compile_id - - Status Run(const GraphOptimizationPassOptions& options) override; - - // The following methods are public only for unit tests. - - // This pass has two stages: - // a) first, we call EncapsulateSubgraphsPass to encapsulate all nodes - // marked with the same _xla_compile_id attribute into functions. These - // functions contain the computations to be passed to XlaLaunch. During - // encapsulation, we sort the arguments into the order expected by - // XlaLaunch. 
- static Status Encapsulate(std::unique_ptr* graph, - FunctionLibraryDefinition* flib_def); - - // b) we rewrite the function calls generated in phase (a) into XlaLaunch - // operators. We also convert the XlaClusterOutput output nodes of the - // function call into the outputs of the XlaLaunch operator. - static Status BuildXlaLaunchOps(Graph* graph); -}; - -} // namespace tensorflow - -#endif // TENSORFLOW_COMPILER_JIT_ENCAPSULATE_XLA_COMPUTATIONS_PASS_H_ diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc deleted file mode 100644 index f643fb0cfe..0000000000 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc +++ /dev/null @@ -1,346 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h" - -#include "tensorflow/cc/ops/function_ops.h" -#include "tensorflow/cc/ops/resource_variable_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" -#include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_op.h" -#include "tensorflow/compiler/tf2xla/test_util.h" -#include "tensorflow/core/framework/graph_to_functiondef.h" -#include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/lib/hash/hash.h" -#include "tensorflow/core/lib/strings/proto_serialization.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/util/equal_graph_def.h" -#include "tensorflow/core/util/ptr_util.h" - -namespace tensorflow { - -static std::unique_ptr MakeOuterGraph( - const FunctionLibraryDefinition& flib_def, const string& function) { - Scope scope = Scope::NewRootScope().ExitOnError(); - TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib_def.ToProto())); - - auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32); - auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - - NodeDef def; - TF_CHECK_OK( - NodeDefBuilder("launch0", function, &flib_def) - .Input(a.node()->name(), 0, DT_INT32) - .Input(b.node()->name(), 0, DT_FLOAT) - .Input(c.node()->name(), 0, DT_INT32) - .Input(d.node()->name(), 0, DT_FLOAT) - .Input(u.node()->name(), 0, DT_RESOURCE) - .Input(v.node()->name(), 0, DT_RESOURCE) - .Input(w.node()->name(), 0, DT_RESOURCE) - 
.Attr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0") - .Attr("_variable_start_index", 4) - .Finalize(&def)); - - Status status; - Node* launch = scope.graph()->AddNode(def, &status); - TF_CHECK_OK(status); - TF_CHECK_OK(scope.DoShapeInference(launch)); - scope.graph()->AddEdge(a.node(), 0, launch, 0); - scope.graph()->AddEdge(b.node(), 0, launch, 1); - scope.graph()->AddEdge(c.node(), 0, launch, 2); - scope.graph()->AddEdge(d.node(), 0, launch, 3); - scope.graph()->AddEdge(u.node(), 0, launch, 4); - scope.graph()->AddEdge(v.node(), 0, launch, 5); - scope.graph()->AddEdge(w.node(), 0, launch, 6); - - auto out0 = - ops::XlaClusterOutput(scope.WithOpName("Out0"), Output(launch, 0)); - auto out1 = - ops::XlaClusterOutput(scope.WithOpName("Out1"), Output(launch, 1)); - auto out2 = - ops::XlaClusterOutput(scope.WithOpName("Out2"), Output(launch, 2)); - auto out3 = - ops::XlaClusterOutput(scope.WithOpName("Out3"), Output(launch, 3)); - - auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0); - auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0); - auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0); - auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1); - auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2); - auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3); - - std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); - return graph; -} - -// Makes an encapsulate body graph for use in tests. 
-static std::unique_ptr MakeBodyGraph() { - Scope scope = Scope::NewRootScope().ExitOnError(); - - auto arg0 = ops::_Arg(scope.WithOpName("a_0_arg"), DT_INT32, 0); - auto arg1 = ops::_Arg(scope.WithOpName("b_0_arg"), DT_FLOAT, 1); - auto arg2 = ops::_Arg(scope.WithOpName("c_0_arg"), DT_INT32, 2); - auto arg3 = ops::_Arg(scope.WithOpName("d_0_arg"), DT_FLOAT, 3); - - auto arg4 = ops::_Arg(scope.WithOpName("u_0_arg"), DT_RESOURCE, 4); - auto arg5 = ops::_Arg(scope.WithOpName("v_0_arg"), DT_RESOURCE, 5); - auto arg6 = ops::_Arg(scope.WithOpName("w_0_arg"), DT_RESOURCE, 6); - - auto add_attrs = [](Node* node) { - node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0"); - }; - - auto b_identity = ops::Identity(scope.WithOpName("B_identity"), arg1); - - auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), arg4, DT_FLOAT); - add_attrs(read_u.node()); - auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), arg5, DT_FLOAT); - add_attrs(read_v.node()); - auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), arg6, DT_FLOAT); - add_attrs(read_w.node()); - - auto e = ops::Add(scope.WithOpName("E"), arg0, arg2); - add_attrs(e.node()); - auto f = ops::Add(scope.WithOpName("F"), read_v, read_w); - add_attrs(f.node()); - auto g = ops::Add(scope.WithOpName("G"), f, arg3); - add_attrs(g.node()); - - auto out0 = ops::_Retval(scope.WithOpName("b_identity_0_retval_RetVal"), - b_identity, 0); - auto out1 = ops::_Retval(scope.WithOpName("e_0_retval_RetVal"), e, 1); - auto out2 = ops::_Retval(scope.WithOpName("g_0_retval_RetVal"), g, 2); - auto out3 = - ops::_Retval(scope.WithOpName("readu_0_retval_RetVal"), read_u, 3); - - std::unique_ptr graph(new Graph(OpRegistry::Global())); - TF_CHECK_OK(scope.ToGraph(graph.get())); - return graph; -} - -TEST(EncapsulateXlaComputations, DeterministicEncapsulate) { - // Test that control edge insertion order doesn't affect the cache key - // (cluster name) generated by TPU encapsulate pass. 
- auto get_serialized_graph = [](bool control_input_reversed, - bool operand_reversed) -> string { - FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); - std::unique_ptr graph(new Graph(&flib_def)); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto a0 = ops::Placeholder(scope.WithOpName("A0"), DT_INT32); - auto a1 = ops::Placeholder(scope.WithOpName("A1"), DT_INT32); - - ops::Add e = operand_reversed ? ops::Add(scope.WithOpName("E"), a0, a1) - : ops::Add(scope.WithOpName("E"), a1, a0); - - auto add_attrs = [](Node* node) { - node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, - "launch0"); - }; - add_attrs(e.node()); - - TF_CHECK_OK(scope.ToGraph(graph.get())); - auto get_node_in_graph = [&graph](Node* node) { - return graph->FindNodeId(node->id()); - }; - // Insert control edge in different order. The order should not affect - // the encapsulated or serialized graph. - if (!control_input_reversed) { - graph->AddControlEdge(get_node_in_graph(a0.node()), - get_node_in_graph(e.node()), true); - graph->AddControlEdge(get_node_in_graph(a1.node()), - get_node_in_graph(e.node()), true); - } else { - graph->AddControlEdge(get_node_in_graph(a1.node()), - get_node_in_graph(e.node()), true); - graph->AddControlEdge(get_node_in_graph(a0.node()), - get_node_in_graph(e.node()), true); - } - } - TF_CHECK_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def)); - GraphDef gdef; - graph->ToGraphDef(&gdef); - // Before serialization, sort control inputs first to remove - // nondeterminism. - SortControlInputs(&gdef); - string serialized; - SerializeToStringDeterministic(gdef, &serialized); - return serialized; - }; - - // Changing the order of control input shouldn't affect the graph generated. 
- EXPECT_EQ(get_serialized_graph(/*control_input_reversed=*/true, - /*operand_reversed=*/false), - get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/false)); - - // Changing the order of data input should affect the graph generated. - EXPECT_NE(get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/true), - get_serialized_graph(/*control_input_reversed=*/false, - /*operand_reversed=*/false)); -} - -TEST(EncapsulateXlaComputations, Encapsulate) { - FunctionLibraryDefinition flib_def(OpRegistry::Global(), {}); - std::unique_ptr graph(new Graph(&flib_def)); - { - Scope scope = Scope::NewRootScope().ExitOnError(); - auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32); - auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - - auto add_attrs = [](Node* node) { - node->AddAttr(EncapsulateXlaComputationsPass::kXlaClusterAttr, "launch0"); - }; - - auto b_identity = ops::Identity(scope.WithOpName("B_identity"), b); - add_attrs(b_identity.node()); - - auto read_u = ops::ReadVariableOp(scope.WithOpName("ReadU"), u, DT_FLOAT); - add_attrs(read_u.node()); - auto read_v = ops::ReadVariableOp(scope.WithOpName("ReadV"), v, DT_FLOAT); - add_attrs(read_v.node()); - auto read_w = ops::ReadVariableOp(scope.WithOpName("ReadW"), w, DT_FLOAT); - add_attrs(read_w.node()); - - auto e = ops::Add(scope.WithOpName("E"), a, c); - add_attrs(e.node()); - auto f = ops::Add(scope.WithOpName("F"), read_v, read_w); - add_attrs(f.node()); - auto g = ops::Add(scope.WithOpName("G"), f, d); - add_attrs(g.node()); - - auto out0 = ops::XlaClusterOutput(scope.WithOpName("Out0"), b_identity); - auto out1 = 
ops::XlaClusterOutput(scope.WithOpName("Out1"), e); - auto out2 = ops::XlaClusterOutput(scope.WithOpName("Out2"), g); - auto out3 = ops::XlaClusterOutput(scope.WithOpName("Out3"), read_u); - - auto consumer0_a = ops::Identity(scope.WithOpName("consumer0_a"), out0); - auto consumer0_b = ops::Identity(scope.WithOpName("consumer0_b"), out0); - auto consumer0_c = ops::Identity(scope.WithOpName("consumer0_c"), out0); - auto consumer1 = ops::Identity(scope.WithOpName("consumer1"), out1); - auto consumer2 = ops::Identity(scope.WithOpName("consumer2"), out2); - auto consumer3 = ops::Identity(scope.WithOpName("consumer3"), out3); - TF_ASSERT_OK(scope.ToGraph(graph.get())); - } - - std::unique_ptr graph_copy(new Graph(&flib_def)); - CopyGraph(*graph, graph_copy.get()); - - TF_ASSERT_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def)); - - std::unordered_map index = BuildNodeIndex(*graph); - string function = index.at("launch0")->type_string(); - - // Tests the outer graph is as expected. - { - std::unique_ptr outer = MakeOuterGraph(flib_def, function); - GraphDef expected_def; - outer->ToGraphDef(&expected_def); - - GraphDef actual_def; - graph->ToGraphDef(&actual_def); - TF_EXPECT_GRAPH_EQ_INTERNAL(expected_def, actual_def); - } - - // Tests the encapsulated body graph is as expected. - { - std::unique_ptr body = MakeBodyGraph(); - GraphDef expected_body_def; - body->ToGraphDef(&expected_body_def); - - InstantiationResultForTest result; - TF_EXPECT_OK(InstantiateFunctionForTest(function, flib_def, &result)); - - EXPECT_EQ((DataTypeVector{DT_INT32, DT_FLOAT, DT_INT32, DT_FLOAT, - DT_RESOURCE, DT_RESOURCE, DT_RESOURCE}), - result.arg_types); - EXPECT_EQ((DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}), - result.ret_types); - TF_EXPECT_GRAPH_EQ(expected_body_def, result.gdef); - } - - // Encapsulates the same computation again, verifies we reuse the same - // function. Encapsulation should be deterministic to avoid recompilation. 
- TF_ASSERT_OK( - EncapsulateXlaComputationsPass::Encapsulate(&graph_copy, &flib_def)); - std::unordered_map index_copy = BuildNodeIndex(*graph_copy); - string function_copy = index_copy.at("launch0")->type_string(); - EXPECT_EQ(function, function_copy); -} - -TEST(EncapsulateXlaComputations, BuildXlaLaunchOp) { - std::unique_ptr body_graph = MakeBodyGraph(); - FunctionDefLibrary flib; - TF_ASSERT_OK(GraphToFunctionDef(*body_graph, "launch0", flib.add_function())); - - FunctionLibraryDefinition flib_def(OpRegistry::Global(), flib); - - std::unique_ptr graph = MakeOuterGraph(flib_def, "launch0"); - TF_ASSERT_OK(EncapsulateXlaComputationsPass::BuildXlaLaunchOps(graph.get())); - - Scope scope = Scope::DisabledShapeInferenceScope().ExitOnError(); - TF_EXPECT_OK(scope.graph()->AddFunctionLibrary(flib)); - - auto a = ops::Placeholder(scope.WithOpName("A"), DT_INT32); - auto b = ops::Placeholder(scope.WithOpName("B"), DT_FLOAT); - auto c = ops::Placeholder(scope.WithOpName("C"), DT_INT32); - auto d = ops::Placeholder(scope.WithOpName("D"), DT_FLOAT); - auto u = ops::Placeholder(scope.WithOpName("U"), DT_RESOURCE); - auto v = ops::Placeholder(scope.WithOpName("V"), DT_RESOURCE); - auto w = ops::Placeholder(scope.WithOpName("W"), DT_RESOURCE); - - NameAttrList function; - function.set_name("launch0"); - auto launch = ops::XlaLaunch( - scope.WithOpName("launch0"), std::initializer_list{}, - std::initializer_list{a, b, c, d}, - std::initializer_list{u, v, w}, - DataTypeVector{DT_FLOAT, DT_INT32, DT_FLOAT, DT_FLOAT}, function); - - auto consumer0_a = - ops::Identity(scope.WithOpName("consumer0_a"), launch.results[0]); - auto consumer0_b = - ops::Identity(scope.WithOpName("consumer0_b"), launch.results[0]); - auto consumer0_c = - ops::Identity(scope.WithOpName("consumer0_c"), launch.results[0]); - auto consumer1 = - ops::Identity(scope.WithOpName("consumer1"), launch.results[1]); - auto consumer2 = - ops::Identity(scope.WithOpName("consumer2"), launch.results[2]); - auto 
consumer3 = - ops::Identity(scope.WithOpName("consumer3"), launch.results[3]); - - GraphDef expected_def; - TF_ASSERT_OK(scope.ToGraphDef(&expected_def)); - - GraphDef actual_def; - graph->ToGraphDef(&actual_def); - TF_EXPECT_GRAPH_EQ(expected_def, actual_def); -} - -} // namespace tensorflow diff --git a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc index 315fcb2fa7..c37b6112cc 100644 --- a/tensorflow/compiler/jit/jit_compilation_pass_registration.cc +++ b/tensorflow/compiler/jit/jit_compilation_pass_registration.cc @@ -15,19 +15,12 @@ limitations under the License. #include "tensorflow/compiler/jit/build_xla_launch_ops_pass.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" -#include "tensorflow/compiler/jit/encapsulate_xla_computations_pass.h" #include "tensorflow/compiler/jit/mark_for_compilation_pass.h" #include "tensorflow/compiler/jit/partially_decluster_pass.h" #include "tensorflow/core/common_runtime/optimization_registry.h" namespace tensorflow { -// EncapsulateXlaComputationsPass rewrites computations generated by the -// xla.compile() Python code into XlaLaunch nodes. -REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 26, - EncapsulateXlaComputationsPass); - -// The following POST_REWRITE passes support auto-clustering to enable XLA. REGISTER_OPTIMIZATION(OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, 10, MarkForCompilationPass); diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc index 1a29c3caab..f2473d98ff 100644 --- a/tensorflow/compiler/jit/ops/xla_ops.cc +++ b/tensorflow/compiler/jit/ops/xla_ops.cc @@ -13,14 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" namespace tensorflow { -using shape_inference::InferenceContext; - REGISTER_OP("XlaLaunch") .Input("constants: Tconstants") .Attr("Tconstants: list(type) >= 0") @@ -36,19 +32,4 @@ REGISTER_OP("XlaLaunch") .SetIsStateful() .Doc("XLA Launch Op. For use by the XLA JIT only."); -REGISTER_OP("XlaClusterOutput") - .Input("input: T") - // Note: when replication is supported, this op will have N outputs. - .Output("outputs: T") - .Attr("T: type") - .SetShapeFn([](InferenceContext* c) { - for (int i = 0; i < c->num_outputs(); ++i) { - c->set_output(i, c->input(0)); - } - return Status::OK(); - }) - .Doc( - "Operator that connects the output of an XLA computation to other " - "consumer graph nodes."); - } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 74b131e07e..ab289a2b6c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -594,7 +594,6 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ], diff --git a/tensorflow/compiler/tf2xla/cc/BUILD b/tensorflow/compiler/tf2xla/cc/BUILD index 8ac5eb5df9..ea8d1b3d14 100644 --- a/tensorflow/compiler/tf2xla/cc/BUILD +++ b/tensorflow/compiler/tf2xla/cc/BUILD @@ -31,9 +31,7 @@ cc_library( tf_gen_op_wrapper_cc( name = "xla_jit_op_gen", out_ops_file = "ops/xla_jit_op", - deps = [ - "//tensorflow/compiler/jit/ops:xla_ops", - ], + deps = ["//tensorflow/compiler/jit/ops:xla_ops"], ) cc_library( diff --git a/tensorflow/compiler/tf2xla/test_util.cc b/tensorflow/compiler/tf2xla/test_util.cc index f31bfb45a2..3c6c9a91b6 100644 --- 
a/tensorflow/compiler/tf2xla/test_util.cc +++ b/tensorflow/compiler/tf2xla/test_util.cc @@ -40,12 +40,4 @@ Status InstantiateFunctionForTest(const string& name, return Status::OK(); } -std::unordered_map BuildNodeIndex(const Graph& graph) { - std::unordered_map index; - for (Node* node : graph.nodes()) { - index[node->name()] = node; - } - return index; -} - } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/test_util.h b/tensorflow/compiler/tf2xla/test_util.h index 350a868568..e6e4ae92ed 100644 --- a/tensorflow/compiler/tf2xla/test_util.h +++ b/tensorflow/compiler/tf2xla/test_util.h @@ -24,10 +24,8 @@ limitations under the License. #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/util/equal_graph_def.h" namespace tensorflow { @@ -44,20 +42,6 @@ Status InstantiateFunctionForTest(const string& name, const FunctionLibraryDefinition& library, InstantiationResultForTest* result); -// Builds a map from node name to Node* for `graph`. -std::unordered_map BuildNodeIndex(const Graph& graph); - } // namespace tensorflow -// Variant of TF_EXPECT_GRAPH_EQ that also compares internal attributes for -// equality. 
-#define TF_EXPECT_GRAPH_EQ_INTERNAL(expected, actual) \ - do { \ - string diff; \ - EqualGraphDefOptions eq_options; \ - eq_options.ignore_internal_attrs = false; \ - EXPECT_TRUE(EqualGraphDef(actual, expected, &diff, eq_options)) \ - << diff << "\nActual: " << SummarizeGraphDef(actual); \ - } while (false) - #endif // TENSORFLOW_COMPILER_TF2XLA_TEST_UTIL_H_ diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index 4475fa979e..7f260b3139 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -561,10 +561,6 @@ Status GraphExecutionState::OptimizeGraph( grappler::GrapplerItem item; item.id = "tf_graph"; graph_->ToGraphDef(&item.graph); - // TODO(b/114748242): Add a unit test to test this bug fix. - if (flib_def_) { - *item.graph.mutable_library() = flib_def_->ToProto(); - } item.fetch.insert(item.fetch.end(), options.callable_options.fetch().begin(), diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index b75d6303b4..a5fd33d28b 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -72,16 +72,6 @@ bool IsRunOnceOptimizer(const string& name) { name == "loop_optimizer"; } -// Check if the graphdef contains nodes that indicate TPU execution. -bool IsTPUGraphDef(const GraphDef& def) { - for (auto node : def.node()) { - if (node.op() == "TPUCompile" || node.op() == "TPUPartitionedCall") { - return true; - } - } - return false; -} - } // namespace #define MK_OPT(NAME, VALUE) \ @@ -346,19 +336,6 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // 1. Optimize main graph TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); - // Skip optimizing functions if this is a TPU graph. 
Currently, Grappler - // passes do not handle TPU functions correctly in a variety of ways (Note - // that due to the pre-placement TPU graph rewriting passes, the TPU-related - // ops are encapsulated away into functions). For example, TPU graphs contain - // TPUReplicateMetadata node that carries relevant TPU metadata and Grappler - // passes could prune that away. Grappler passes could also cause issues - // around shape inference. Since the desired and existing behavior is to not - // optimize TPU functions with Grappler, this check preserves that. - if (IsTPUGraphDef(*optimized_graph)) { - VLOG(2) << "Skipping optimizing funcs for TPU graphs"; - return Status::OK(); - } - // 2. Optimize function library FunctionLibraryDefinition flib(OpRegistry::Global(), optimized_graph->library()); -- cgit v1.2.3 From 624ff13fdf4e54e255d23971ef2beec3c48c3bb2 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 11 Sep 2018 09:35:09 -0700 Subject: PR #21826: merge_repeated option is confusing Please approve this CL. It will be submitted automatically, and its GitHub pull request will be marked as merged. Imported from GitHub PR #21826 I have the same question as in [WIP: Remove invalid merge_repeated option from CTC beam decoder](#15586), it's a pity I haven't seen any changes for so long. Generally I will use the default value of merge_repeated: True, but I found it's confusing, that is, I got the wrong answer, it has been explained well in [WIP: Remove invalid merge_repeated option from CTC beam decoder](#15586). And the top path in ctc_beam_search_decoder is similar to the sequence in ctc_greedy_decoder, this is confusing, I have found the project [CRNN](https://github.com/Belval/CRNN/blob/master/CRNN/crnn.py)(line 167) and some other projects use the wrong settings. So I think it's better to give an explanation here, this has no conflict with the existing code. 
Copybara import of the project: - e357bcea4b10d5e5cbc3a4ba59385e832401ba8d merge_repeated option is confusing by Dao Zhang - a0467d35cc19293fa16918658a7f98e18ead7f87 Merge e357bcea4b10d5e5cbc3a4ba59385e832401ba8d into 34ef4... by Dao Zhang(??) PiperOrigin-RevId: 212466200 --- tensorflow/python/ops/ctc_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 908e793902..32d455bdad 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -242,11 +242,11 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, If `merge_repeated` is `True`, merge repeated classes in the output beams. This means that if consecutive entries in a beam are the same, - only the first of these is emitted. That is, when the top path - is `A B B B B`, the return value is: + only the first of these is emitted. That is, when the sequence is + `A B B * B * B` (where '*' is the blank label), the return value is: * `A B` if `merge_repeated = True`. - * `A B B B B` if `merge_repeated = False`. + * `A B B B` if `merge_repeated = False`. 
Args: inputs: 3-D `float` `Tensor`, size -- cgit v1.2.3 From 7cfed353d9eb8344d20cd65ecfb5740cff48304c Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Tue, 11 Sep 2018 09:45:29 -0700 Subject: disable tsan for failing test PiperOrigin-RevId: 212467900 --- tensorflow/contrib/saved_model/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'tensorflow') diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index b897224c6d..f687b56ea3 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -123,6 +123,7 @@ py_test( size = "medium", srcs = ["python/saved_model/keras_saved_model_test.py"], srcs_version = "PY2AND3", + tags = ["notsan"], deps = [ ":keras_saved_model", "//tensorflow/python:client_testlib", -- cgit v1.2.3 From b566170b29c41b0da4c23bf5ce0fdfe19b8bcb14 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 11 Sep 2018 10:35:30 -0700 Subject: Block tsan for keras_test PiperOrigin-RevId: 212477605 --- tensorflow/python/estimator/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'tensorflow') diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 4001ffdd6b..bfcc019dd5 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -685,6 +685,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_windows", + "notsan", # b/67510291 ], deps = [ ":keras", -- cgit v1.2.3 From 36e1a5ea5ba2dd5eaa7f4cfc84a61f8ce3ea20e1 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 11 Sep 2018 10:41:44 -0700 Subject: [TF] Variant improvements. 1. Change Variant Decode to accept VariantTensorData (non-ref). This should allow some optimization in the future. In the meantime it means removing the variant.h include from tensor.h, since variant_encode_decode.h now relies on tensor.h and variant.h now relies on that. 
It also means we found a bunch of places where tensor.proto.h, variant.h, and mutex.h were being imported through tensor.h (along with a bunch of other crap); so now we directly import them in order to compile. 2. Move Variant registry to use TypeIndex instead of a TypeName string; this should speed up registry lookups. PiperOrigin-RevId: 212478896 --- tensorflow/c/c_api.cc | 1 + tensorflow/c/c_api_experimental.cc | 1 + tensorflow/c/c_api_function.cc | 1 + .../contrib/lite/toco/import_tensorflow_test.cc | 1 + tensorflow/contrib/nccl/BUILD | 24 +-- tensorflow/contrib/nccl/kernels/nccl_rewrite.cc | 1 + tensorflow/core/BUILD | 1 + tensorflow/core/common_runtime/copy_tensor.cc | 2 +- tensorflow/core/common_runtime/rendezvous_util.cc | 1 + .../common_runtime/single_threaded_cpu_device.h | 1 + tensorflow/core/framework/allocator.cc | 9 + tensorflow/core/framework/allocator.h | 11 +- tensorflow/core/framework/allocator_registry.h | 1 + tensorflow/core/framework/attr_value_util_test.cc | 1 + tensorflow/core/framework/tensor.h | 3 +- tensorflow/core/framework/tensor_test.cc | 1 + tensorflow/core/framework/tensor_util.h | 1 + tensorflow/core/framework/types.h | 3 +- tensorflow/core/framework/variant.cc | 25 +-- tensorflow/core/framework/variant.h | 60 ++---- tensorflow/core/framework/variant_encode_decode.h | 32 +-- tensorflow/core/framework/variant_op_copy_test.cc | 6 +- tensorflow/core/framework/variant_op_registry.cc | 85 ++++---- tensorflow/core/framework/variant_op_registry.h | 216 +++++++++++---------- .../core/framework/variant_op_registry_test.cc | 96 ++++----- tensorflow/core/framework/variant_tensor_data.cc | 22 ++- tensorflow/core/framework/variant_tensor_data.h | 10 +- tensorflow/core/framework/variant_test.cc | 15 +- tensorflow/core/kernels/data/iterator_ops.cc | 4 +- tensorflow/core/kernels/data/optional_ops.cc | 7 +- tensorflow/core/kernels/gather_functor.h | 1 + tensorflow/core/kernels/list_kernels.cc | 12 +- tensorflow/core/kernels/list_kernels.cu.cc | 3 +- 
tensorflow/core/kernels/shape_op_test.cc | 10 +- tensorflow/core/platform/abi.cc | 4 +- tensorflow/core/platform/abi.h | 3 +- 36 files changed, 344 insertions(+), 331 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 173bbea596..79811ceae5 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/partial_tensor_shape.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" // NOLINT #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index c046bd66cd..c195c9e01c 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/c/c_api_internal.h" #include "tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/lib/strings/strcat.h" diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc index a2c5a42c11..f68f8a3e90 100644 --- a/tensorflow/c/c_api_function.cc +++ b/tensorflow/c/c_api_function.cc @@ -23,6 +23,7 @@ limitations under the License. 
#include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/strings/base64.h" diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc index 90e6f698ef..a00e136dd6 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/attr_value_util.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 62996d1fd8..225025e995 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -25,15 +25,17 @@ tf_custom_op_library( name = "python/ops/_nccl_ops.so", srcs = [ "ops/nccl_ops.cc", - ], + ] + if_cuda(["kernels/nccl_rewrite.cc"]), gpu_srcs = if_not_windows_cuda([ "kernels/nccl_manager.cc", "kernels/nccl_manager.h", "kernels/nccl_ops.cc", ]), - deps = if_cuda([ + deps = [] + if_cuda([ "@local_config_nccl//:nccl", "//tensorflow/core:gpu_headers_lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:protos_all_proto_text", ]), ) @@ -57,32 +59,30 @@ tf_cuda_cc_test( "notap", ], deps = - [ + if_cuda([ + "@local_config_nccl//:nccl", "//tensorflow/core:cuda", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - "@local_config_nccl//:nccl", - ], + ]), ) tf_kernel_library( name = "nccl_kernels", - srcs = [ + srcs = if_cuda([ "kernels/nccl_manager.cc", 
"kernels/nccl_manager.h", "kernels/nccl_ops.cc", - "kernels/nccl_rewrite.cc", - ], - deps = [ + ]), + deps = if_cuda([ + "@local_config_nccl//:nccl", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib", - "//tensorflow/core:proto_text", "//tensorflow/core:stream_executor", - "@local_config_nccl//:nccl", - ], + ]), alwayslink = 1, ) diff --git a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc index 4676e937e5..06ff86e6d8 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_rewrite.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/graph/node_builder.h" namespace tensorflow { diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 79ad3b8e54..957aa254e5 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -720,6 +720,7 @@ cc_library( name = "abi", srcs = ["platform/abi.cc"], hdrs = ["platform/abi.h"], + deps = [":platform_base"], ) cc_library( diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc index f8cb854b52..cf3d1f0b79 100644 --- a/tensorflow/core/common_runtime/copy_tensor.cc +++ b/tensorflow/core/common_runtime/copy_tensor.cc @@ -358,7 +358,7 @@ static Status WrappedTensorDeviceCopy( #define REGISTER_WRAPPED_TENSOR_COPY(DIRECTION) \ INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \ - Tensor, DIRECTION, "tensorflow::Tensor", WrappedTensorDeviceCopy) + Tensor, DIRECTION, WrappedTensorDeviceCopy) REGISTER_WRAPPED_TENSOR_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE); REGISTER_WRAPPED_TENSOR_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST); diff --git a/tensorflow/core/common_runtime/rendezvous_util.cc b/tensorflow/core/common_runtime/rendezvous_util.cc index 
1e3fed0d6f..43ca3f1e3e 100644 --- a/tensorflow/core/common_runtime/rendezvous_util.cc +++ b/tensorflow/core/common_runtime/rendezvous_util.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include "tensorflow/core/common_runtime/rendezvous_util.h" +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/util/reffed_status_callback.h" diff --git a/tensorflow/core/common_runtime/single_threaded_cpu_device.h b/tensorflow/core/common_runtime/single_threaded_cpu_device.h index 04d5af9087..22650b0d83 100644 --- a/tensorflow/core/common_runtime/single_threaded_cpu_device.h +++ b/tensorflow/core/common_runtime/single_threaded_cpu_device.h @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/eigen_thread_pool.h" #include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/lib/core/threadpool.h" namespace tensorflow { diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index 888ed0c57b..2a7ee16a16 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/tracking_allocator.h" +#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/mutex.h" @@ -56,6 +57,14 @@ void RunResourceDtor(ResourceHandle* p, size_t n) { for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle(); } +void Allocator::RunVariantCtor(Variant* p, size_t n) { + for (size_t i = 0; i < n; ++p, ++i) new (p) Variant(); +} + +void Allocator::RunVariantDtor(Variant* p, size_t n) { + for (size_t i = 0; i < n; ++p, ++i) p->~Variant(); +} + // If true, cpu allocator collects more stats. static bool cpu_allocator_collect_stats = false; // If true, cpu allocator collects full stats. diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h index 774b1fe137..ded120b704 100644 --- a/tensorflow/core/framework/allocator.h +++ b/tensorflow/core/framework/allocator.h @@ -23,12 +23,13 @@ limitations under the License. #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/type_traits.h" -#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { +class Variant; + // Attributes for a single allocation call. Different calls to the same // allocator could potentially have different allocation attributes. 
struct AllocationAttributes { @@ -228,13 +229,9 @@ class Allocator { for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle(); } - virtual void RunVariantCtor(Variant* p, size_t n) { - for (size_t i = 0; i < n; ++p, ++i) new (p) Variant(); - } + virtual void RunVariantCtor(Variant* p, size_t n); - virtual void RunVariantDtor(Variant* p, size_t n) { - for (size_t i = 0; i < n; ++p, ++i) p->~Variant(); - } + virtual void RunVariantDtor(Variant* p, size_t n); // TODO(jeff): Maybe provide some interface to give info about // current allocation state (total number of bytes available for diff --git a/tensorflow/core/framework/allocator_registry.h b/tensorflow/core/framework/allocator_registry.h index 24f282ce84..e907c52ba9 100644 --- a/tensorflow/core/framework/allocator_registry.h +++ b/tensorflow/core/framework/allocator_registry.h @@ -21,6 +21,7 @@ limitations under the License. #include #include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/numa.h" namespace tensorflow { diff --git a/tensorflow/core/framework/attr_value_util_test.cc b/tensorflow/core/framework/attr_value_util_test.cc index 1a3994736c..4ffd732f8e 100644 --- a/tensorflow/core/framework/attr_value_util_test.cc +++ b/tensorflow/core/framework/attr_value_util_test.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include #include #include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/test.h" diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h index 1b19ab5da3..696fd277cd 100644 --- a/tensorflow/core/framework/tensor.h +++ b/tensorflow/core/framework/tensor.h @@ -37,11 +37,12 @@ namespace tensorflow { class AllocationDescription; class Allocator; class OpKernelContext; +class Tensor; class TensorBuffer; class TensorCApi; class TensorDescription; class TensorProto; -class VariantTensorData; + namespace batch_util { Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index); Status MaybeMoveSliceToElement(Tensor* parent, Tensor* element, int64 index); diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 84a373c196..9a78cdc91e 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/math/math_util.h" diff --git a/tensorflow/core/framework/tensor_util.h b/tensorflow/core/framework/tensor_util.h index 4bda8f9eb8..a7cf600bab 100644 --- a/tensorflow/core/framework/tensor_util.h +++ b/tensorflow/core/framework/tensor_util.h @@ -17,6 +17,7 @@ limitations under the License. 
#define TENSORFLOW_CORE_FRAMEWORK_TENSOR_UTIL_H_ #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include diff --git a/tensorflow/core/framework/types.h b/tensorflow/core/framework/types.h index 15b1add2c1..2e96b05787 100644 --- a/tensorflow/core/framework/types.h +++ b/tensorflow/core/framework/types.h @@ -30,7 +30,6 @@ limitations under the License. #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/resource_handle.h" #include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -39,6 +38,8 @@ limitations under the License. namespace tensorflow { +class Variant; + // MemoryType is used to describe whether input or output Tensors of // an OpKernel should reside in "Host memory" (e.g., CPU memory) or // "Device" Memory (CPU memory for CPU devices, GPU memory for GPU diff --git a/tensorflow/core/framework/variant.cc b/tensorflow/core/framework/variant.cc index 5a507804b0..d43e3c72ec 100644 --- a/tensorflow/core/framework/variant.cc +++ b/tensorflow/core/framework/variant.cc @@ -23,11 +23,11 @@ limitations under the License. 
namespace tensorflow { -bool Variant::TryDecode(Variant* out) const { - const VariantTensorDataProto* p = get(); - if (p == nullptr) return false; - VariantTensorData data(*p); - return out->Decode(data); +bool Variant::Decode(VariantTensorData data) { + if (!is_empty()) { + return value_->Decode(std::move(data)); + } + return true; } template <> @@ -54,13 +54,12 @@ string TypeNameVariant(const VariantTensorDataProto& value) { template <> void EncodeVariant(const VariantTensorDataProto& value, VariantTensorData* data) { - data->FromProto(value); + data->FromConstProto(value); } template <> -bool DecodeVariant(const VariantTensorData& data, - VariantTensorDataProto* value) { - data.ToProto(value); +bool DecodeVariant(VariantTensorData* data, VariantTensorDataProto* value) { + data->ToProto(value); return true; } @@ -70,8 +69,8 @@ void EncodeVariant(const VariantTensorDataProto& value, string* buf) { } template <> -bool DecodeVariant(const string& buf, VariantTensorDataProto* value) { - return value->ParseFromString(buf); +bool DecodeVariant(string* buf, VariantTensorDataProto* value) { + return value->ParseFromString(*buf); } void EncodeVariantList(const Variant* variant_array, int64 n, @@ -93,8 +92,10 @@ bool DecodeVariantList(std::unique_ptr d, if (variant_array[i].is_empty()) { variant_array[i] = VariantTensorDataProto(); } + // TODO(ebrevdo): Replace with StringPiece? Any way to make this a + // zero-copy operation that keeps a reference to the data in d? 
string str(d->Data(sizes[i]), sizes[i]); - if (!variant_array[i].Decode(str)) return false; + if (!variant_array[i].Decode(std::move(str))) return false; if (!DecodeUnaryVariant(&variant_array[i])) { LOG(ERROR) << "Could not decode variant with type_name: \"" << variant_array[i].TypeName() diff --git a/tensorflow/core/framework/variant.h b/tensorflow/core/framework/variant.h index 52732801a0..10eabbc85f 100644 --- a/tensorflow/core/framework/variant.h +++ b/tensorflow/core/framework/variant.h @@ -23,7 +23,6 @@ limitations under the License. #include #include -#include "tensorflow/core/framework/tensor.pb.h" // TODO(b/62899350): Remove #include "tensorflow/core/framework/type_index.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/core/status.h" @@ -38,17 +37,19 @@ string TypeNameVariant(const T& value); template string DebugStringVariant(const T& value); +// Allows for specializations of Variant Decoding. `data` may be modified in +// the process of decoding to `value`. template -void EncodeVariant(const T& value, VariantTensorData* data); +bool DecodeVariant(VariantTensorData* data, T* value); template -bool DecodeVariant(const VariantTensorData& data, T* value); +bool DecodeVariant(string* buf, T* value); template -void EncodeVariant(const T& value, string* buf); +void EncodeVariant(const T& value, VariantTensorData* data); template -bool DecodeVariant(const string& buf, T* value); +void EncodeVariant(const T& value, string* buf); // This is an implementation of a type-erased container that can store an // object of any type. The implementation is very similar to std::any, but has @@ -67,7 +68,7 @@ bool DecodeVariant(const string& buf, T* value); // // string TypeName() const; // void Encode(VariantTensorData* data) const; -// void Decode(const VariantTensorData& data); +// void Decode(VariantTensorData data); // // Simple POD types can elide the Encode/Decode functions, they are provided by // helper methods. 
@@ -121,7 +122,7 @@ bool DecodeVariant(const string& buf, T* value); // x.Encode(&serialized_f); // // Variant y = Foo(); // default constructed Foo. -// y.Decode(&serialized_f); +// y.Decode(std::move(serialized_f)); // EXPECT_EQ(*x.get(), *y.get()); // // @@ -145,10 +146,6 @@ bool DecodeVariant(const string& buf, T* value); // EXPECT_EQ(x.TypeName(), y_type_unknown.TypeName()); // Looks like Foo. // EXPECT_EQ(MakeTypeIndex(), // y_type_unknown.TypeId()); -// // Decode and get y_type_unknown; compare to value in x. -// Foo f_decoded; -// EXPECT_TRUE(x.MaybeDecodeAndCopy(&f_decoded)); -// EXPECT_EQ(f_decoded, f); // class Variant { public: @@ -241,12 +238,7 @@ class Variant { } // Deserialize `data` and update the stored object. - bool Decode(const VariantTensorData& data) { - if (!is_empty()) { - return value_->Decode(data); - } - return true; - } + bool Decode(VariantTensorData data); // Helper methods to directly serialize/deserialize from strings. void Encode(string* buf) const { @@ -254,31 +246,13 @@ class Variant { value_->Encode(buf); } } - bool Decode(const string& buf) { + bool Decode(string buf) { if (!is_empty()) { - return value_->Decode(buf); + return value_->Decode(std::move(buf)); } return true; } - template - bool MaybeDecodeAndCopy(T* out) const { - const T* ret = get(); - if (ret != nullptr) { - *out = std::move(*ret); - return true; - }; - Variant decoded = T(); - if (!TryDecode(&decoded)) return false; - T* decoded_ret = decoded.get(); - CHECK_NOTNULL(decoded_ret); - *out = std::move(*decoded_ret); - return true; - } - - private: - bool TryDecode(Variant* out) const; - private: struct in_place_t {}; static constexpr in_place_t in_place{}; @@ -292,9 +266,9 @@ class Variant { virtual string TypeName() const = 0; virtual string DebugString() const = 0; virtual void Encode(VariantTensorData* data) const = 0; - virtual bool Decode(const VariantTensorData& data) = 0; + virtual bool Decode(VariantTensorData data) = 0; virtual void Encode(string* buf) 
const = 0; - virtual bool Decode(const string& data) = 0; + virtual bool Decode(string data) = 0; }; template @@ -325,15 +299,13 @@ class Variant { EncodeVariant(value, data); } - bool Decode(const VariantTensorData& data) override { - return DecodeVariant(data, &value); + bool Decode(VariantTensorData data) override { + return DecodeVariant(&data, &value); } void Encode(string* buf) const override { EncodeVariant(value, buf); } - bool Decode(const string& buf) override { - return DecodeVariant(buf, &value); - } + bool Decode(string buf) override { return DecodeVariant(&buf, &value); } T value; }; diff --git a/tensorflow/core/framework/variant_encode_decode.h b/tensorflow/core/framework/variant_encode_decode.h index f155aa4892..5e08e5a7a6 100644 --- a/tensorflow/core/framework/variant_encode_decode.h +++ b/tensorflow/core/framework/variant_encode_decode.h @@ -22,6 +22,7 @@ limitations under the License. #include #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/type_index.h" #include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/abi.h" @@ -81,7 +82,7 @@ void EncodeVariantImpl(const T& value, // Specialization for POD type template -bool DecodeVariantImpl(const VariantTensorData& data, +bool DecodeVariantImpl(VariantTensorData data, TypeResolver, T* value) { @@ -90,7 +91,7 @@ bool DecodeVariantImpl(const VariantTensorData& data, // Specialization for tensorflow::Tensor template -bool DecodeVariantImpl(const VariantTensorData& data, +bool DecodeVariantImpl(VariantTensorData data, TypeResolver, T* value) { @@ -100,7 +101,7 @@ bool DecodeVariantImpl(const VariantTensorData& data, // Specialization for protobuf template -bool DecodeVariantImpl(const VariantTensorData& data, +bool DecodeVariantImpl(VariantTensorData data, TypeResolver, T* value) { @@ -111,11 +112,11 @@ bool DecodeVariantImpl(const VariantTensorData& data, // Specialization for other 
types template -bool DecodeVariantImpl(const VariantTensorData& data, +bool DecodeVariantImpl(VariantTensorData data, TypeResolver, T* value) { - return value->Decode(data); + return value->Decode(std::move(data)); } template @@ -224,8 +225,8 @@ void EncodeVariant(const T& value, VariantTensorData* data) { } template -bool DecodeVariant(const VariantTensorData& data, T* value) { - return DecodeVariantImpl(data, TypeResolver(), value); +bool DecodeVariant(VariantTensorData* data, T* value) { + return DecodeVariantImpl(std::move(*data), TypeResolver(), value); } template @@ -238,26 +239,31 @@ void EncodeVariant(const T& value, string* buf) { } template -bool DecodeVariant(const string& buf, T* value) { +bool DecodeVariant(string* buf, T* value) { VariantTensorData data; - if (!data.ParseFromString(buf)) return false; - if (!DecodeVariantImpl(data, TypeResolver(), value)) return false; + if (!data.ParseFromString(*buf)) return false; + if (!DecodeVariantImpl(std::move(data), TypeResolver(), value)) { + return false; + } return true; } // Specializations for VariantTensorDataProto template <> string TypeNameVariant(const VariantTensorDataProto& value); + template <> void EncodeVariant(const VariantTensorDataProto& value, VariantTensorData* data); + template <> -bool DecodeVariant(const VariantTensorData& data, - VariantTensorDataProto* value); +bool DecodeVariant(VariantTensorData* data, VariantTensorDataProto* value); + template <> void EncodeVariant(const VariantTensorDataProto& value, string* buf); + template <> -bool DecodeVariant(const string& buf, VariantTensorDataProto* value); +bool DecodeVariant(string* buf, VariantTensorDataProto* value); // Encodes an array of Variant objects in to the given StringListEncoder. // `variant_array` is assumed to point to an array of `n` Variant objects. 
diff --git a/tensorflow/core/framework/variant_op_copy_test.cc b/tensorflow/core/framework/variant_op_copy_test.cc index 60fa7bd559..daa744e877 100644 --- a/tensorflow/core/framework/variant_op_copy_test.cc +++ b/tensorflow/core/framework/variant_op_copy_test.cc @@ -90,15 +90,15 @@ REGISTER_UNARY_VARIANT_DECODE_FUNCTION(StoredTensorValue, "StoredTensorValue"); INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( StoredTensorValue, VariantDeviceCopyDirection::HOST_TO_DEVICE, - "StoredTensorValue", StoredTensorValue::CopyCPUToGPU); + StoredTensorValue::CopyCPUToGPU); INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( StoredTensorValue, VariantDeviceCopyDirection::DEVICE_TO_HOST, - "StoredTensorValue", StoredTensorValue::CopyGPUToCPU); + StoredTensorValue::CopyGPUToCPU); INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( StoredTensorValue, VariantDeviceCopyDirection::DEVICE_TO_DEVICE, - "StoredTensorValue", StoredTensorValue::CopyGPUToGPU); + StoredTensorValue::CopyGPUToGPU); REGISTER_OP("CreateTestVariant") .Input("input: T") diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc index ee07db1aee..ef5b240aea 100644 --- a/tensorflow/core/framework/variant_op_registry.cc +++ b/tensorflow/core/framework/variant_op_registry.cc @@ -38,21 +38,19 @@ UnaryVariantOpRegistry* UnaryVariantOpRegistry::Global() { } UnaryVariantOpRegistry::VariantShapeFn* UnaryVariantOpRegistry::GetShapeFn( - StringPiece type_name) { - auto found = shape_fns.find(type_name); + const TypeIndex& type_index) { + auto found = shape_fns.find(type_index); if (found == shape_fns.end()) return nullptr; return &found->second; } -void UnaryVariantOpRegistry::RegisterShapeFn(const string& type_name, +void UnaryVariantOpRegistry::RegisterShapeFn(const TypeIndex& type_index, const VariantShapeFn& shape_fn) { - CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantShape"; - VariantShapeFn* existing = GetShapeFn(type_name); + 
VariantShapeFn* existing = GetShapeFn(type_index); CHECK_EQ(existing, nullptr) - << "Unary VariantShapeFn for type_name: " << type_name - << " already registered"; - shape_fns.insert(std::pair( - GetPersistentStringPiece(type_name), shape_fn)); + << "Unary VariantShapeFn for type_index: " + << port::MaybeAbiDemangle(type_index.name()) << " already registered"; + shape_fns.insert(std::pair(type_index, shape_fn)); } Status GetUnaryVariantShape(const Tensor& variant_tensor, TensorShape* shape) { @@ -60,11 +58,11 @@ Status GetUnaryVariantShape(const Tensor& variant_tensor, TensorShape* shape) { CHECK_EQ(variant_tensor.dims(), 0); const Variant& v = variant_tensor.scalar()(); UnaryVariantOpRegistry::VariantShapeFn* shape_fn = - UnaryVariantOpRegistry::Global()->GetShapeFn(v.TypeName()); + UnaryVariantOpRegistry::Global()->GetShapeFn(v.TypeId()); if (shape_fn == nullptr) { return errors::Internal( - "No unary variant shape function found for Variant type_name: ", - v.TypeName()); + "No unary variant shape function found for Variant type_index: ", + port::MaybeAbiDemangle(v.TypeId().name())); } return (*shape_fn)(v, shape); } @@ -79,7 +77,7 @@ Status ScalarShape(const T&, TensorShape* shape) { } // namespace #define REGISTER_VARIANT_SHAPE_TYPE(T) \ - REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(T, TF_STR(T), ScalarShape); + REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(T, ScalarShape); // No encode/shape registered for std::complex<> and Eigen::half // objects yet. 
@@ -143,25 +141,24 @@ REGISTER_VARIANT_DECODE_TYPE(double); UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn* UnaryVariantOpRegistry::GetDeviceCopyFn( - const VariantDeviceCopyDirection direction, StringPiece type_name) { - auto found = device_copy_fns.find(std::make_pair(direction, type_name)); + const VariantDeviceCopyDirection direction, const TypeIndex& type_index) { + auto found = device_copy_fns.find(std::make_pair(direction, type_index)); if (found == device_copy_fns.end()) return nullptr; return &found->second; } void UnaryVariantOpRegistry::RegisterDeviceCopyFn( - const VariantDeviceCopyDirection direction, const string& type_name, + const VariantDeviceCopyDirection direction, const TypeIndex& type_index, const AsyncVariantDeviceCopyFn& device_copy_fn) { - CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantDeviceCopy"; - AsyncVariantDeviceCopyFn* existing = GetDeviceCopyFn(direction, type_name); + AsyncVariantDeviceCopyFn* existing = GetDeviceCopyFn(direction, type_index); CHECK_EQ(existing, nullptr) << "UnaryVariantDeviceCopy for direction: " << direction - << " and type_name: " << type_name << " already registered"; + << " and type_index: " << port::MaybeAbiDemangle(type_index.name()) + << " already registered"; device_copy_fns.insert( - std::pair, - AsyncVariantDeviceCopyFn>( - std::make_pair(direction, GetPersistentStringPiece(type_name)), - device_copy_fn)); + std::pair, + AsyncVariantDeviceCopyFn>(std::make_pair(direction, type_index), + device_copy_fn)); } Status VariantDeviceCopy( @@ -170,35 +167,34 @@ Status VariantDeviceCopy( const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy_fn) { UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn* device_copy_fn = UnaryVariantOpRegistry::Global()->GetDeviceCopyFn(direction, - from.TypeName()); + from.TypeId()); if (device_copy_fn == nullptr) { return errors::Internal( "No unary variant device copy function found for direction: ", - direction, " and Variant type_name: ", from.TypeName()); + 
direction, " and Variant type_index: ", + port::MaybeAbiDemangle(from.TypeId().name())); } return (*device_copy_fn)(from, to, copy_fn); } // Special casing UnaryOpFn per op and per device. UnaryVariantOpRegistry::VariantUnaryOpFn* UnaryVariantOpRegistry::GetUnaryOpFn( - VariantUnaryOp op, StringPiece device, StringPiece type_name) { - auto found = unary_op_fns.find({op, device, type_name}); + VariantUnaryOp op, StringPiece device, const TypeIndex& type_index) { + auto found = unary_op_fns.find({op, device, type_index}); if (found == unary_op_fns.end()) return nullptr; return &found->second; } void UnaryVariantOpRegistry::RegisterUnaryOpFn( - VariantUnaryOp op, const string& device, const string& type_name, + VariantUnaryOp op, const string& device, const TypeIndex& type_index, const VariantUnaryOpFn& unary_op_fn) { - CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantUnaryOp"; - VariantUnaryOpFn* existing = GetUnaryOpFn(op, device, type_name); + VariantUnaryOpFn* existing = GetUnaryOpFn(op, device, type_index); CHECK_EQ(existing, nullptr) - << "Unary VariantUnaryOpFn for type_name: " << type_name + << "Unary VariantUnaryOpFn for type_index: " + << port::MaybeAbiDemangle(type_index.name()) << " already registered for device type: " << device; unary_op_fns.insert(std::pair, VariantUnaryOpFn>( - {op, GetPersistentStringPiece(device), - GetPersistentStringPiece(type_name)}, - unary_op_fn)); + {op, GetPersistentStringPiece(device), type_index}, unary_op_fn)); } namespace { @@ -212,7 +208,7 @@ Status ZerosLikeVariantPrimitiveType(OpKernelContext* ctx, const T& t, #define REGISTER_VARIANT_ZEROS_LIKE_TYPE(T) \ REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP, \ - DEVICE_CPU, T, TF_STR(T), \ + DEVICE_CPU, T, \ ZerosLikeVariantPrimitiveType); // No zeros_like registered for std::complex<> or Eigen::half objects yet. @@ -226,24 +222,22 @@ REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool); // Special casing BinaryOpFn per op and per device. 
UnaryVariantOpRegistry::VariantBinaryOpFn* UnaryVariantOpRegistry::GetBinaryOpFn(VariantBinaryOp op, StringPiece device, - StringPiece type_name) { - auto found = binary_op_fns.find({op, device, type_name}); + const TypeIndex& type_index) { + auto found = binary_op_fns.find({op, device, type_index}); if (found == binary_op_fns.end()) return nullptr; return &found->second; } void UnaryVariantOpRegistry::RegisterBinaryOpFn( - VariantBinaryOp op, const string& device, const string& type_name, + VariantBinaryOp op, const string& device, const TypeIndex& type_index, const VariantBinaryOpFn& add_fn) { - CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantBinaryOp"; - VariantBinaryOpFn* existing = GetBinaryOpFn(op, device, type_name); + VariantBinaryOpFn* existing = GetBinaryOpFn(op, device, type_index); CHECK_EQ(existing, nullptr) - << "Unary VariantBinaryOpFn for type_name: " << type_name + << "Unary VariantBinaryOpFn for type_index: " + << port::MaybeAbiDemangle(type_index.name()) << " already registered for device type: " << device; binary_op_fns.insert(std::pair, VariantBinaryOpFn>( - {op, GetPersistentStringPiece(device), - GetPersistentStringPiece(type_name)}, - add_fn)); + {op, GetPersistentStringPiece(device), type_index}, add_fn)); } namespace { @@ -257,8 +251,7 @@ Status AddVariantPrimitiveType(OpKernelContext* ctx, const T& a, const T& b, #define REGISTER_VARIANT_ADD_TYPE(T) \ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_CPU, \ - T, TF_STR(T), \ - AddVariantPrimitiveType); + T, AddVariantPrimitiveType); // No add registered for std::complex<> or Eigen::half objects yet. REGISTER_VARIANT_ADD_TYPE(int); diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index e6a2665a56..7eb37e859f 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -22,10 +22,14 @@ limitations under the License. 
#define EIGEN_USE_THREADS +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/type_index.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/abi.h" namespace tensorflow { @@ -90,10 +94,11 @@ class UnaryVariantOpRegistry { AsyncVariantDeviceCopyFn; // Add a shape lookup function to the registry. - void RegisterShapeFn(const string& type_name, const VariantShapeFn& shape_fn); + void RegisterShapeFn(const TypeIndex& type_index, + const VariantShapeFn& shape_fn); - // Returns nullptr if no shape function was found for the given TypeName. - VariantShapeFn* GetShapeFn(StringPiece type_name); + // Returns nullptr if no shape function was found for the given TypeIndex. + VariantShapeFn* GetShapeFn(const TypeIndex& type_index); // Add a decode function to the registry. void RegisterDecodeFn(const string& type_name, @@ -104,33 +109,33 @@ class UnaryVariantOpRegistry { // Add a copy-to-GPU function to the registry. void RegisterDeviceCopyFn(const VariantDeviceCopyDirection direction, - const string& type_name, + const TypeIndex& type_index, const AsyncVariantDeviceCopyFn& device_copy_fn); // Returns nullptr if no copy function was found for the given // TypeName and direction. AsyncVariantDeviceCopyFn* GetDeviceCopyFn( - const VariantDeviceCopyDirection direction, StringPiece type_name); + const VariantDeviceCopyDirection direction, const TypeIndex& type_index); // Add a unary op function to the registry. void RegisterUnaryOpFn(VariantUnaryOp op, const string& device, - const string& type_name, + const TypeIndex& type_index, const VariantUnaryOpFn& unary_op_fn); // Returns nullptr if no unary op function was found for the given // op, device, and TypeName. 
VariantUnaryOpFn* GetUnaryOpFn(VariantUnaryOp op, StringPiece device, - StringPiece type_name); + const TypeIndex& type_index); // Add a binary op function to the registry. void RegisterBinaryOpFn(VariantBinaryOp op, const string& device, - const string& type_name, + const TypeIndex& type_index, const VariantBinaryOpFn& add_fn); // Returns nullptr if no binary op function was found for the given // op, device and TypeName. VariantBinaryOpFn* GetBinaryOpFn(VariantBinaryOp op, StringPiece device, - StringPiece type_name); + const TypeIndex& type_index); // Get a pointer to a global UnaryVariantOpRegistry object static UnaryVariantOpRegistry* Global(); @@ -145,24 +150,26 @@ class UnaryVariantOpRegistry { static std::unordered_set* PersistentStringStorage(); private: - std::unordered_map shape_fns; - std::unordered_map - decode_fns; + struct TypeIndexHash { + std::size_t operator()(const TypeIndex& x) const { return x.hash_code(); } + }; + + gtl::FlatMap shape_fns; + gtl::FlatMap decode_fns; // Map std::pair to function. struct PairHash { template - std::size_t operator()(const std::pair& x) const { + std::size_t operator()(const std::pair& x) const { // The hash of an enum is just its value as a std::size_t. std::size_t ret = static_cast(std::get<0>(x)); - ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x))); + ret = Hash64Combine(ret, std::get<1>(x).hash_code()); return ret; } - StringPieceHasher sp_hasher_; }; - std::unordered_map, - AsyncVariantDeviceCopyFn, PairHash> + gtl::FlatMap, + AsyncVariantDeviceCopyFn, PairHash> device_copy_fns; // Map std::tuple to function. 
@@ -172,10 +179,11 @@ class UnaryVariantOpRegistry { // and references therein template struct FuncTuple { - FuncTuple(const Op& op, const StringPiece& dev, const StringPiece& tname) - : op_type_(op), device_(dev), typename_(tname){}; + FuncTuple(const Op& op, const StringPiece& dev, const TypeIndex& type_index) + : op_type_(op), device_(dev), type_index_(type_index) {} Op op_type_; - StringPiece device_, typename_; + StringPiece device_; + TypeIndex type_index_; }; // friend declaration for operator== // needed for clang @@ -184,11 +192,11 @@ class UnaryVariantOpRegistry { struct TupleHash { template std::size_t operator()( - const std::tuple& x) const { + const std::tuple& x) const { // The hash of an enum is just its value as a std::size_t. std::size_t ret = static_cast(std::get<0>(x)); ret = Hash64Combine(ret, sp_hasher_(std::get<1>(x))); - ret = Hash64Combine(ret, sp_hasher_(std::get<2>(x))); + ret = Hash64Combine(ret, std::get<2>(x).hash_code()); return ret; } @@ -197,14 +205,14 @@ class UnaryVariantOpRegistry { // The hash of an enum is just its value as a std::size_t. std::size_t ret = static_cast(x.op_type_); ret = Hash64Combine(ret, sp_hasher_(x.device_)); - ret = Hash64Combine(ret, sp_hasher_(x.typename_)); + ret = Hash64Combine(ret, x.type_index_.hash_code()); return ret; } StringPieceHasher sp_hasher_; }; - std::unordered_map, VariantUnaryOpFn, TupleHash> + gtl::FlatMap, VariantUnaryOpFn, TupleHash> unary_op_fns; - std::unordered_map, VariantBinaryOpFn, TupleHash> + gtl::FlatMap, VariantBinaryOpFn, TupleHash> binary_op_fns; // Find or insert a string into a persistent string storage @@ -225,7 +233,7 @@ template inline bool operator==(const UnaryVariantOpRegistry::FuncTuple& lhs, const UnaryVariantOpRegistry::FuncTuple& rhs) { return (lhs.op_type_ == rhs.op_type_) && (lhs.device_ == rhs.device_) && - (lhs.typename_ == rhs.typename_); + (lhs.type_index_ == rhs.type_index_); } // Gets a TensorShape from a Tensor containing a scalar Variant. 
// Returns an Internal error if the Variant does not have a registered shape @@ -276,7 +284,7 @@ Status UnaryOpVariant(OpKernelContext* ctx, VariantUnaryOp op, const Variant& v, Variant* v_out) { const string& device = DeviceName::value; UnaryVariantOpRegistry::VariantUnaryOpFn* unary_op_fn = - UnaryVariantOpRegistry::Global()->GetUnaryOpFn(op, device, v.TypeName()); + UnaryVariantOpRegistry::Global()->GetUnaryOpFn(op, device, v.TypeId()); if (unary_op_fn == nullptr) { return errors::Internal( "No unary variant unary_op function found for unary variant op enum: ", @@ -297,15 +305,15 @@ Status UnaryOpVariant(OpKernelContext* ctx, VariantUnaryOp op, const Variant& v, template Status BinaryOpVariants(OpKernelContext* ctx, VariantBinaryOp op, const Variant& a, const Variant& b, Variant* out) { - if (a.TypeName() != b.TypeName()) { + if (a.TypeId() != b.TypeId()) { return errors::Internal( "BianryOpVariants: Variants a and b have different " - "type names: '", + "type ids. Type names: '", a.TypeName(), "' vs. 
'", b.TypeName(), "'"); } const string& device = DeviceName::value; UnaryVariantOpRegistry::VariantBinaryOpFn* binary_op_fn = - UnaryVariantOpRegistry::Global()->GetBinaryOpFn(op, device, a.TypeName()); + UnaryVariantOpRegistry::Global()->GetBinaryOpFn(op, device, a.TypeId()); if (binary_op_fn == nullptr) { return errors::Internal( "No unary variant binary_op function found for binary variant op " @@ -323,16 +331,18 @@ class UnaryVariantShapeRegistration { public: typedef std::function LocalVariantShapeFn; - UnaryVariantShapeRegistration(const string& type_name, + UnaryVariantShapeRegistration(const TypeIndex& type_index, const LocalVariantShapeFn& shape_fn) { + const string type_index_name = port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterShapeFn( - type_name, - [type_name, shape_fn](const Variant& v, TensorShape* s) -> Status { + type_index, + [type_index_name, shape_fn](const Variant& v, + TensorShape* s) -> Status { const T* t = v.get(); if (t == nullptr) { return errors::Internal( - "VariantShapeFn: Could not access object, type_name: ", - type_name); + "VariantShapeFn: Could not access object, type_index: ", + type_index_name); } return shape_fn(*t, s); }); @@ -355,11 +365,11 @@ class UnaryVariantDecodeRegistration { return false; } Variant decoded = T(); - VariantTensorData data(*t); - if (!decoded.Decode(data)) { + VariantTensorData data(std::move(*t)); + if (!decoded.Decode(std::move(data))) { return false; } - *v = std::move(decoded); + std::swap(decoded, *v); return true; }); } @@ -372,11 +382,12 @@ class UnaryVariantDeviceCopyRegistration { UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn)> LocalVariantDeviceCopyFn; UnaryVariantDeviceCopyRegistration( - const VariantDeviceCopyDirection direction, const string& type_name, + const VariantDeviceCopyDirection direction, const TypeIndex& type_index, const LocalVariantDeviceCopyFn& device_copy_fn) { + const string type_index_name = port::MaybeAbiDemangle(type_index.name()); 
UnaryVariantOpRegistry::Global()->RegisterDeviceCopyFn( - direction, type_name, - [type_name, device_copy_fn]( + direction, type_index, + [type_index_name, device_copy_fn]( const Variant& from, Variant* to, UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn device_copy_tensor_fn) -> Status { @@ -384,8 +395,8 @@ class UnaryVariantDeviceCopyRegistration { *to = T(); if (from.get() == nullptr) { return errors::Internal( - "VariantCopyToGPUFn: Could not access object, type_name: ", - type_name); + "VariantCopyToGPUFn: Could not access object, type_index: ", + type_index_name); } const T& t = *from.get(); T* t_out = to->get(); @@ -401,18 +412,19 @@ class UnaryVariantUnaryOpRegistration { public: UnaryVariantUnaryOpRegistration(VariantUnaryOp op, const string& device, - const string& type_name, + const TypeIndex& type_index, const LocalVariantUnaryOpFn& unary_op_fn) { + const string type_index_name = port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterUnaryOpFn( - op, device, type_name, - [type_name, unary_op_fn](OpKernelContext* ctx, const Variant& v, - Variant* v_out) -> Status { + op, device, type_index, + [type_index_name, unary_op_fn](OpKernelContext* ctx, const Variant& v, + Variant* v_out) -> Status { DCHECK_NE(v_out, nullptr); *v_out = T(); if (v.get() == nullptr) { return errors::Internal( - "VariantUnaryOpFn: Could not access object, type_name: ", - type_name); + "VariantUnaryOpFn: Could not access object, type_index: ", + type_index_name); } const T& t = *v.get(); T* t_out = v_out->get(); @@ -429,23 +441,25 @@ class UnaryVariantBinaryOpRegistration { public: UnaryVariantBinaryOpRegistration(VariantBinaryOp op, const string& device, - const string& type_name, + const TypeIndex& type_index, const LocalVariantBinaryOpFn& binary_op_fn) { + const string type_index_name = port::MaybeAbiDemangle(type_index.name()); UnaryVariantOpRegistry::Global()->RegisterBinaryOpFn( - op, device, type_name, - [type_name, binary_op_fn](OpKernelContext* 
ctx, const Variant& a, - const Variant& b, Variant* out) -> Status { + op, device, type_index, + [type_index_name, binary_op_fn](OpKernelContext* ctx, const Variant& a, + const Variant& b, + Variant* out) -> Status { DCHECK_NE(out, nullptr); *out = T(); if (a.get() == nullptr) { return errors::Internal( - "VariantBinaryOpFn: Could not access object 'a', type_name: ", - type_name); + "VariantBinaryOpFn: Could not access object 'a', type_index: ", + type_index_name); } if (b.get() == nullptr) { return errors::Internal( - "VariantBinaryOpFn: Could not access object 'b', type_name: ", - type_name); + "VariantBinaryOpFn: Could not access object 'b', type_index: ", + type_index_name); } const T& t_a = *a.get(); const T& t_b = *b.get(); @@ -459,19 +473,19 @@ class UnaryVariantBinaryOpRegistration { // Register a unary shape variant function with the signature: // Status ShapeFn(const T& t, TensorShape* s); -// to Variants having TypeName type_name. -#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(T, type_name, shape_function) \ - REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ_HELPER(__COUNTER__, T, type_name, \ - shape_function) +// to Variants having TypeIndex type_index. 
+#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(T, shape_function) \ + REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, T, MakeTypeIndex(), shape_function) -#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ_HELPER(ctr, T, type_name, \ - shape_function) \ - REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ(ctr, T, type_name, shape_function) +#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ_HELPER(ctr, T, type_index, \ + shape_function) \ + REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ(ctr, T, type_index, shape_function) -#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ(ctr, T, type_name, \ +#define REGISTER_UNARY_VARIANT_SHAPE_FUNCTION_UNIQ(ctr, T, type_index, \ shape_function) \ static variant_op_registry_fn_registration::UnaryVariantShapeRegistration \ - register_unary_variant_op_shape_registration_fn_##ctr(type_name, \ + register_unary_variant_op_shape_registration_fn_##ctr(type_index, \ shape_function) // Register a unary decode variant function for the given type. @@ -519,63 +533,63 @@ class UnaryVariantBinaryOpRegistration { // ****** NOTE ****** // FOR INTERNAL USE ONLY. IF YOU USE THIS WE MAY BREAK YOUR CODE. 
// ****** NOTE ****** -#define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \ - T, direction, type_name, device_copy_fn) \ - INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, T, direction, type_name, device_copy_fn) +#define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(T, direction, \ + device_copy_fn) \ + INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, T, direction, MakeTypeIndex(), device_copy_fn) #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \ - ctr, T, direction, type_name, device_copy_fn) \ + ctr, T, direction, type_index, device_copy_fn) \ INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ( \ - ctr, T, direction, type_name, device_copy_fn) + ctr, T, direction, type_index, device_copy_fn) -#define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ( \ - ctr, T, direction, type_name, device_copy_fn) \ - static variant_op_registry_fn_registration:: \ - UnaryVariantDeviceCopyRegistration \ - register_unary_variant_op_device_copy_fn_##ctr(direction, type_name, \ - device_copy_fn) +#define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ( \ + ctr, T, direction, type_index, device_copy_fn) \ + static variant_op_registry_fn_registration:: \ + UnaryVariantDeviceCopyRegistration \ + register_unary_variant_op_device_copy_fn_##ctr( \ + direction, type_index, device_copy_fn) // Register a unary unary_op variant function with the signature: // Status UnaryOpFn(OpKernelContext* ctx, const T& t, T* t_out); -// to Variants having TypeName type_name, for device string device, +// to Variants having TypeIndex type_index, for device string device, // for UnaryVariantOp enum op. 
-#define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(op, device, T, type_name, \ - unary_op_function) \ - REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, type_name, unary_op_function) +#define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(op, device, T, \ + unary_op_function) \ + REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, op, device, T, MakeTypeIndex(), unary_op_function) -#define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ - ctr, op, device, T, type_name, unary_op_function) \ - REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ(ctr, op, device, T, type_name, \ - unary_op_function) +#define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER( \ + ctr, op, device, T, type_index, unary_op_function) \ + REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ(ctr, op, device, T, \ + type_index, unary_op_function) #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ( \ - ctr, op, device, T, type_name, unary_op_function) \ + ctr, op, device, T, type_index, unary_op_function) \ static variant_op_registry_fn_registration::UnaryVariantUnaryOpRegistration< \ T> \ - register_unary_variant_op_decoder_fn_##ctr(op, device, type_name, \ + register_unary_variant_op_decoder_fn_##ctr(op, device, type_index, \ unary_op_function) // Register a binary_op variant function with the signature: // Status BinaryOpFn(OpKernelContext* ctx, const T& a, const T& b, T* out); -// to Variants having TypeName type_name, for device string device, +// to Variants having TypeIndex type_index, for device string device, // for BinaryVariantOp enum OP. 
-#define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(op, device, T, type_name, \ - binary_op_function) \ - REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ - __COUNTER__, op, device, T, type_name, binary_op_function) +#define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(op, device, T, \ + binary_op_function) \ + REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, op, device, T, MakeTypeIndex(), binary_op_function) #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \ - ctr, op, device, T, type_name, binary_op_function) \ + ctr, op, device, T, type_index, binary_op_function) \ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ( \ - ctr, op, device, T, type_name, binary_op_function) + ctr, op, device, T, type_index, binary_op_function) -#define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ( \ - ctr, op, device, T, type_name, binary_op_function) \ - static variant_op_registry_fn_registration:: \ - UnaryVariantBinaryOpRegistration \ - register_unary_variant_op_decoder_fn_##ctr(op, device, type_name, \ +#define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ( \ + ctr, op, device, T, type_index, binary_op_function) \ + static variant_op_registry_fn_registration:: \ + UnaryVariantBinaryOpRegistration \ + register_unary_variant_op_decoder_fn_##ctr(op, device, type_index, \ binary_op_function) } // end namespace tensorflow diff --git a/tensorflow/core/framework/variant_op_registry_test.cc b/tensorflow/core/framework/variant_op_registry_test.cc index 7055e62c0e..b2443e8676 100644 --- a/tensorflow/core/framework/variant_op_registry_test.cc +++ b/tensorflow/core/framework/variant_op_registry_test.cc @@ -89,41 +89,37 @@ struct VariantValue { int value; }; -REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, "TEST VariantValue", - VariantValue::ShapeFn); +REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, VariantValue::ShapeFn); REGISTER_UNARY_VARIANT_DECODE_FUNCTION(VariantValue, "TEST VariantValue"); 
INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( VariantValue, VariantDeviceCopyDirection::HOST_TO_DEVICE, - "TEST VariantValue", VariantValue::CPUToGPUCopyFn); + VariantValue::CPUToGPUCopyFn); REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, VariantValue, - "TEST VariantValue", VariantValue::CPUZerosLikeFn); REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, VariantValue, - "TEST VariantValue", VariantValue::GPUZerosLikeFn); REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_CPU, - VariantValue, "TEST VariantValue", - VariantValue::CPUAddFn); + VariantValue, VariantValue::CPUAddFn); REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_GPU, - VariantValue, "TEST VariantValue", - VariantValue::GPUAddFn); + VariantValue, VariantValue::GPUAddFn); } // namespace TEST(VariantOpShapeRegistryTest, TestBasic) { - EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetShapeFn("YOU SHALL NOT PASS"), + class Blah {}; + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetShapeFn(MakeTypeIndex()), nullptr); - auto* shape_fn = - UnaryVariantOpRegistry::Global()->GetShapeFn("TEST VariantValue"); + auto* shape_fn = UnaryVariantOpRegistry::Global()->GetShapeFn( + MakeTypeIndex()); EXPECT_NE(shape_fn, nullptr); TensorShape shape; @@ -142,10 +138,11 @@ TEST(VariantOpShapeRegistryTest, TestBasic) { TEST(VariantOpShapeRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantShapeFn f; - string kTypeName = "fjfjfj"; - registry.RegisterShapeFn(kTypeName, f); - EXPECT_DEATH(registry.RegisterShapeFn(kTypeName, f), - "fjfjfj already registered"); + class FjFjFj {}; + const auto kTypeIndex = MakeTypeIndex(); + registry.RegisterShapeFn(kTypeIndex, f); + EXPECT_DEATH(registry.RegisterShapeFn(kTypeIndex, f), + "FjFjFj already registered"); } TEST(VariantOpDecodeRegistryTest, TestBasic) { @@ -180,13 +177,14 @@ TEST(VariantOpDecodeRegistryTest, TestDuplicate) { 
TEST(VariantOpCopyToGPURegistryTest, TestBasic) { // No registered copy fn for GPU<->GPU. - EXPECT_EQ( - UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( - VariantDeviceCopyDirection::DEVICE_TO_DEVICE, "TEST VariantValue"), - nullptr); + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( + VariantDeviceCopyDirection::DEVICE_TO_DEVICE, + MakeTypeIndex()), + nullptr); auto* copy_to_gpu_fn = UnaryVariantOpRegistry::Global()->GetDeviceCopyFn( - VariantDeviceCopyDirection::HOST_TO_DEVICE, "TEST VariantValue"); + VariantDeviceCopyDirection::HOST_TO_DEVICE, + MakeTypeIndex()); EXPECT_NE(copy_to_gpu_fn, nullptr); VariantValue vv{true /* early_exit */}; @@ -208,17 +206,19 @@ TEST(VariantOpCopyToGPURegistryTest, TestBasic) { TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn f; - string kTypeName = "fjfjfj"; + class FjFjFj {}; + const auto kTypeIndex = MakeTypeIndex(); registry.RegisterDeviceCopyFn(VariantDeviceCopyDirection::HOST_TO_DEVICE, - kTypeName, f); + kTypeIndex, f); EXPECT_DEATH(registry.RegisterDeviceCopyFn( - VariantDeviceCopyDirection::HOST_TO_DEVICE, kTypeName, f), - "fjfjfj already registered"); + VariantDeviceCopyDirection::HOST_TO_DEVICE, kTypeIndex, f), + "FjFjFj already registered"); } TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { + class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, "YOU SHALL NOT PASS"), + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, MakeTypeIndex()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; @@ -242,8 +242,9 @@ TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { #if GOOGLE_CUDA TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) { + class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn( - ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, "YOU SHALL NOT PASS"), + ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, MakeTypeIndex()), nullptr); 
VariantValue vv_early_exit{true /* early_exit */, 0 /* value */}; @@ -269,25 +270,26 @@ TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) { TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; UnaryVariantOpRegistry::VariantUnaryOpFn f; - string kTypeName = "fjfjfj"; + class FjFjFj {}; + const auto kTypeIndex = MakeTypeIndex(); - registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, kTypeName, - f); + registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, + kTypeIndex, f); EXPECT_DEATH(registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, - DEVICE_CPU, kTypeName, f), - "fjfjfj already registered"); + DEVICE_CPU, kTypeIndex, f), + "FjFjFj already registered"); - registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, kTypeName, - f); + registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, + kTypeIndex, f); EXPECT_DEATH(registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, - DEVICE_GPU, kTypeName, f), - "fjfjfj already registered"); + DEVICE_GPU, kTypeIndex, f), + "FjFjFj already registered"); } TEST(VariantOpAddRegistryTest, TestBasicCPU) { - return; + class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_CPU, "YOU SHALL NOT PASS"), + ADD_VARIANT_BINARY_OP, DEVICE_CPU, MakeTypeIndex()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -312,8 +314,9 @@ TEST(VariantOpAddRegistryTest, TestBasicCPU) { #if GOOGLE_CUDA TEST(VariantOpAddRegistryTest, TestBasicGPU) { + class Blah {}; EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn( - ADD_VARIANT_BINARY_OP, DEVICE_GPU, "YOU SHALL NOT PASS"), + ADD_VARIANT_BINARY_OP, DEVICE_GPU, MakeTypeIndex()), nullptr); VariantValue vv_early_exit{true /* early_exit */, 3 /* value */}; @@ -340,17 +343,18 @@ TEST(VariantOpAddRegistryTest, TestBasicGPU) { TEST(VariantOpAddRegistryTest, TestDuplicate) { UnaryVariantOpRegistry registry; 
UnaryVariantOpRegistry::VariantBinaryOpFn f; - string kTypeName = "fjfjfj"; + class FjFjFj {}; + const auto kTypeIndex = MakeTypeIndex(); - registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, kTypeName, f); + registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, kTypeIndex, f); EXPECT_DEATH(registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, - kTypeName, f), - "fjfjfj already registered"); + kTypeIndex, f), + "FjFjFj already registered"); - registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_GPU, kTypeName, f); + registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_GPU, kTypeIndex, f); EXPECT_DEATH(registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_GPU, - kTypeName, f), - "fjfjfj already registered"); + kTypeIndex, f), + "FjFjFj already registered"); } } // namespace tensorflow diff --git a/tensorflow/core/framework/variant_tensor_data.cc b/tensorflow/core/framework/variant_tensor_data.cc index 99712dc114..3e67e4a864 100644 --- a/tensorflow/core/framework/variant_tensor_data.cc +++ b/tensorflow/core/framework/variant_tensor_data.cc @@ -22,8 +22,8 @@ namespace tensorflow { VariantTensorData::VariantTensorData() {} -VariantTensorData::VariantTensorData(const VariantTensorDataProto& proto) { - FromProto(proto); +VariantTensorData::VariantTensorData(VariantTensorDataProto proto) { + FromProto(std::move(proto)); } VariantTensorData::~VariantTensorData() {} @@ -52,7 +52,19 @@ void VariantTensorData::ToProto(VariantTensorDataProto* proto) const { } } -bool VariantTensorData::FromProto(const VariantTensorDataProto& proto) { +bool VariantTensorData::FromProto(VariantTensorDataProto proto) { + // TODO(ebrevdo): Do this lazily. 
+ set_type_name(proto.type_name()); + set_metadata(proto.metadata()); + for (const auto& tensor : proto.tensors()) { + Tensor tmp; + if (!tmp.FromProto(tensor)) return false; + tensors_.push_back(tmp); + } + return true; +} + +bool VariantTensorData::FromConstProto(const VariantTensorDataProto& proto) { set_type_name(proto.type_name()); set_metadata(proto.metadata()); for (const auto& tensor : proto.tensors()) { @@ -75,10 +87,10 @@ bool VariantTensorData::SerializeToString(string* buf) { return proto.SerializeToString(buf); } -bool VariantTensorData::ParseFromString(const string& s) { +bool VariantTensorData::ParseFromString(string s) { VariantTensorDataProto proto; const bool status = proto.ParseFromString(s); - if (status) FromProto(proto); + if (status) FromProto(std::move(proto)); return status; } diff --git a/tensorflow/core/framework/variant_tensor_data.h b/tensorflow/core/framework/variant_tensor_data.h index 7500e77d43..8a240ee1e3 100644 --- a/tensorflow/core/framework/variant_tensor_data.h +++ b/tensorflow/core/framework/variant_tensor_data.h @@ -19,13 +19,13 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { class VariantTensorDataProto; -class Tensor; // The serialization format for Variant objects. Objects with references to // other Tensors can simply store those tensors in the `tensors` field, and @@ -38,7 +38,7 @@ class Tensor; class VariantTensorData { public: VariantTensorData(); - VariantTensorData(const VariantTensorDataProto& proto); + VariantTensorData(VariantTensorDataProto proto); ~VariantTensorData(); // Name of the type of objects being serialized. 
@@ -68,12 +68,14 @@ class VariantTensorData { // Conversion to and from VariantTensorDataProto void ToProto(VariantTensorDataProto* proto) const; - bool FromProto(const VariantTensorDataProto& proto); + // This allows optimizations via std::move. + bool FromProto(VariantTensorDataProto proto); + bool FromConstProto(const VariantTensorDataProto& proto); // Serialization via VariantTensorDataProto string SerializeAsString() const; bool SerializeToString(string* buf); - bool ParseFromString(const string& s); + bool ParseFromString(string s); string DebugString() const; diff --git a/tensorflow/core/framework/variant_test.cc b/tensorflow/core/framework/variant_test.cc index eef5c47d15..08d09de7b8 100644 --- a/tensorflow/core/framework/variant_test.cc +++ b/tensorflow/core/framework/variant_test.cc @@ -144,8 +144,8 @@ TEST(VariantTest, TypeMismatch) { struct TensorList { void Encode(VariantTensorData* data) const { data->tensors_ = vec; } - bool Decode(const VariantTensorData& data) { - vec = data.tensors_; + bool Decode(VariantTensorData data) { + vec = std::move(data.tensors_); return true; } @@ -186,7 +186,7 @@ TEST(VariantTest, TensorListTest) { x.Encode(&serialized); Variant y = TensorList(); - y.Decode(serialized); + y.Decode(std::move(serialized)); const TensorList& decoded_vec = *y.get(); for (int i = 0; i < 4; ++i) { @@ -204,15 +204,6 @@ TEST(VariantTest, TensorListTest) { EXPECT_EQ(y_unknown.DebugString(), strings::StrCat( "Variant")); - - TensorList unknown_decoded_vec; - EXPECT_TRUE(y_unknown.MaybeDecodeAndCopy(&unknown_decoded_vec)); - for (int i = 0; i < 4; ++i) { - EXPECT_EQ(unknown_decoded_vec.vec[i].flat()(0), i); - } - for (int i = 0; i < 4; ++i) { - EXPECT_EQ(unknown_decoded_vec.vec[i + 4].flat()(0), 2 * i); - } } TEST(VariantTest, VariantArray) { diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index fe6d705eab..30c6585ba2 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ 
b/tensorflow/core/kernels/data/iterator_ops.cc @@ -403,12 +403,12 @@ class IteratorStateVariant { } string TypeName() const { return kIteratorVariantTypeName; } void Encode(VariantTensorData* data) const { *data = *data_; } - bool Decode(const VariantTensorData& data) { + bool Decode(VariantTensorData data) { if (data.type_name() != TypeName()) { return false; } std::unique_ptr tensor_data(new VariantTensorData); - *tensor_data = data; + std::swap(*tensor_data, data); std::unique_ptr reader( new VariantTensorDataReader(tensor_data.get())); status_ = reader->status(); diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc index b372d31a93..6180df5af2 100644 --- a/tensorflow/core/kernels/data/optional_ops.cc +++ b/tensorflow/core/kernels/data/optional_ops.cc @@ -231,10 +231,9 @@ static Status OptionalDeviceCopy( return Status::OK(); } -#define REGISTER_OPTIONAL_COPY(DIRECTION) \ - INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \ - OptionalVariant, DIRECTION, kOptionalVariantTypeName, \ - OptionalDeviceCopy) +#define REGISTER_OPTIONAL_COPY(DIRECTION) \ + INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \ + OptionalVariant, DIRECTION, OptionalDeviceCopy) REGISTER_OPTIONAL_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE); REGISTER_OPTIONAL_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST); diff --git a/tensorflow/core/kernels/gather_functor.h b/tensorflow/core/kernels/gather_functor.h index cd2873bdca..7710cf93d6 100644 --- a/tensorflow/core/kernels/gather_functor.h +++ b/tensorflow/core/kernels/gather_functor.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/variant.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/platform/prefetch.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/list_kernels.cc b/tensorflow/core/kernels/list_kernels.cc index bca1cff41c..2088c13586 100644 --- a/tensorflow/core/kernels/list_kernels.cc +++ b/tensorflow/core/kernels/list_kernels.cc @@ -77,9 +77,9 @@ static Status TensorListDeviceCopy( return Status::OK(); } -#define REGISTER_LIST_COPY(DIRECTION) \ - INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \ - TensorList, DIRECTION, TensorList::kTypeName, TensorListDeviceCopy) +#define REGISTER_LIST_COPY(DIRECTION) \ + INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(TensorList, DIRECTION, \ + TensorListDeviceCopy) REGISTER_LIST_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE); REGISTER_LIST_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST); @@ -92,8 +92,7 @@ Status TensorListShape(const TensorList& t, TensorShape* s) { return Status::OK(); } -REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(TensorList, TensorList::kTypeName, - TensorListShape); +REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(TensorList, TensorListShape); bool TensorList::Decode(const VariantTensorData& data) { tensors = data.tensors(); @@ -625,12 +624,11 @@ REGISTER_TENSOR_LIST_FROM_TENSOR_CPU(bfloat16); #undef REGISTER_TENSOR_LIST_FROM_TENSOR_CPU REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_CPU, - TensorList, TensorList::kTypeName, + TensorList, TensorListBinaryAdd); REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, TensorList, - TensorList::kTypeName, TensorListZerosLike); } // namespace tensorflow diff --git a/tensorflow/core/kernels/list_kernels.cu.cc b/tensorflow/core/kernels/list_kernels.cu.cc index c591226b76..a00bf700ca 100644 
--- a/tensorflow/core/kernels/list_kernels.cu.cc +++ b/tensorflow/core/kernels/list_kernels.cu.cc @@ -94,11 +94,10 @@ REGISTER_TENSOR_LIST_FROM_TENSOR_GPU(bool); #undef REGISTER_TENSOR_LIST_FROM_TENSOR_GPU REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_GPU, - TensorList, TensorList::kTypeName, + TensorList, TensorListBinaryAdd); REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, TensorList, - TensorList::kTypeName, TensorListZerosLike); } // namespace tensorflow diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc index 9cd590ae61..30cb1e0a7f 100644 --- a/tensorflow/core/kernels/shape_op_test.cc +++ b/tensorflow/core/kernels/shape_op_test.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/abi.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/types.h" @@ -60,8 +61,7 @@ Status GetShapeFromKnownVecSize(const KnownVecSize& ks, TensorShape* s) { REGISTER_UNARY_VARIANT_DECODE_FUNCTION(KnownVecSize, "KNOWN VECTOR SIZE TYPE"); -REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(KnownVecSize, "KNOWN VECTOR SIZE TYPE", - GetShapeFromKnownVecSize); +REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(KnownVecSize, GetShapeFromKnownVecSize); static void ExpectHasError(const Status& s, StringPiece substr) { EXPECT_TRUE(str_util::StrContains(s.ToString(), substr)) @@ -94,9 +94,9 @@ TEST_F(ShapeOpTest, Simple) { Status s = session.Run({{input, variant_tensor}}, {shape_output}, &outputs); EXPECT_FALSE(s.ok()); ExpectHasError( - s, - "No unary variant shape function found for Variant type_name: " - "NO KNOWN SHAPE"); + s, strings::StrCat( + "No unary variant shape function found for Variant type_index: ", + port::MaybeAbiDemangle(MakeTypeIndex().name()))); } { diff --git 
a/tensorflow/core/platform/abi.cc b/tensorflow/core/platform/abi.cc index e597a490d6..d7a13a3528 100644 --- a/tensorflow/core/platform/abi.cc +++ b/tensorflow/core/platform/abi.cc @@ -37,13 +37,13 @@ extern "C" char* __unDName(char* output_string, const char* name, namespace tensorflow { namespace port { -std::string MaybeAbiDemangle(const char* name) { +string MaybeAbiDemangle(const char* name) { #if defined(_MSC_VER) std::unique_ptr demangled{__unDName(nullptr, name, 0, std::malloc, std::free, static_cast(0))}; - return std::string(demangled.get() != nullptr ? demangled.get() : name); + return string(demangled.get() != nullptr ? demangled.get() : name); #else int status = 0; std::unique_ptr res{ diff --git a/tensorflow/core/platform/abi.h b/tensorflow/core/platform/abi.h index 591e83b0c4..d1498a6a64 100644 --- a/tensorflow/core/platform/abi.h +++ b/tensorflow/core/platform/abi.h @@ -17,11 +17,12 @@ limitations under the License. #define TENSORFLOW_CORE_PLATFORM_ABI_H_ #include +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace port { -std::string MaybeAbiDemangle(const char* name); +string MaybeAbiDemangle(const char* name); } // namespace port } // namespace tensorflow -- cgit v1.2.3 From 232fcbb6fcf8c5ab3713261a0ef9a771b270753e Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 11 Sep 2018 10:49:24 -0700 Subject: Add basic logging to metagraph transform PiperOrigin-RevId: 212480467 --- .../contrib/meta_graph_transform/meta_graph_transform.py | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py index c35e60a554..b1c852c2c6 100644 --- a/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py +++ b/tensorflow/contrib/meta_graph_transform/meta_graph_transform.py @@ -31,6 +31,7 @@ from tensorflow.python.client import session as _session from tensorflow.python.framework import graph_util as _graph_util from tensorflow.python.framework import importer as _importer from tensorflow.python.framework import ops as _ops +from tensorflow.python.platform import tf_logging as _logging from tensorflow.python.saved_model import constants as _saved_model_constants from tensorflow.python.training import saver as _saver_lib from tensorflow.python.util import compat as _compat @@ -476,6 +477,12 @@ def _add_pruned_collection(base_meta_graph_def, meta_graph_def, collection.bytes_list.value[:] = [ s for s in base_collection.bytes_list.value if not _is_removed_mentioned(s, removed_op_names)] + _logging.info( + 'In collection %s, nodes excluded are: %s', collection_name, + sorted([ + s for s in base_collection.bytes_list.value + if _is_removed_mentioned(s, removed_op_names) + ])) elif base_collection.HasField('node_list'): collection.node_list.value[:] = [ s for s in base_collection.node_list.value @@ -745,6 +752,9 @@ def meta_graph_transform( retained_op_names = [_compat.as_str(node.name) for node in meta_graph_def.graph_def.node] removed_op_names = set(base_op_names) - set(retained_op_names) + _logging.info('Node names in base graph: %s', sorted(base_op_names)) + _logging.info('Node names retained: %s', sorted(retained_op_names)) + _logging.info('Node names 
removed: %s', sorted(removed_op_names)) # Copy saver, excluding any pruned nodes if graph was not frozen. # TODO(b/63447631): Revisit this once the problem is addressed. Currently -- cgit v1.2.3 From 7e5ae7109f558cafaa87e3bcebabfc0e1f67aabc Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 11 Sep 2018 11:12:34 -0700 Subject: Handle control dependencies from switch nodes as nonreachable. In DeleteReachableNodes all the nodes reachable from nodes deleted from the graph during extraction was considered. But if a node had a control dependency on a switch, then that node doesn't conditionally execute based on the switch predicate and is not part of the conditional extracted, so it should be considered reachable for deletion. Additionally perform sweep of graph for dead nodes together with deleting the reachable nodes to keep all dead node deletion together. Also delete a dead function and ensure all graph dumps from functionalize_cond has that as prefix. PiperOrigin-RevId: 212485183 --- tensorflow/compiler/tf2xla/functionalize_cond.cc | 71 +++++++++++++++++------- tensorflow/compiler/tf2xla/functionalize_cond.h | 13 ++--- 2 files changed, 54 insertions(+), 30 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index 0911550f1f..3ad1d1d5b4 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -217,10 +217,6 @@ void StateMap::ResetAncestorId(const Node* node, StateMap::AncestorId id) { added_node_ancestorid_mapping_[node->id()] = id; } -const StateMap::CondState& StateMap::LookupState(const Node* node) const { - return *LookupCondId(node); -} - void StateMap::MarkDead(const Node* node) { ResetCondId(node, dead_id_); } string StateMap::CondStateToString(const Node* node) const { @@ -791,7 +787,6 @@ Status Conditional::BuildAndReplace(Graph* graph, TF_RETURN_IF_ERROR(AddInputEdges(graph)); 
TF_RETURN_IF_ERROR(AddOutputEdges(graph)); TF_RETURN_IF_ERROR(parent_->PropagateUpdatedState(if_node_)); - for (Node* m : merges_) state_map_->MarkDead(m); // Check that the if_node doesn't feed into itself. TF_RETURN_WITH_CONTEXT_IF_ERROR( @@ -1056,7 +1051,6 @@ Status FunctionalizeCond::RemoveRedundantMerge(Node* node) { " has no non-dead inputs."); } state_map_.MarkDead(node); - delete_nodes_.push_back(node->id()); VLOG(5) << "removing redundant merge: " << node->name(); while (!node->out_edges().empty()) { const Edge* oe = *node->out_edges().begin(); @@ -1132,7 +1126,6 @@ Status FunctionalizeCond::RemoveRedundantSwitch(Node* node) { } } else if (BranchType(switch_branch) != b) { state_map_.MarkDead(dst_node); - delete_nodes_.push_back(dst_node->id()); continue; } graph_->AddEdge( @@ -1154,7 +1147,7 @@ Status FunctionalizeCond::DetermineStates(std::vector rev_topo_order) { VLOG(5) << dst->name() << " :: " << state_map_.CondStateToString(dst) << " @ " << state_map_.AncestorStateToString(dst); - if (VLOG_IS_ON(10)) DumpGraphWithCondState("cond_it"); + if (VLOG_IS_ON(10)) DumpGraphWithCondState("it"); } return Status::OK(); } @@ -1184,23 +1177,62 @@ Status FunctionalizeCond::DetermineAncestorState(Node* dst) { return Status::OK(); } -void FunctionalizeCond::DeleteReachableNodes() { +void FunctionalizeCond::DeleteReachableAndDeadNodes( + const std::vector& switch_ids, const std::vector& merge_order) { // Delete all nodes that have been extracted or are reachable from // deleted/dead nodes. The input and outgoing edges should have already been // removed. + std::deque delete_nodes; std::vector deleted(graph_->num_node_ids(), false); // Don't try to delete source or sink nodes. deleted[graph_->kSourceId] = true; deleted[graph_->kSinkId] = true; - while (!delete_nodes_.empty()) { - int d_id = delete_nodes_.front(); - delete_nodes_.pop_front(); + + // All remaining Switch nodes are not reachable from a Merge node and + // removed. 
This is to account for dead Switch nodes. + for (int s_id : switch_ids) { + Node* s = graph_->FindNodeId(s_id); + if (s == nullptr) continue; + for (const Edge* e : s->out_edges()) { + // Control outputs of switch nodes (which are unconditionally executed if + // the switch is) are not removed as they need not be part of a + // conditional. + if (!e->IsControlEdge()) delete_nodes.push_back(e->dst()->id()); + } + deleted[s_id] = true; + graph_->RemoveNode(s); + } + + // All merge nodes should have been transformed at this point and we remove + // them from the graph here. + for (Node* m : merge_order) { + for (const Edge* e : m->out_edges()) { + // Similar to control outputs of switch nodes don't remove control + // outputs of merge nodes. + // TODO(jpienaar): Check cases where output edges still exist here vs + // being removed in AddOutputEdges. + if (!e->IsControlEdge()) delete_nodes.push_back(e->dst()->id()); + } + deleted[m->id()] = true; + graph_->RemoveNode(m); + } + + // Enqueue all the dead nodes. + for (Node* n : graph_->nodes()) { + if (state_map_.IsDead(state_map_.LookupCondId(n))) { + delete_nodes.push_back(n->id()); + } + } + + while (!delete_nodes.empty()) { + int d_id = delete_nodes.front(); + delete_nodes.pop_front(); if (deleted[d_id]) continue; Node* d = graph_->FindNodeId(d_id); // Switch and Merge nodes could have been deleted already. if (d == nullptr) continue; for (const Edge* e : d->out_edges()) { - delete_nodes_.push_back(e->dst()->id()); + delete_nodes.push_back(e->dst()->id()); } deleted[d_id] = true; graph_->RemoveNode(d); @@ -1274,7 +1306,7 @@ Status FunctionalizeCond::FunctionalizeInternal() { } TF_RETURN_IF_ERROR(DetermineStates(std::move(rev_topo_order))); - if (VLOG_IS_ON(4)) DumpGraphWithCondState("cond_id"); + if (VLOG_IS_ON(4)) DumpGraphWithCondState("id"); // Sort the merge nodes from innermost outwards. 
SortMergeNodes(&merge_order); @@ -1312,11 +1344,7 @@ Status FunctionalizeCond::FunctionalizeInternal() { if (VLOG_IS_ON(4)) DumpGraphWithCondState("after_extract"); } - // All remaining Switch nodes are not reachable from a Merge node and - // removed. This is to account for dead Switch nodes. - for (int s_id : switch_ids) delete_nodes_.push_back(s_id); - for (Node* m : merge_order) delete_nodes_.push_back(m->id()); - DeleteReachableNodes(); + DeleteReachableAndDeadNodes(switch_ids, merge_order); return Status::OK(); } @@ -1331,8 +1359,9 @@ void FunctionalizeCond::DumpGraphWithCondState(const string& name) { state_map_.AncestorStateToString(n))); } LOG(INFO) << "FunctionalizeControlFlow (" << name << "): " - << dump_graph::DumpGraphToFile(absl::StrCat("functionalize_", name), - *graph_, library_); + << dump_graph::DumpGraphToFile( + absl::StrCat("functionalize_cond_", name), *graph_, + library_); } Status FunctionalizeCond::Functionalize(Graph* graph, diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.h b/tensorflow/compiler/tf2xla/functionalize_cond.h index 28301150ea..1899808940 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.h +++ b/tensorflow/compiler/tf2xla/functionalize_cond.h @@ -91,10 +91,6 @@ class StateMap { // Resets the AncestorId for a given node. void ResetAncestorId(const Node* node, AncestorId id); - // Returns the CondState for a Node. - // REQUIRES: node has a non-empty CondState. - const CondState& LookupState(const Node* node) const; - // Marks `node` as dead. void MarkDead(const Node* node); @@ -221,8 +217,10 @@ class FunctionalizeCond { // nesting depth. void SortMergeNodes(std::vector* merge_order); - // Deletes all nodes in/consumers of `delete_nodes_`. - void DeleteReachableNodes(); + // Deletes all nodes in/consumers reachable from switch/merge nodes that were + // extracted. 
+ void DeleteReachableAndDeadNodes(const std::vector& switch_ids, + const std::vector& merge_order); // Member used to unique the CondState to a unique CondId (AncestorState to a // unique AncestorId) and keep track of CondState/CondId @@ -232,9 +230,6 @@ class FunctionalizeCond { // Mapping from merge nodes to predicate. std::unordered_map merge_to_predicate_; - // Nodes to be deleted. - std::deque delete_nodes_; - FunctionLibraryDefinition* library_; Graph* graph_; -- cgit v1.2.3 From ded099749d4f987b404b9d5fd7169baf1671582b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 11 Sep 2018 11:16:06 -0700 Subject: Add missing spaces to error message. PiperOrigin-RevId: 212485820 --- tensorflow/core/graph/graph_constructor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index ee10194142..7399613f6a 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1042,12 +1042,12 @@ Status GraphConstructor::Convert() { } if (processed < node_defs_.size()) { - LOG(WARNING) << "IN " << __func__ << (node_defs_.size() - processed) + LOG(WARNING) << "IN " << __func__ << " " << (node_defs_.size() - processed) << " NODES IN A CYCLE"; for (int64 i = 0; i < node_defs_.size(); i++) { if (pending_count_[i] != 0) { LOG(WARNING) << "PENDING: " << SummarizeNodeDef(*node_defs_[i]) - << "WITH PENDING COUNT = " << pending_count_[i]; + << " WITH PENDING COUNT = " << pending_count_[i]; } } return errors::InvalidArgument(node_defs_.size() - processed, -- cgit v1.2.3 From a346aa260d32eb83621bb7ed501a2b07ba186480 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 11 Sep 2018 11:22:27 -0700 Subject: Automated rollback of commit 624ff13fdf4e54e255d23971ef2beec3c48c3bb2. Revert #21826. 
PiperOrigin-RevId: 212487142 --- tensorflow/python/ops/ctc_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 32d455bdad..908e793902 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -242,11 +242,11 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, If `merge_repeated` is `True`, merge repeated classes in the output beams. This means that if consecutive entries in a beam are the same, - only the first of these is emitted. That is, when the sequence is - `A B B * B * B` (where '*' is the blank label), the return value is: + only the first of these is emitted. That is, when the top path + is `A B B B B`, the return value is: * `A B` if `merge_repeated = True`. - * `A B B B` if `merge_repeated = False`. + * `A B B B B` if `merge_repeated = False`. Args: inputs: 3-D `float` `Tensor`, size -- cgit v1.2.3 From 6cb9189c567397b0779f1c52604e2ea6255a9183 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 11 Sep 2018 11:25:23 -0700 Subject: Removes option of pass-through runner on eager execution. It is possible it will deadlock by running code in the GPU event manager thread. 
PiperOrigin-RevId: 212487862 --- tensorflow/core/common_runtime/eager/context.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc index 37fc031985..263467a5b6 100644 --- a/tensorflow/core/common_runtime/eager/context.cc +++ b/tensorflow/core/common_runtime/eager/context.cc @@ -66,13 +66,9 @@ EagerContext::EagerContext(const SessionOptions& opts, local_unowned_device_manager_ = device_mgr; } InitDeviceMapAndAsync(); - if (opts.config.inter_op_parallelism_threads() > 0) { - runner_ = [this](std::function closure) { - this->thread_pool_->Schedule(closure); - }; - } else { - runner_ = [](std::function closure) { closure(); }; - } + runner_ = [this](std::function closure) { + this->thread_pool_->Schedule(closure); + }; } void EagerContext::InitDeviceMapAndAsync() { -- cgit v1.2.3 From 9b8c30fb0abf42f34c17050ff455d36166fa0e24 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 11 Sep 2018 11:26:28 -0700 Subject: Contraction mapper for cuboid convolutions. Directly pack rhs memory for the gebp kernes with a gemm_pack_rhs specialization. It's similar to optimized memory packing in eigen_spatial_convolutions. Works for: 1. CuboidConvolution 2. CuboidConvolutionBackwardInput ~2x-4x speedup when compiled with AVX (depends on tensor&patch dimensions). PiperOrigin-RevId: 212488060 --- tensorflow/core/kernels/eigen_cuboid_convolution.h | 1356 ++++++++++++++++++++ 1 file changed, 1356 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/eigen_cuboid_convolution.h b/tensorflow/core/kernels/eigen_cuboid_convolution.h index 62e9f9123d..c41fbc42d3 100644 --- a/tensorflow/core/kernels/eigen_cuboid_convolution.h +++ b/tensorflow/core/kernels/eigen_cuboid_convolution.h @@ -21,6 +21,1362 @@ limitations under the License. 
namespace Eigen { +namespace internal { + +// WARNING: Most of the code here implicitly assumes that the matrix is in +// ColMajor layout. This is guaranteed by the tensor contraction (see +// TensorContraction.h). +// +// Inside Eigen a tensor contraction is represented by a matrix multiplication. +// We don't want to actually extract volume patches and reshape the result into +// a matrix (this involves allocating huge extra memory), so the patch +// extraction and reshape operations are implicit. +// +// TensorContractionInputMapper takes a matrix index and returns the coefficient +// (or the packet) of the "virtual tensor", that would be at that index if we +// were to actually reshape the result of patch extraction. +// +// TensorContractionSubMapper provides a similar view into the "virtual matrix" +// at the given vertical and horizontal offsets. +// +// "Virtual matrix" dimensions: +// *0: kernelChannels * kernelDepth * kernelRows * kernelCols; +// 1: out_depth * out_height * out_width; * OTHERS (e.g batches, etc...) 
+// +// *) extracted patches are continuous in memory (innermost dimension assuming +// col major layout) +// +// With this dimensions: +// row - offset within a single patch (in code: patchId) +// col - index of the extracted patch (in code: patchIndex) +// patchIndex ∈ [0..num_patches * OTHERS] (batch and other dimensions) +// +template +class TensorContractionInputMapper< + Scalar_, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> { + public: + typedef Scalar_ Scalar; + typedef TensorContractionInputMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + Self; + typedef TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + SubMapper; + typedef SubMapper VectorMapper; + typedef SubMapper LinearMapper; + typedef typename packet_traits::type Packet; + + EIGEN_DEVICE_FUNC + TensorContractionInputMapper( + const TensorEvaluator< + const TensorReshapingOp< + NewDimension, + const TensorVolumePatchOp >, + Device>& tensor, + const nocontract_t&, const nocontract_t&, const contract_t&, + const contract_t&) + : m_impl(tensor.impl().impl()) { + if (internal::traits::Layout == ColMajor) { + m_patch_depth = tensor.impl().dimensions()[0]; + m_patch_planes = tensor.impl().dimensions()[1]; + m_patch_rows = tensor.impl().dimensions()[2]; + m_patch_cols = tensor.impl().dimensions()[3]; + m_num_patches = tensor.impl().dimensions()[4]; + } else { + const int NumDims = tensor.impl().dimensions().size(); + m_patch_depth = tensor.impl().dimensions()[NumDims - 1]; + m_patch_planes = tensor.impl().dimensions()[NumDims - 2]; + m_patch_rows = tensor.impl().dimensions()[NumDims - 3]; + m_patch_cols = tensor.impl().dimensions()[NumDims - 4]; + m_num_patches = 
tensor.impl().dimensions()[NumDims - 5]; + } + + // Strides for the output tensor. + // IMPORTANT: These strides are used to locate an element in a patch at a + // depth zero (channel), which is not quite the same as "traditional" + // stride. + m_rowStride = m_patch_planes; + m_colStride = m_patch_rows * m_rowStride; + m_patchStride = m_colStride * m_patch_cols * m_patch_depth; + m_otherStride = m_patchStride * m_num_patches; + + m_outputPlanes = tensor.impl().outputPlanes(); + m_outputRows = tensor.impl().outputRows(); + m_outputCols = tensor.impl().outputCols(); + + m_outputPlanesRows = m_outputPlanes * m_outputRows; + + m_plane_strides = tensor.impl().userPlaneStride(); + m_row_strides = tensor.impl().userRowStride(); + m_col_strides = tensor.impl().userColStride(); + + m_in_plane_strides = tensor.impl().userInPlaneStride(); + m_in_row_strides = tensor.impl().userInRowStride(); + m_in_col_strides = tensor.impl().userInColStride(); + + m_patch_plane_inflate_strides = tensor.impl().planeInflateStride(); + m_patch_row_inflate_strides = tensor.impl().rowInflateStride(); + m_patch_col_inflate_strides = tensor.impl().colInflateStride(); + + if (internal::traits::Layout == ColMajor) { + m_inputDepth = tensor.impl().impl().dimensions()[0]; + m_inputPlanes = tensor.impl().impl().dimensions()[1]; + m_inputRows = tensor.impl().impl().dimensions()[2]; + m_inputCols = tensor.impl().impl().dimensions()[3]; + } else { + const int NumDims = tensor.impl().impl().dimensions().size(); + m_inputDepth = tensor.impl().impl().dimensions()[NumDims - 1]; + m_inputPlanes = tensor.impl().impl().dimensions()[NumDims - 2]; + m_inputRows = tensor.impl().impl().dimensions()[NumDims - 3]; + m_inputCols = tensor.impl().impl().dimensions()[NumDims - 4]; + } + + // Strides for navigating through the input tensor. 
+ m_planeInputStride = m_inputDepth; + m_rowInputStride = m_inputDepth * m_inputPlanes; + m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes; + m_patchInputStride = + m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes; + + m_planePaddingTop = tensor.impl().planePaddingTop(); + m_rowPaddingTop = tensor.impl().rowPaddingTop(); + m_colPaddingLeft = tensor.impl().colPaddingLeft(); + + m_fastNumPatches = internal::TensorIntDivisor(m_num_patches); + + m_fastInputPlaneStride = + internal::TensorIntDivisor(m_patch_plane_inflate_strides); + m_fastInputRowStride = + internal::TensorIntDivisor(m_patch_row_inflate_strides); + m_fastInputColStride = + internal::TensorIntDivisor(m_patch_col_inflate_strides); + + m_fastRowStride = internal::TensorIntDivisor(m_rowStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + + m_fastDimZero = internal::TensorIntDivisor(m_patch_depth); + m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + m_fastOutputPlanes = internal::TensorIntDivisor(m_outputPlanes); + m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + m_fastOutputCols = internal::TensorIntDivisor(m_outputCols); + + m_fastOutputPlanesRows = + internal::TensorIntDivisor(m_outputPlanesRows); + } + + EIGEN_DEVICE_FUNC + TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) + : m_impl(base_mapper.m_impl) { + m_patch_depth = base_mapper.m_patch_depth; + m_patch_planes = base_mapper.m_patch_planes; + m_patch_rows = base_mapper.m_patch_rows; + m_patch_cols = base_mapper.m_patch_cols; + m_num_patches = base_mapper.m_num_patches; + + m_rowStride = base_mapper.m_rowStride; + m_colStride = base_mapper.m_colStride; + m_patchStride = base_mapper.m_patchStride; + m_otherStride = base_mapper.m_otherStride; + + m_planeInputStride = base_mapper.m_planeInputStride; + m_rowInputStride = base_mapper.m_rowInputStride; + m_colInputStride = base_mapper.m_colInputStride; + m_patchInputStride = base_mapper.m_patchInputStride; + 
m_otherInputStride = base_mapper.m_otherInputStride; + + m_inputDepth = base_mapper.m_inputDepth; + m_inputPlanes = base_mapper.m_inputPlanes; + m_inputRows = base_mapper.m_inputRows; + m_inputCols = base_mapper.m_inputCols; + + m_outputPlanes = base_mapper.m_outputPlanes; + m_outputRows = base_mapper.m_outputRows; + m_outputCols = base_mapper.m_outputCols; + + m_plane_strides = base_mapper.m_plane_strides; + m_row_strides = base_mapper.m_row_strides; + m_col_strides = base_mapper.m_col_strides; + + m_in_plane_strides = base_mapper.m_in_plane_strides; + m_in_row_strides = base_mapper.m_in_row_strides; + m_in_col_strides = base_mapper.m_in_col_strides; + + m_patch_plane_inflate_strides = base_mapper.m_patch_plane_inflate_strides; + m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides; + m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides; + + m_planePaddingTop = base_mapper.m_planePaddingTop; + m_rowPaddingTop = base_mapper.m_rowPaddingTop; + m_colPaddingLeft = base_mapper.m_colPaddingLeft; + + m_outputPlanesRows = base_mapper.m_outputPlanesRows; + + m_fastNumPatches = base_mapper.m_fastNumPatches; + m_fastInputPlaneStride = base_mapper.m_fastInputPlaneStride; + m_fastInputRowStride = base_mapper.m_fastInputRowStride; + m_fastInputColStride = base_mapper.m_fastInputColStride; + m_fastRowStride = base_mapper.m_fastRowStride; + m_fastColStride = base_mapper.m_fastColStride; + m_fastOutputPlanes = base_mapper.m_fastOutputPlanes; + m_fastOutputRows = base_mapper.m_fastOutputRows; + m_fastOutputCols = base_mapper.m_fastOutputCols; + m_fastDimZero = base_mapper.m_fastDimZero; + m_fastOutputPlanesRows = base_mapper.m_fastOutputPlanesRows; + } + + // If true, turns off some optimizations for loading packets since the image + // patches are "non-standard" such as there are non-trivial strides or + // inflations in the input. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_in_plane_strides != 1 || m_in_row_strides != 1 || + m_in_col_strides != 1 || m_patch_plane_inflate_strides != 1 || + m_patch_row_inflate_strides != 1 || m_patch_col_inflate_strides != 1; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const { + Index planeIndex, rowIndex, colIndex, otherIndex; + computeBaseIndices(0, planeIndex, rowIndex, colIndex, otherIndex); + return loadCoeff(row, planeIndex, rowIndex, colIndex, otherIndex); + } + + // Load the coefficient at the patchIndex location instead of the usual + // m_rowIndex, m_colIndex, m_otherIndex. This is currently only used by the + // gpu code. + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const { + Index planeIndex, rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, planeIndex, rowIndex, colIndex, otherIndex); + return loadCoeff(row, planeIndex, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const { + Index planeIndex, rowIndex, colIndex, otherIndex; + computeBaseIndices(0, planeIndex, rowIndex, colIndex, otherIndex); + return loadPacket(row, planeIndex, rowIndex, colIndex, otherIndex); + } + + // Load the packet at the patchIndex location instead of the usual m_rowIndex, + // m_colIndex, m_otherIndex. This is currently only used by the gpu code. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const { + Index planeIndex, rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, planeIndex, rowIndex, colIndex, otherIndex); + return loadPacket(row, planeIndex, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE const TensorEvaluator& impl() const { + return m_impl; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_patch_depth; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchPlanes() const { return m_patch_planes; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, + const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_impl.template packet(inputIndex); + } + + private: + friend class TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment>; + + // Load coefficient from a patch specified by the "within patch offset" + // (patchId) and the precomputed indices of the first element of the patch. + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index planeIndex, + Index rowIndex, Index colIndex, + Index otherIndex) const { + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset * m_in_col_strides; + const Index origInputCol = + (m_patch_col_inflate_strides == 1) + ? inputCol + : ((inputCol >= 0) ? 
(inputCol / m_fastInputColStride) : 0); + + const Index rowOffset = + (patchOffset - colOffset * m_colStride) / m_fastRowStride; + const Index inputRow = rowIndex + rowOffset * m_in_row_strides; + const Index origInputRow = + (m_patch_row_inflate_strides == 1) + ? inputRow + : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); + + const Index planeOffset = + patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; + const Index inputPlane = planeIndex + planeOffset * m_in_plane_strides; + const Index origInputPlane = + (m_patch_plane_inflate_strides == 1) + ? inputPlane + : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0); + + if (origInputCol < 0 || origInputRow < 0 || origInputPlane < 0 || + origInputCol >= m_inputCols || origInputRow >= m_inputRows || + origInputPlane >= m_inputPlanes || + (inputCol != origInputCol * m_patch_col_inflate_strides) || + (inputRow != origInputRow * m_patch_row_inflate_strides) || + (inputPlane != origInputPlane * m_patch_plane_inflate_strides)) { + return Scalar(0); + } + + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + origInputPlane * m_planeInputStride + + origInputRow * m_rowInputStride + + origInputCol * m_colInputStride + otherIndex; + + return m_impl.coeff(inputIndex); + } + + // This is the same as loadCoeff(...), but optimized for all `inflate_strides` + // and `in_strides` equal to 1 (template specialization without templates). + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index planeIndex, + Index rowIndex, Index colIndex, + Index otherIndex) const { + eigen_assert(!nonStandardPatches()); + + // Find the offset of the element wrt the location of the first element. 
+ const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + + const Index rowOffset = + (patchOffset - colOffset * m_colStride) / m_fastRowStride; + const Index inputRow = rowIndex + rowOffset; + + const Index planeOffset = + patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; + const Index inputPlane = planeIndex + planeOffset; + + if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || + inputRow >= m_inputRows || inputPlane < 0 || + inputPlane >= m_inputPlanes) { + return Scalar(0); + } + + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputPlane * m_planeInputStride + + inputRow * m_rowInputStride + + inputCol * m_colInputStride + otherIndex; + + return m_impl.coeff(inputIndex); + } + + // Load packet from a patch specified by the "within patch offset" + // (patchId) and the precomputed indices of the first element of the patch. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index planeIndex, + Index rowIndex, Index colIndex, + Index otherIndex) const { + const Index packetSize = internal::unpacket_traits::size; + + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < + patchDepth() * patchPlanes() * patchRows() * patchCols()); + + if (nonStandardPatches()) { + return packetWithPossibleZero(patchId, planeIndex, rowIndex, colIndex, + otherIndex); + } + return loadPacketStandard(patchId, planeIndex, rowIndex, colIndex, + otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index patchId, Index planeIndex, + Index rowIndex, Index colIndex, + Index otherIndex) const { + const Index packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < + patchDepth() * patchPlanes() * patchRows() * patchCols()); + eigen_assert(!nonStandardPatches()); + + if ((patchDepth() % packetSize) == 0) { + return loadPacketFast(patchId, planeIndex, rowIndex, colIndex, + otherIndex); + } else { + // Offsets and input calculation here are identical to + // loadCoeffStandard(...), but repeated twice. 
+ + const Index patchOffsets[2] = { + patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero}; + + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, + patchOffsets[1] / m_fastColStride}; + eigen_assert(colOffsets[0] <= colOffsets[1]); + + const Index inputCols[2] = {colIndex + colOffsets[0], + colIndex + colOffsets[1]}; + if (inputCols[0] >= m_inputCols || inputCols[1] < 0) { + return internal::pset1(Scalar(0)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowOffsets[2] = { + (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride, + (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + const Index inputRows[2] = {rowIndex + rowOffsets[0], + rowIndex + rowOffsets[1]}; + + if (inputRows[0] >= m_inputRows || inputRows[1] < 0) { + return internal::pset1(Scalar(0)); + } + + if (inputRows[0] == inputRows[1]) { + const Index planeOffsets[2] = { + patchOffsets[0] - colOffsets[0] * m_colStride - + rowOffsets[0] * m_rowStride, + patchOffsets[1] - colOffsets[1] * m_colStride - + rowOffsets[1] * m_rowStride}; + eigen_assert(planeOffsets[0] <= planeOffsets[1]); + const Index inputPlanes[2] = {planeIndex + planeOffsets[0], + planeIndex + planeOffsets[1]}; + + if (inputPlanes[0] >= m_inputPlanes || inputPlanes[1] < 0) { + return internal::pset1(Scalar(0)); + } + + if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) { + const Index depth = patchId - patchOffsets[0] * patchDepth(); + const Index inputIndex = + depth + inputPlanes[0] * m_planeInputStride + + inputRows[0] * m_rowInputStride + + inputCols[0] * m_colInputStride + otherIndex; + return m_impl.template packet(inputIndex); + } + } + } + } + + return packetWithPossibleZero(patchId, planeIndex, rowIndex, colIndex, + otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index planeIndex, + Index rowIndex, Index colIndex, + Index otherIndex) const { + const Index 
packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < + patchDepth() * patchPlanes() * patchRows() * patchCols()); + + eigen_assert(!nonStandardPatches()); + eigen_assert((patchDepth() % packetSize) == 0); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset); + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + const Index rowOffset = + (patchOffset - colOffset * m_colStride) / m_fastRowStride; + const Index inputRow = rowIndex + rowOffset; + const Index planeOffset = + patchOffset - colOffset * m_colStride - rowOffset * m_rowStride; + const Index inputPlane = planeIndex + planeOffset; + + if (inputCol < 0 || inputRow < 0 || inputPlane < 0 || + inputCol >= m_inputCols || inputRow >= m_inputRows || + inputPlane >= m_inputPlanes) { + return internal::pset1(Scalar(0)); + } + + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputPlane * m_planeInputStride + + inputRow * m_rowInputStride + + inputCol * m_colInputStride + otherIndex; + return m_impl.template packet(inputIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet + packetWithPossibleZero(Index patchId, Index planeIndex, Index rowIndex, + Index colIndex, Index otherIndex) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX + typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = + loadCoeff(patchId + i, planeIndex, rowIndex, colIndex, otherIndex); + } + Packet rslt = internal::pload(values); + return rslt; + } + + // Precompute the indices (plane, row, col, other) of the first element of + // the given patch index, within the output tensor of the TensorVolumePatchOp. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices( + Index patchIndex, Index& planeIndex, Index& rowIndex, Index& colIndex, + Index& otherIndex) const { + const int NumInputDims = array_size< + typename TensorEvaluator::Dimensions>::value; + + // Check if patchIndex might contain batch and other dimensions. + otherIndex = (NumInputDims == 4) ? 0 : patchIndex / m_fastNumPatches; + + // Compute index of the patch within the batch (and other dimensions). + const Index patch3DIndex = (NumInputDims == 4) + ? patchIndex + : (patchIndex - otherIndex * m_num_patches); + + otherIndex *= m_patchInputStride; + + colIndex = patch3DIndex / m_fastOutputPlanesRows; + rowIndex = + (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes; + planeIndex = + patch3DIndex - (colIndex * m_outputRows + rowIndex) * m_outputPlanes; + + colIndex = colIndex * m_col_strides - m_colPaddingLeft; + rowIndex = rowIndex * m_row_strides - m_rowPaddingTop; + planeIndex = planeIndex * m_plane_strides - m_planePaddingTop; + } + + Index m_patch_depth; // number of channels in the patch + Index m_patch_planes; // number of planes in the patch + Index m_patch_rows; // number of rows in the patch + Index m_patch_cols; // number of columns in the patch + Index m_num_patches; // number of patches to extract + + // Strides for the output tensor. 
+ Index m_rowStride; + Index m_colStride; + Index m_patchStride; + Index m_otherStride; + + Index m_planeInputStride; // Plane stride in the input tensor + Index m_rowInputStride; // Row stride in the input tensor + Index m_colInputStride; // Col stride in the input tensor + Index m_patchInputStride; // Patch stride in the input tensor + Index m_otherInputStride; + + Index m_inputDepth; // Depth of the input tensor + Index m_inputPlanes; // Number of planes in the input tensor + Index m_inputRows; // Number of rows in the input tensor + Index m_inputCols; // Number of cols in the input tensor + + Index m_outputPlanes; // Number of output planes + Index m_outputRows; // Number of output rows + Index m_outputCols; // Number of output cols + Index m_outputPlanesRows; // Cached outputPlanes * outputRows. + + Index m_plane_strides; // User specified plane stride + Index m_row_strides; // User specified row stride + Index m_col_strides; // User specified col stride + + // User specified plane/row/col atrous convolution strides. + Index m_in_plane_strides; + Index m_in_row_strides; + Index m_in_col_strides; + + // User specified plane/row/col inflation strides in the image patch. + Index m_patch_plane_inflate_strides; + Index m_patch_row_inflate_strides; + Index m_patch_col_inflate_strides; + + Index m_planePaddingTop; // Plane padding + Index m_rowPaddingTop; // Row padding + Index m_colPaddingLeft; // Column padding + + // Fast representation of various divisors. 
+ internal::TensorIntDivisor m_fastNumPatches; + + internal::TensorIntDivisor m_fastInputPlaneStride; + internal::TensorIntDivisor m_fastInputRowStride; + internal::TensorIntDivisor m_fastInputColStride; + + internal::TensorIntDivisor m_fastRowStride; + internal::TensorIntDivisor m_fastColStride; + + internal::TensorIntDivisor m_fastDimZero; // aka output depth + internal::TensorIntDivisor m_fastOutputPlanes; + internal::TensorIntDivisor m_fastOutputRows; + internal::TensorIntDivisor m_fastOutputCols; + internal::TensorIntDivisor m_fastOutputPlanesRows; + + const TensorEvaluator m_impl; +}; + +template +class TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> { + public: + typedef typename packet_traits::type Packet; + typedef typename packet_traits::half HalfPacket; + + typedef TensorContractionInputMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + ParentMapper; + typedef TensorContractionSubMapper< + Scalar, Index, Side, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + Self; + typedef Self LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper( + const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), + m_depth_offset(vert_offset), + m_col_offset(horiz_offset) { + m_base_mapper.computeBaseIndices(m_col_offset, m_planeIndex, m_rowIndex, + m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper( + const Self& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper.m_base_mapper), + m_depth_offset(vert_offset + base_mapper.m_depth_offset), + m_col_offset(horiz_offset + base_mapper.m_col_offset) { + 
m_base_mapper.computeBaseIndices(m_col_offset, m_planeIndex, m_rowIndex, + m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_base_mapper.loadCoeff(i + m_depth_offset, m_planeIndex, m_rowIndex, + m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, + Index j) const { + return m_base_mapper(i + m_depth_offset, j + m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return m_base_mapper.loadPacket(i + m_depth_offset, m_planeIndex, + m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, + Index j) const { + return m_base_mapper.template loadPacket(i + m_depth_offset, + j + m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar + loadCoeffStandard(Index i) const { + return m_base_mapper.loadCoeffStandard( + i + m_depth_offset, m_planeIndex, m_rowIndex, m_colIndex, m_otherIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const { + return m_base_mapper.loadPacketFast(i + m_depth_offset, m_planeIndex, + m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet + loadPacketStandard(Index i) const { + return m_base_mapper.loadPacketStandard( + i + m_depth_offset, m_planeIndex, m_rowIndex, m_colIndex, m_otherIndex); + } + template + EIGEN_DEVICE_FUNC bool aligned(Index) const { + return false; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_base_mapper.nonStandardPatches(); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { + return m_base_mapper.m_patch_depth; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchPlanes() const { + return m_base_mapper.m_patch_planes; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { + return m_base_mapper.m_patch_rows; + } + EIGEN_DEVICE_FUNC + 
EIGEN_ALWAYS_INLINE Index patchCols() const { + return m_base_mapper.m_patch_cols; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, + const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_base_mapper.m_impl.template packet(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padPlane(const Index plane) const { + const Index p = m_planeIndex + plane; + return p < 0 || p >= m_base_mapper.m_inputPlanes; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padRow(const Index row) const { + const Index r = m_rowIndex + row; + return r < 0 || r >= m_base_mapper.m_inputRows; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padCol(const Index col) const { + const Index c = m_colIndex + col; + return c < 0 || c >= m_base_mapper.m_inputCols; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index baseIndex(const Index plane, const Index row, + const Index col) const { + const Index p = m_planeIndex + plane; + const Index r = m_rowIndex + row; + const Index c = m_colIndex + col; + return p * m_base_mapper.m_planeInputStride + + r * m_base_mapper.m_rowInputStride + + c * m_base_mapper.m_colInputStride + m_otherIndex; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index planeOffset() const { + const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; + const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; + const Index rowOffset = + (patchOffset - colOffset * m_base_mapper.m_colStride) / + m_base_mapper.m_fastRowStride; + const Index planeOffset = patchOffset - + colOffset * m_base_mapper.m_colStride - + rowOffset * m_base_mapper.m_rowStride; + return planeOffset; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index rowOffset() const { + const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; + const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; + const Index rowOffset = + (patchOffset - colOffset * m_base_mapper.m_colStride) / + 
m_base_mapper.m_fastRowStride; + return rowOffset; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index colOffset() const { + const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; + const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; + return colOffset; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index depthOffset() const { + const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth(); + return patchOffset; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper + getLinearMapper(Index i, Index j) const { + return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset); + } + + private: + const ParentMapper& m_base_mapper; + Index m_depth_offset; // First row in the input matrix + Index m_col_offset; // First col in the input matrix + + // Knowing that: col_offset == patchIndex * OTHERS, we keep precomputed base + // indices for the first element in a patch specified by col_offset + // (see computeBaseIndices(...) for details). + Index m_planeIndex; + Index m_rowIndex; + Index m_colIndex; + Index m_otherIndex; +}; + +// Arrange a block of the right input matrix (in our case it's always a "virtual +// matrix" constructed from extracted volume patches) in contiguous memory. +// +// Given column major input (A0 beside A1 in memory): +// A0 B0 C0 D0 E0 F0 G0 H0 ... +// A1 B1 C1 D1 E1 F1 G1 H1 ... +// A2 B2 C2 D2 E2 F2 G2 H2 ... +// A3 B3 C3 D3 E3 F3 G3 H3 ... +// A4 B4 C4 D4 E4 F4 G4 H4 ... +// A5 B5 C5 D5 E5 F5 G5 H5 ... +// A6 B6 C6 D6 E6 F6 G6 H6 ... +// A7 B7 C7 D7 E7 F7 G7 H7 ... +// A8 ... +// ... +// +// Packing yields row major output (A0 beside A1 in memory): +// A0 A1 A2 A3 A4 A5 A6 A7 +// B0 B1 B2 B3 B4 B5 B6 B7 +// C0 ... +// ... +// +// *) A, B, C, ... - patches extracted from the original input. +// *) nr - number of registers along the 'n' dimension. +// See GeneralBlockPanelKernel.h and "Anatomy of High-Performance Matrix +// Multiplication" paper. 
+template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, packet_size, inner_dim_contiguous, + inner_dim_reordered, Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + typedef typename packet_traits::type Packet; + + const Index packet_cols4 = (cols / 4) * 4; + const Index peeled_k = (depth / packet_size) * packet_size; + const bool non_standard_patches = rhs.nonStandardPatches(); + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + Index k = 0; + if ((packet_size % 4) == 0 && !non_standard_patches) { + const Index patch_depth = rhs.patchDepth(); + + if ((patch_depth % packet_size) == 0) { + const Index patch_cols = rhs.patchCols(); + const Index patch_rows = rhs.patchRows(); + const Index patch_planes = rhs.patchPlanes(); + + const Index startCol = rhs.colOffset(); + const Index max_cols = std::min( + Eigen::divup(peeled_k, patch_rows * patch_planes * patch_depth) + + startCol, + patch_cols); + + for (Index c = startCol; c < max_cols; ++c) { + eigen_assert(k < peeled_k); + + const Index startRow = (c == startCol) ? 
rhs.rowOffset() : 0; + const Index max_rows = std::min( + Eigen::divup( + peeled_k - c * patch_rows * patch_planes * patch_depth, + patch_planes * patch_depth) + + startRow, + patch_rows); + + const bool pad_col0 = dm0.padCol(c); + const bool pad_col1 = dm1.padCol(c); + const bool pad_col2 = dm2.padCol(c); + const bool pad_col3 = dm3.padCol(c); + + for (Index r = startRow; r < max_rows; ++r) { + eigen_assert(k < peeled_k); + + const Index startPlane = + ((c == startCol) && (r == startRow)) ? rhs.planeOffset() : 0; + const Index max_planes = std::min( + Eigen::divup( + peeled_k - + c * patch_rows * patch_planes * patch_depth - // col + r * patch_planes * patch_depth, // row + patch_depth) + + startPlane, + patch_planes); + + const bool pad_row0 = dm0.padRow(r); + const bool pad_row1 = dm1.padRow(r); + const bool pad_row2 = dm2.padRow(r); + const bool pad_row3 = dm3.padRow(r); + + for (Index p = startPlane; p < max_planes; ++p) { + eigen_assert(k < peeled_k); + + const bool pad0 = pad_col0 || pad_row0 || dm0.padPlane(p); + const bool pad1 = pad_col1 || pad_row1 || dm1.padPlane(p); + const bool pad2 = pad_col2 || pad_row2 || dm2.padPlane(p); + const bool pad3 = pad_col3 || pad_row3 || dm3.padPlane(p); + + const Index idx0 = dm0.baseIndex(p, r, c); + const Index idx1 = dm1.baseIndex(p, r, c); + const Index idx2 = dm2.baseIndex(p, r, c); + const Index idx3 = dm3.baseIndex(p, r, c); + + const Index startDepth = + ((c == startCol) && (r == startRow) && (p == startPlane)) + ? rhs.depthOffset() + : 0; + const Index max_depth = std::min( + peeled_k - + c * patch_rows * patch_planes * patch_depth - // col + r * patch_planes * patch_depth - // row + p * patch_depth + // plane + startDepth, + patch_depth); + eigen_assert((max_depth - startDepth) % packet_size == 0); + + for (Index d = startDepth; d < max_depth; d += packet_size) { + eigen_assert(k < peeled_k); + PacketBlock kernel; + kernel.packet[0] = pad0 ? 
pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx0); + kernel.packet[1] = pad1 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx1); + kernel.packet[2] = pad2 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx2); + kernel.packet[3] = pad3 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx3); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + k += packet_size; + } + } + } + } + + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel; + kernel.packet[0] = dm0.loadPacketFast(k); + kernel.packet[1] = dm1.loadPacketFast(k); + kernel.packet[2] = dm2.loadPacketFast(k); + kernel.packet[3] = dm3.loadPacketFast(k); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + } + } else { + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel; + kernel.packet[0] = dm0.loadPacketStandard(k); + kernel.packet[1] = dm1.loadPacketStandard(k); + kernel.packet[2] = dm2.loadPacketStandard(k); + kernel.packet[3] = dm3.loadPacketStandard(k); + ptranspose(kernel); + pstoreu(block + 0 * packet_size, kernel.packet[0]); + pstoreu(block + 1 * packet_size, kernel.packet[1]); + pstoreu(block + 2 * packet_size, kernel.packet[2]); + pstoreu(block + 3 * packet_size, kernel.packet[3]); + block += 4 * packet_size; + } + } + } + if (!rhs.nonStandardPatches()) { + for (; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = 
dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + *block = dm0(k); + block += 1; + } + } + } +}; + +// Template specialization for packet_size = 2. We must special-case packet +// blocks with nr > packet_size, e.g. PacketBlock. +template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, /*packet_size*/ 2, inner_dim_contiguous, + inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, /*packet_size*/ 2, inner_dim_contiguous, + inner_dim_reordered, Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + typedef typename packet_traits::type Packet; + + const int packet_size = 2; + + const Index packet_cols4 = (cols / 4) * 4; + const Index peeled_k = (depth / packet_size) * packet_size; + const bool non_standard_patches = rhs.nonStandardPatches(); + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + Index k = 0; + if (!non_standard_patches) { + const Index patch_depth = rhs.patchDepth(); + + if ((patch_depth % packet_size) == 0) { + const Index patch_cols = rhs.patchCols(); + const Index patch_rows = rhs.patchRows(); + const Index patch_planes = 
rhs.patchPlanes(); + + const Index startCol = rhs.colOffset(); + const Index max_cols = std::min( + Eigen::divup(peeled_k, patch_rows * patch_planes * patch_depth) + + startCol, + patch_cols); + + for (Index c = startCol; c < max_cols; ++c) { + eigen_assert(k < peeled_k); + + const Index startRow = (c == startCol) ? rhs.rowOffset() : 0; + const Index max_rows = std::min( + Eigen::divup( + peeled_k - c * patch_rows * patch_planes * patch_depth, + patch_planes * patch_depth) + + startRow, + patch_rows); + + const bool pad_col0 = dm0.padCol(c); + const bool pad_col1 = dm1.padCol(c); + const bool pad_col2 = dm2.padCol(c); + const bool pad_col3 = dm3.padCol(c); + + for (Index r = startRow; r < max_rows; ++r) { + eigen_assert(k < peeled_k); + + const Index startPlane = + ((c == startCol) && (r == startRow)) ? rhs.planeOffset() : 0; + const Index max_planes = std::min( + Eigen::divup( + peeled_k - + c * patch_rows * patch_planes * patch_depth - // col + r * patch_planes * patch_depth, // row + patch_depth) + + startPlane, + patch_planes); + + const bool pad_row0 = dm0.padRow(r); + const bool pad_row1 = dm1.padRow(r); + const bool pad_row2 = dm2.padRow(r); + const bool pad_row3 = dm3.padRow(r); + + for (Index p = startPlane; p < max_planes; ++p) { + eigen_assert(k < peeled_k); + + const bool pad0 = pad_col0 || pad_row0 || dm0.padPlane(p); + const bool pad1 = pad_col1 || pad_row1 || dm1.padPlane(p); + const bool pad2 = pad_col2 || pad_row2 || dm2.padPlane(p); + const bool pad3 = pad_col3 || pad_row3 || dm3.padPlane(p); + + const Index idx0 = dm0.baseIndex(p, r, c); + const Index idx1 = dm1.baseIndex(p, r, c); + const Index idx2 = dm2.baseIndex(p, r, c); + const Index idx3 = dm3.baseIndex(p, r, c); + + const Index startDepth = + ((c == startCol) && (r == startRow) && (p == startPlane)) + ? 
rhs.depthOffset() + : 0; + const Index max_depth = std::min( + peeled_k - + c * patch_rows * patch_planes * patch_depth - // col + r * patch_planes * patch_depth - // row + p * patch_depth + // plane + startDepth, + patch_depth); + eigen_assert((max_depth - startDepth) % packet_size == 0); + + for (Index d = startDepth; d < max_depth; d += packet_size) { + eigen_assert(k < peeled_k); + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = pad0 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx0); + kernel0.packet[1] = pad1 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx1); + kernel1.packet[0] = pad2 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx2); + kernel1.packet[1] = pad3 ? pset1(Scalar(0)) + : rhs.packetNoPadding(d, idx3); + ptranspose(kernel0); + ptranspose(kernel1); + pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + k += packet_size; + } + } + } + } + + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = dm0.loadPacketFast(k); + kernel0.packet[1] = dm1.loadPacketFast(k); + kernel1.packet[0] = dm2.loadPacketFast(k); + kernel1.packet[1] = dm3.loadPacketFast(k); + ptranspose(kernel0); + ptranspose(kernel1); + pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + } + } else { + for (; k < peeled_k; k += packet_size) { + PacketBlock kernel0; + PacketBlock kernel1; + kernel0.packet[0] = dm0.loadPacketStandard(k); + kernel0.packet[1] = dm1.loadPacketStandard(k); + kernel1.packet[0] = dm2.loadPacketStandard(k); + kernel1.packet[1] = dm3.loadPacketStandard(k); + ptranspose(kernel0); + ptranspose(kernel1); + 
pstoreu(block + 0 * packet_size, kernel0.packet[0]); + pstoreu(block + 1 * packet_size, kernel1.packet[0]); + pstoreu(block + 2 * packet_size, kernel0.packet[1]); + pstoreu(block + 3 * packet_size, kernel1.packet[1]); + block += 4 * packet_size; + } + } + } + if (!rhs.nonStandardPatches()) { + for (; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + *block = dm0(k); + block += 1; + } + } + } +}; + +// Special case for non-vectorized types such as float16 (packet_size = 1). +template +struct gemm_pack_rhs< + Scalar, Index, + TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, /*packet_size*/ 1, inner_dim_contiguous, + inner_dim_reordered, Alignment>, + nr, ColMajor, false, false> { + typedef TensorContractionSubMapper< + Scalar, Index, Rhs, + TensorEvaluator >, + Device>, + nocontract_t, contract_t, 1, inner_dim_contiguous, inner_dim_reordered, + Alignment> + SubMapper; + typedef SubMapper DataMapper; + + EIGEN_DEVICE_FUNC + EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0) const { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const Index packet_cols4 = (cols / 4) * 4; + + for (Index j2 = 0; j2 < packet_cols4; j2 += 4) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0); + const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1); + const SubMapper dm2 = 
rhs.getLinearMapper(0, j2 + 2); + const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3); + + if (!rhs.nonStandardPatches()) { + for (Index k = 0; k < depth; k++) { + block[0] = dm0.loadCoeffStandard(k); + block[1] = dm1.loadCoeffStandard(k); + block[2] = dm2.loadCoeffStandard(k); + block[3] = dm3.loadCoeffStandard(k); + block += 4; + } + } else { + for (Index k = 0; k < depth; k++) { + block[0] = dm0(k); + block[1] = dm1(k); + block[2] = dm2(k); + block[3] = dm3(k); + block += 4; + } + } + } + + // copy the remaining columns one at a time (nr==1) + for (Index j2 = packet_cols4; j2 < cols; ++j2) { + const SubMapper dm0 = rhs.getLinearMapper(0, j2); + for (Index k = 0; k < depth; k++) { + *block = dm0(k); + block += 1; + } + } + } +}; + +} // namespace internal + /** CuboidConvolution * \ingroup CXX11_NeuralNetworks_Module * -- cgit v1.2.3 From 29c3c08f23e14eaff1dbd7a3c66139314c045574 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 11 Sep 2018 11:47:14 -0700 Subject: Convert NumPy arrays to Tensors when they're arguments to a defun Previously they were counted in the cache key as if they were Tensors, but were not fed as placeholders, leading to stale values when the trace was reused. There is an 8%ish performance impact from the tuple comprehension on the defun no-signature-call microbenchmarks. I don't see a much faster way to do this without rewriting it in C, but I'm open to ideas. I've avoided re-packing the input tuple unless there's actually a numpy array, so this CL will slow down NumPy defun calls more (in addition to the convert_to_tensor overhead). 
After: entry { name: "MicroBenchmarks.benchmark_defun_with_signature" iters: 30000 wall_time: 134.219272931 extras { key: "examples_per_sec" value { double_value: 7450.49483699 } } } entry { name: "MicroBenchmarks.benchmark_defun_with_signature_and_kwargs" iters: 30000 wall_time: 142.88717111 extras { key: "examples_per_sec" value { double_value: 6998.52892485 } } } entry { name: "MicroBenchmarks.benchmark_defun_without_signature" iters: 30000 wall_time: 76.2096961339 extras { key: "examples_per_sec" value { double_value: 13121.6898994 } } } entry { name: "MicroBenchmarks.benchmark_defun_without_signature_and_with_kwargs" iters: 30000 wall_time: 81.8309704463 extras { key: "examples_per_sec" value { double_value: 12220.3121208 } } } Before: entry { name: "MicroBenchmarks.benchmark_defun_with_signature" iters: 30000 wall_time: 129.392266273 extras { key: "examples_per_sec" value { double_value: 7728.43716862 } } } entry { name: "MicroBenchmarks.benchmark_defun_with_signature_and_kwargs" iters: 30000 wall_time: 141.65956974 extras { key: "examples_per_sec" value { double_value: 7059.1771656 } } } entry { name: "MicroBenchmarks.benchmark_defun_without_signature" iters: 30000 wall_time: 70.6333637238 extras { key: "examples_per_sec" value { double_value: 14157.6154282 } } } entry { name: "MicroBenchmarks.benchmark_defun_without_signature_and_with_kwargs" iters: 30000 wall_time: 78.4090677897 extras { key: "examples_per_sec" value { double_value: 12753.6269489 } } } PiperOrigin-RevId: 212491803 --- tensorflow/python/eager/function.py | 21 +++++++++++++++++---- tensorflow/python/eager/function_test.py | 9 +++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 03f12139f6..8c30550708 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -34,6 +34,7 @@ from tensorflow.python.eager import execute from 
tensorflow.python.eager import tape from tensorflow.python.eager.graph_only_ops import graph_placeholder from tensorflow.python.framework import c_api_util +from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as pydev from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import ops @@ -879,9 +880,6 @@ def _encode_arg(arg): _TensorType(arg.values.dtype, arg.values._shape_tuple()), _TensorType(arg.indices.dtype, arg.indices._shape_tuple()), ]) - elif isinstance(arg, np.ndarray): - tensor = ops.convert_to_tensor(arg) - return _TensorType(tensor.dtype, tensor._shape_tuple()) # pylint: enable=protected-access elif isinstance(arg, (list, tuple)): return tuple([_encode_arg(elem) for elem in arg]) @@ -1089,6 +1087,17 @@ class PolymorphicFunction(object): # opposed to named arguments called in a keyword-like fashion. kwds.pop(arg) inputs = args + _deterministic_dict_values(arg_indices_to_values) + flat_inputs = nest.flatten(inputs) + + # Check for NumPy arrays in arguments and convert them to Tensors. 
+ need_packing = False + for index, value in enumerate(flat_inputs): + if isinstance(value, np.ndarray): + flat_inputs[index] = constant_op.constant(value) + need_packing = True + if need_packing: + inputs = nest.pack_sequence_as(structure=inputs, + flat_sequence=flat_inputs) if self._input_signature is None: return inputs, kwds else: @@ -1098,7 +1107,6 @@ class PolymorphicFunction(object): except (ValueError, TypeError): raise ValueError("Structure of Python function inputs does not match " "input_signature.") - flat_inputs = nest.flatten(inputs) if any(not isinstance(arg, ops.Tensor) for arg in flat_inputs): raise ValueError("When input_signature is provided, all inputs to " "the Python function must be Tensors.") @@ -1271,6 +1279,11 @@ def defun(func=None, input_signature=None): tracing the execution of `f(*args, **kwargs)`; this graph is bound to an input signature inferred from `(*args, **kwargs)` and cached for future reuse. + NumPy arrays passed as inputs to `F` are converted to `tf.Tensor` objects + before being passed to `f`, and are treated as Tensors for caching. This + allows a function to be called multiple times with NumPy arrays having + different values but the same shape and dtype without re-tracing each time. + `tf.contrib.eager.defun` caches graphs for your convenience, letting you define TensorFlow functions without explicitly specifying their signatures. 
However, this policy is conservative and potentially expensive; for example, diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 92254a2c00..6507bc6d71 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -22,6 +22,8 @@ import functools from multiprocessing.pool import ThreadPool import sys +import numpy + from tensorflow.core.protobuf import config_pb2 from tensorflow.python.data.ops import iterator_ops from tensorflow.python.eager import backprop @@ -314,6 +316,7 @@ class FunctionTest(test.TestCase): def testDefunNumpyArraysConvertedToTensors(self): def f(x): + self.assertIsInstance(x, ops.Tensor) return x x = random_ops.random_uniform([2, 2]).numpy() @@ -327,6 +330,12 @@ class FunctionTest(test.TestCase): # shouldn't trigger another function definition. self.assertEqual(len(defined._function_cache), 1) + # Test that the numpy array is properly an argument to the graph function. + self.assertEqual(1., defined(numpy.ones([])).numpy()) + self.assertEqual(0., defined(numpy.zeros([])).numpy()) + self.assertEqual(1., defined(array_ops.ones([])).numpy()) + self.assertEqual(0., defined(array_ops.zeros([])).numpy()) + def testDefunCapturedInt32(self): x = constant_op.constant(1, dtype=dtypes.int32) -- cgit v1.2.3 From a9e73ddb3d40514af4144278f6450e5c1c806f8b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 11 Sep 2018 12:03:48 -0700 Subject: Make exhaustive_f32_elementwise_op_test build again and mark it as broken It was not running as part of TAP and there have been some regressions. Mark it as broken while we figure out what's going on to unblock b/114790989. 
PiperOrigin-RevId: 212494775 --- tensorflow/compiler/xla/tests/BUILD | 1 + .../compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index d0bda45cf8..30e3077edb 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -647,6 +647,7 @@ xla_test( ], shard_count = 48, tags = [ + "broken", "manual", "notap", ], diff --git a/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc b/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc index 738f2600d4..51b50d456e 100644 --- a/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc +++ b/tensorflow/compiler/xla/tests/exhaustive_f32_elementwise_op_test.cc @@ -45,22 +45,22 @@ class ExhaustiveF32ElementwiseOpTest i < known_incorrect_range.second) { // If the operation is known to be buggy on a specific input clamp that // input to 0 under the assumption that the op is at least correct on 0. 
- input_literal->Set({i - begin}, 0.0f); + input_literal.Set({i - begin}, 0.0f); } else { - input_literal->Set({i - begin}, tensorflow::bit_cast(i)); + input_literal.Set({i - begin}, tensorflow::bit_cast(i)); } } TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr input_data, - client_->TransferToServer(*input_literal)); + client_->TransferToServer(input_literal)); - auto input = Parameter(&builder, 0, input_literal->shape(), "input"); + auto input = Parameter(&builder, 0, input_literal.shape(), "input"); enqueue_op(&builder, input); std::vector expected_result; expected_result.reserve(input_size); for (int64 i = 0; i < input_size; i++) { - expected_result.push_back(evaluate_op(input_literal->Get({i}))); + expected_result.push_back(evaluate_op(input_literal.Get({i}))); } ComputeAndCompareR1(&builder, expected_result, {input_data.get()}, -- cgit v1.2.3 From 1025b0c68b819a7292b51e51bbf7badc8818f286 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Tue, 11 Sep 2018 12:18:34 -0700 Subject: disable failing test PiperOrigin-RevId: 212497382 --- tensorflow/contrib/distributions/BUILD | 1 + 1 file changed, 1 insertion(+) (limited to 'tensorflow') diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 97c53ae2b9..9aadc634da 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -166,6 +166,7 @@ cuda_py_test( "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform_test", ], + tags = ["notap"], ) cuda_py_test( -- cgit v1.2.3 From dad6912b530c92b2f362f1cc2a83006a22f604b6 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 11 Sep 2018 13:12:21 -0700 Subject: Handle model deserialization when output tensor shape is NULL. In flatbuffers, vectors default to NULL. Original change by alanchiao@. 
PiperOrigin-RevId: 212506392 --- tensorflow/contrib/lite/model.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tensorflow') diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 241865b3d8..6311d60b91 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -177,6 +177,11 @@ TfLiteStatus InterpreterBuilder::BuildLocalIndexToRegistrationMapping() { namespace { template std::vector FlatBufferIntArrayToVector(T* flat_array) { + // Initialize shape of tensors with null shape. Empty vectors are converted + // to nullptr for models that are constructed via flatbuffers::Pack. + if (flat_array == nullptr) { + return {}; + } std::vector ret(flat_array->Length()); for (int i = 0; i < flat_array->Length(); i++) { ret[i] = flat_array->Get(i); -- cgit v1.2.3 From 418c7258687166fc79a04f5a8c903c782a8ad295 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 11 Sep 2018 13:12:57 -0700 Subject: Optimize Spatial&Cuboid backward kernel convolutions. Without shuffle TensorExecutor uses optimized (specialized) gemm_pack_rhs to pack memory before contraction. Custom rhs packer is much faster than contracting by inner dimension with default packer. 1. CuboidConvolutionBwdKernel: ~10x-25x speedup 2. 
SpatialConvolutionBwdKernel: ~2x-10x speedup PiperOrigin-RevId: 212506483 --- .../kernels/eigen_backward_cuboid_convolutions.h | 44 ++++++++++------------ .../kernels/eigen_backward_spatial_convolutions.h | 41 +++++++++----------- 2 files changed, 38 insertions(+), 47 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h index 27918b410b..f12c8d943d 100644 --- a/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h +++ b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h @@ -239,8 +239,8 @@ CuboidConvolutionBackwardInput( } } - // We will contract along the fused dimension that contains the kernelFilters, - // kernelPlanes, kernelRows and kernelCols. + // We will contract along the collapsed dimension that contains the + // kernelFilters, kernelPlanes, kernelRows and kernelCols. array, 1> contract_dims; if (isColMajor) { // col-major: kernel.contract(output.patches) @@ -331,24 +331,18 @@ EIGEN_ALWAYS_INLINE static const typename internal::conditional< const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward>, - const TensorShufflingOp< - const array::Index, - 2>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorVolumePatchOp > > > >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorVolumePatchOp > > >, TensorReshapingOp< const DSizes::Index, 5>, const TensorContractionOp< const array::Index>, 1>, - const TensorShufflingOp< - const array::Index, - 2>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorVolumePatchOp > >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorVolumePatchOp >, const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward> > > >::type @@ -458,12 +452,16 @@ CuboidConvolutionBackwardKernel( eigen_assert(output_dims[0] == pre_contract_dims[0]); } - array shuffle_dims; - shuffle_dims[0] = 1; - shuffle_dims[1] = 0; 
- + // We will contract along the collapsed dimension that contains the + // outputCols, outputRows, outputPlanes and OTHERS. array, 1> contract_dims; - contract_dims[0] = IndexPair(1, 0); + if (isColMajor) { + // col-major: output_backward.contract(input.patches) + contract_dims[0] = IndexPair(1, 1); + } else { + // row-major: input.patches.contract(output_backward) + contract_dims[0] = IndexPair(0, 0); + } DSizes kernel_dims; if (isColMajor) { @@ -489,8 +487,7 @@ CuboidConvolutionBackwardKernel( strideRows, strideCols, 1, 1, 1, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right) - .reshape(pre_contract_dims) - .shuffle(shuffle_dims), + .reshape(pre_contract_dims), contract_dims) .reshape(kernel_dims), input @@ -499,7 +496,6 @@ CuboidConvolutionBackwardKernel( padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right) .reshape(pre_contract_dims) - .shuffle(shuffle_dims) .contract(output_backward.reshape(output_dims), contract_dims) .reshape(kernel_dims)); } diff --git a/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h b/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h index 8d06107553..960920c55b 100644 --- a/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h +++ b/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h @@ -238,8 +238,8 @@ SpatialConvolutionBackwardInput( } } - // We will contract along the fused dimension that contains the kernelFilters, - // the kernelRows and the kernelCols. + // We will contract along the collapsed dimension that contains the + // kernelFilters, the kernelRows and the kernelCols. 
array, 1> contract_dims; if (isColMajor) { // col-major: kernel.contract(output.patches) @@ -332,23 +332,16 @@ EIGEN_ALWAYS_INLINE static const typename internal::conditional< const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward>, - const TensorShufflingOp< - const array::Index, - 2>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp > > > >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp > > >, TensorReshapingOp< const DSizes::Index, 4>, const TensorContractionOp< const array::Index>, 1>, - const TensorShufflingOp< - const array::Index, - 2>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorImagePatchOp > >, + const TensorReshapingOp< + const DSizes::Index, 2>, + const TensorImagePatchOp >, const TensorReshapingOp< const DSizes::Index, 2>, const OutputBackward> > > >::type @@ -456,12 +449,16 @@ SpatialConvolutionBackwardKernel( eigen_assert(output_dims[0] == pre_contract_dims[0]); } - array shuffle_dims; - shuffle_dims[0] = 1; - shuffle_dims[1] = 0; - + // We will contract along the collapsed dimension that contains the + // outputCols, outputRows and OTHERS. 
array, 1> contract_dims; - contract_dims[0] = IndexPair(1, 0); + if (isColMajor) { + // col-major: output_backward.contract(input.patches) + contract_dims[0] = IndexPair(1, 1); + } else { + // row-major: input.patches.contract(output_backward) + contract_dims[0] = IndexPair(0, 0); + } // After the contraction, the kernel will have the desired shape // out_depth X in_shape X kernel_rows X kernel_cols @@ -487,8 +484,7 @@ SpatialConvolutionBackwardKernel( kernelRows, kernelCols, row_stride, col_stride, row_in_stride, col_in_stride, 1, 1, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) - .reshape(pre_contract_dims) - .shuffle(shuffle_dims), + .reshape(pre_contract_dims), contract_dims) .reshape(kernel_dims), input @@ -497,7 +493,6 @@ SpatialConvolutionBackwardKernel( padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)) .reshape(pre_contract_dims) - .shuffle(shuffle_dims) .contract(output_backward.reshape(output_dims), contract_dims) .reshape(kernel_dims)); } -- cgit v1.2.3 From da99f7ca018d4916447d7b984d9d65be1a9615a8 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 11 Sep 2018 13:46:29 -0700 Subject: Make control_flow_ops._ENABLE_COND_V2 public. Note this is not part of the official public API, but we do allow other modules to modify this value (e.g. in tests). 
PiperOrigin-RevId: 212512883 --- tensorflow/python/framework/test_util.py | 10 ++- .../kernel_tests/control_flow_ops_py_test.py | 72 +++++++++++----------- tensorflow/python/ops/control_flow_ops.py | 4 +- 3 files changed, 42 insertions(+), 44 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index b33cc8f544..6a2c897f3f 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -413,15 +413,13 @@ def enable_cond_v2(fn): The wrapped function """ - # pylint: disable=protected-access def wrapper(*args, **kwargs): - prev_value = control_flow_ops._ENABLE_COND_V2 - control_flow_ops._ENABLE_COND_V2 = True + prev_value = control_flow_ops.ENABLE_COND_V2 + control_flow_ops.ENABLE_COND_V2 = True try: fn(*args, **kwargs) finally: - control_flow_ops._ENABLE_COND_V2 = prev_value - # pylint: enable=protected-access + control_flow_ops.ENABLE_COND_V2 = prev_value return wrapper @@ -438,7 +436,7 @@ def with_cond_v2(cls): Returns: cls with new test methods added """ - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return cls for name, value in cls.__dict__.copy().items(): diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index eac97af4ed..bdf7e0e4a0 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -333,7 +333,7 @@ class ControlFlowTest(test.TestCase): res.eval(feed_dict={data: 1.0}) def testCondBool(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113296297") values = constant_op.constant(10) @@ -384,7 +384,7 @@ class ControlFlowTest(test.TestCase): sess.run(r, feed_dict={t: 3}) def testCondIndexedSlices(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return 
unittest.skip("b/113296180") with self.test_session(): @@ -402,7 +402,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(0, ind) def testCondSparseTensor(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113296161 (SparseTensors)") with self.test_session(): @@ -422,7 +422,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(r.values.get_shape(), (2,)) def testCondResource(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -438,7 +438,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(1.0, control_flow_ops.cond(rv, case, lambda: t).eval()) def testCondIndexedSlicesDifferentTypes(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113293074") with self.test_session(): @@ -484,14 +484,14 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(11, result) def testCond_1(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") self._testCond_1(use_gpu=False) self._testCond_1(use_gpu=True) def testCond_2(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -503,7 +503,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(9, result) def testCond_3(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -518,7 +518,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(12, result) def testCond_4(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") with self.test_session(): @@ -556,7 +556,7 @@ class 
ControlFlowTest(test.TestCase): self.assertAllEqual(4, count.eval()) def testCond_6(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -583,7 +583,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual([11, 12], sess.run(r)) def testCondRef(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -599,7 +599,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual([2.0], r.eval()) def testCondWithControl(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/79881896") with self.test_session() as sess: @@ -641,7 +641,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual([1.0], sess.run(merged_op.output)) def testCondSwitchIdentity(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") # Make sure the recv identity is not removed by optimization. @@ -658,7 +658,7 @@ class ControlFlowTest(test.TestCase): sess.run(r) def testCondRecvIdentity(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") # Make sure the switch identity is not removed by optimization. 
@@ -677,7 +677,7 @@ class ControlFlowTest(test.TestCase): sess.run(r) def testCondGrad_1(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113346829 (gpu failure)") graph = ops.Graph() @@ -706,7 +706,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(3.0, grad.eval(feed_dict={c: 3})) def testCondGrad_3(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/110550782 (gradient w.r.t external variable)") with self.test_session(): @@ -741,7 +741,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(1.0, result.eval()) def testCondGrad_Gather(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113327884") with self.test_session() as sess: @@ -916,7 +916,7 @@ class ControlFlowTest(test.TestCase): _ = gradients_impl.gradients(loop_with_maxiter, v) def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294340 (enable while_v2)") v = constant_op.constant(1.0) @@ -1375,7 +1375,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(10, sess.run(r, {b: True})) def testWhileCondWithControl(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") # Ensure that no control edges by an outer control dependency context are @@ -1392,7 +1392,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(0, sess.run(loop)) def testWhileCondWithControl_1(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") with self.test_session(): @@ -1417,7 +1417,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(65536.0, v.eval()) def testWhileCondExitControl(self): - if control_flow_ops._ENABLE_COND_V2: + if 
control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294340 (enable while_v2)") with self.test_session(): @@ -1443,7 +1443,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(99, v.eval()) def testCondWhile_1(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -1456,7 +1456,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, r.eval()) def testCondWhile_2(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -1469,7 +1469,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, r.eval()) def _testCondWhile_3(self, use_gpu): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294340 (enable while_v2)") with self.test_session(use_gpu=use_gpu) as sess: @@ -1498,7 +1498,7 @@ class ControlFlowTest(test.TestCase): self._testCondWhile_3(use_gpu=True) def testWhileCond_1(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") with self.test_session(): @@ -1516,7 +1516,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, r.eval()) def testWhileCond_2(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") with self.test_session(): @@ -1527,7 +1527,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(10, r.eval()) def testWhileCond_3(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") with self.test_session(): @@ -1872,7 +1872,7 @@ class ControlFlowTest(test.TestCase): self._testWhileGrad_Mul(use_gpu=True, p_iters=10) def _testNestedWhileCondWhileGrad(self, use_gpu): - if 
control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") with self.test_session(use_gpu=use_gpu): @@ -1913,7 +1913,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(216.0, r[0].eval()) def testWhileGradInCond(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/110550782 (gradient w.r.t external variable)") with self.test_session(): @@ -1964,7 +1964,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0})) def testCondGradInNestedWhiles(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113346829 (gpu failure)") def outer_body(i, x): @@ -2280,7 +2280,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(1024.0, r.eval()) def testWhileCondGrad_Simple(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113294377 (unknown shape)") self._testWhileCondGrad_Simple(use_gpu=False) @@ -2633,7 +2633,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual(5.0, result.eval()) def testOneValueCond(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -2651,7 +2651,7 @@ class ControlFlowTest(test.TestCase): self.assertEqual([2], i.eval(feed_dict={c: 0})) def testExampleCond(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/111124878 (don't return tuple)") with self.test_session(): @@ -2669,7 +2669,7 @@ class ControlFlowTest(test.TestCase): self.assertAllClose(2.0 * math.sqrt(2), i.eval(feed_dict={d: 2})) def testCase(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") with self.test_session(): @@ -2724,7 +2724,7 @@ 
class ControlFlowTest(test.TestCase): self.assertAllEqual(r6.eval(), 0) def testCaseSideEffects(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/112477618 (Operation returned from cond)") with self.test_session() as sess: @@ -2762,7 +2762,7 @@ class ControlFlowTest(test.TestCase): self.assertAllEqual(sess.run([v0, v1, v2]), [0, -1, -1]) def testOneOpCond(self): - if control_flow_ops._ENABLE_COND_V2: + if control_flow_ops.ENABLE_COND_V2: return unittest.skip("b/113324949 (ref vars)") with self.test_session(): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index e3c1aa3d5a..3c915b055a 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -61,7 +61,7 @@ from tensorflow.python.util import tf_should_use from tensorflow.python.util.tf_export import tf_export -_ENABLE_COND_V2 = os.getenv("TF_ENABLE_COND_V2", "0") != "0" +ENABLE_COND_V2 = os.getenv("TF_ENABLE_COND_V2", "0") != "0" # We override the 'tuple' for a control flow op, so we keep python's @@ -2026,7 +2026,7 @@ def cond(pred, ``` """ - if _ENABLE_COND_V2: + if ENABLE_COND_V2: return cond_v2_impl.cond_v2(pred, true_fn, false_fn, name) # We needed to make true_fn/false_fn keyword arguments for -- cgit v1.2.3 From 2832a4f9e125c00b64614880fb08376ee03fa2da Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Tue, 11 Sep 2018 14:04:27 -0700 Subject: Use Eigen::CuboidConvolutionBackwardInput in Conv3DBackpropInput. Instead of multiple primitive Eigen ops in Conv3DBackpropInput, call directly into the ex-NeuralNetworks module's function CuboidConvolutionBackwardInput. Modest ~10% latency improvement and ~15-20% peak memory reduction. 
PiperOrigin-RevId: 212516586 --- tensorflow/core/kernels/conv_3d.h | 22 ++++++++++++ tensorflow/core/kernels/conv_grad_ops_3d.cc | 53 +++++++---------------------- 2 files changed, 35 insertions(+), 40 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/conv_3d.h b/tensorflow/core/kernels/conv_3d.h index 02e3655ad1..e5054e062e 100644 --- a/tensorflow/core/kernels/conv_3d.h +++ b/tensorflow/core/kernels/conv_3d.h @@ -19,6 +19,7 @@ limitations under the License. #define TENSORFLOW_CORE_KERNELS_CONV_3D_H_ #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h" #include "tensorflow/core/kernels/eigen_cuboid_convolution.h" namespace tensorflow { @@ -28,6 +29,10 @@ namespace functor { template struct CuboidConvolution; +// Backward input pass for the cuboid convolution. +template +struct CuboidConvolutionBackwardInput; + typedef Eigen::ThreadPoolDevice CPUDevice; template @@ -42,6 +47,23 @@ struct CuboidConvolution { } }; +template +struct CuboidConvolutionBackwardInput { + void operator()(const CPUDevice& d, + typename TTypes::Tensor input_backward, + typename TTypes::ConstTensor filter, + typename TTypes::ConstTensor output_backward, + int stride_planes, int stride_rows, int stride_cols) { + // Need to swap the order of plane/row/col strides when calling Eigen. 
+ input_backward.device(d) = Eigen::CuboidConvolutionBackwardInput( + filter, output_backward, + input_backward.dimension(3), // input_planes + input_backward.dimension(2), // input_rows + input_backward.dimension(1), // input_cols + stride_cols, stride_rows, stride_planes); + } +}; + } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc index 15f1bf9aba..ec7c02ac2b 100644 --- a/tensorflow/core/kernels/conv_grad_ops_3d.cc +++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc @@ -201,50 +201,23 @@ class Conv3DBackpropInputOp : public OpKernel { input_shape = context->input(0).shape(); } EXTRACT_AND_VERIFY_DIMENSIONS("Conv3DBackpropInput"); - Eigen::array, 5> pad_dims{ - {0, 0}, - {top_pad_planes, bottom_pad_planes}, - {top_pad_rows, bottom_pad_rows}, - {left_pad_cols, right_pad_cols}, - {0, 0}}; + Tensor* in_backprop; OP_REQUIRES_OK(context, context->allocate_output(0, input_shape, &in_backprop)); - // Fill out a padded out_backprop. - TensorShape padded_out_shape({batch, padded_out_planes, padded_out_rows, - padded_out_cols, out_depth}); - Tensor padded_output; - OP_REQUIRES_OK(context, - context->allocate_temp(DataTypeToEnum::v(), - padded_out_shape, &padded_output)); - Eigen::DSizes no_op_shuffle{0, 1, 2, 3, 4}; - Eigen::DSizes eigen_strides{1, strides[0], strides[1], - strides[2], 1}; - functor::InflatePadAndShuffle()( - context->eigen_device(), out_backprop.tensor(), - eigen_strides, pad_dims, no_op_shuffle, padded_output.tensor()); - const Tensor& padded_output_cref = padded_output; - - // Fill a new "reverted" filter. We need to transpose the in_depth and - // out_depth for the filter and reverse the planes, rows and cols. 
- TensorShape r_filter_shape( - {filter_size[0], filter_size[1], filter_size[2], out_depth, in_depth}); - Tensor r_filter; - OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), - r_filter_shape, &r_filter)); - Eigen::DSizes filter_order{0, 1, 2, 4, 3}; - Eigen::array filter_rev_dims{true, true, true, false, false}; - functor::ShuffleAndReverse()( - context->eigen_device(), filter.tensor(), filter_order, - filter_rev_dims, r_filter.tensor()); - const Tensor& r_filter_cref = r_filter; - - // Now we can call conv_3d directly. - functor::CuboidConvolution()( - context->eigen_device(), in_backprop->tensor(), - padded_output_cref.tensor(), r_filter_cref.tensor(), 1, 1, - 1, BrainPadding2EigenPadding(VALID)); + // There is no need to explicitly compute padding values (and pad + // out_backprop), because Eigen uses the same padding inference mechanism as + // Tensorflow. + functor::CuboidConvolutionBackwardInput()( + context->eigen_device(), + in_backprop->tensor(), // input_backward + filter.tensor(), // filter + out_backprop.tensor(), // output_backward + // Order of strides will be reversed before passing to Eigen. + static_cast(strides[0]), // stride_planes + static_cast(strides[1]), // stride_rows + static_cast(strides[2])); // stride_cols } private: -- cgit v1.2.3 From b40ab8d8a024bb934f25ebc3f5260b64c5816ef5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Sep 2018 14:05:59 -0700 Subject: Adds generator support directly to Keras's fit, evaluate, and predict. 
PiperOrigin-RevId: 212516939 --- tensorflow/python/keras/engine/training.py | 146 +++++++++++++++++---- tensorflow/python/keras/engine/training_test.py | 51 +++++++ tensorflow/python/keras/engine/training_utils.py | 12 ++ tensorflow/python/keras/utils/data_utils.py | 8 +- tensorflow/python/util/tf_inspect.py | 5 + .../api/golden/v1/tensorflow.keras.-model.pbtxt | 6 +- .../golden/v1/tensorflow.keras.-sequential.pbtxt | 6 +- .../golden/v1/tensorflow.keras.models.-model.pbtxt | 6 +- .../v1/tensorflow.keras.models.-sequential.pbtxt | 6 +- .../api/golden/v2/tensorflow.keras.-model.pbtxt | 6 +- .../golden/v2/tensorflow.keras.-sequential.pbtxt | 6 +- .../golden/v2/tensorflow.keras.models.-model.pbtxt | 6 +- .../v2/tensorflow.keras.models.-sequential.pbtxt | 6 +- 13 files changed, 223 insertions(+), 47 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 49b25e307e..c6749468c8 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -41,6 +41,7 @@ from tensorflow.python.keras.engine import training_eager from tensorflow.python.keras.engine import training_generator from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.engine.network import Network +from tensorflow.python.keras.utils import data_utils from tensorflow.python.keras.utils.generic_utils import slice_arrays from tensorflow.python.ops import math_ops from tensorflow.python.ops import weights_broadcast_ops @@ -1338,6 +1339,9 @@ class Model(Network): initial_epoch=0, steps_per_epoch=None, validation_steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, **kwargs): """Trains the model for a fixed number of epochs (iterations on a dataset). @@ -1350,19 +1354,23 @@ class Model(Network): - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - A `tf.data` dataset or a dataset iterator. 
Should return a tuple - of either (inputs, targets) or (inputs, targets, sample_weights). + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A generator or `keras.utils.Sequence` returning `(inputs, targets)` + or `(inputs, targets, sample weights)`. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and - tensor targets, or inversely). If `x` is a dataset or dataset - iterator, `y` should not be specified - (since targets will be obtained from the iterator). + tensor targets, or inversely). If `x` is a dataset, dataset + iterator, generator, or `keras.utils.Sequence` instance, `y` should + not be specified (since targets will be obtained from `x`). batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` if your data is in the - form of symbolic tensors, datasets, or dataset iterators - (since they generate batches). + form of symbolic tensors, dataset, dataset iterators, + generators, or `keras.utils.Sequence` instances (since they generate + batches). epochs: Integer. Number of epochs to train the model. An epoch is an iteration over the entire `x` and `y` data provided. @@ -1384,7 +1392,8 @@ class Model(Network): on this data at the end of each epoch. The validation data is selected from the last samples in the `x` and `y` data provided, before shuffling. This argument is - not supported when `x` is a dataset or a dataset iterator. + not supported when `x` is a dataset, dataset iterator, generator or + `keras.utils.Sequence` instance. validation_data: Data on which to evaluate the loss and any model metrics at the end of each epoch. The model will not be trained on this data. @@ -1415,8 +1424,9 @@ class Model(Network): to apply a different weight to every timestep of every sample. 
In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset or a dataset iterator, instead - provide the sample_weights as the third element of `x`. + supported when `x` is a dataset, dataset iterator, generator, or + `keras.utils.Sequence` instance, instead provide the sample_weights + as the third element of `x`. initial_epoch: Integer. Epoch at which to start training (useful for resuming a previous training run). @@ -1430,6 +1440,20 @@ class Model(Network): validation_steps: Only relevant if `steps_per_epoch` is specified. Total number of steps (batches of samples) to validate before stopping. + max_queue_size: Integer. Used for generator or `keras.utils.Sequence` + input only. Maximum size for the generator queue. + If unspecified, `max_queue_size` will default to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up + when using process-based threading. If unspecified, `workers` + will default to 1. If 0, will execute the generator on the main + thread. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-picklable arguments to + the generator as they can't be passed easily to children processes. **kwargs: Used for backwards compatibility. Returns: @@ -1446,6 +1470,23 @@ class Model(Network): # TODO(fchollet): this method may be creating reference cycles, which would # lead to accumulating garbage in memory when called in a loop. Investigate. 
+ if data_utils.is_generator_or_sequence(x): + training_utils.check_generator_arguments(y, sample_weight) + return self.fit_generator( + x, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch) + # Backwards compatibility if batch_size is None and steps_per_epoch is None: batch_size = 32 @@ -1588,7 +1629,10 @@ class Model(Network): batch_size=None, verbose=1, sample_weight=None, - steps=None): + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False): """Returns the loss value & metrics values for the model in test mode. Computation is done in batches. @@ -1602,18 +1646,21 @@ class Model(Network): - A dict mapping input names to the corresponding array/tensors, if the model has named inputs. - A `tf.data` dataset or a dataset iterator. + - A generator or `keras.utils.Sequence` instance. y: Target data. Like the input data `x`, it could be either Numpy array(s) or TensorFlow tensor(s). It should be consistent with `x` (you cannot have Numpy inputs and tensor targets, or inversely). - If `x` is a dataset or a dataset iterator, `y` should not be specified - (since targets will be obtained from the iterator/dataset). + If `x` is a dataset, dataset iterator, generator or + `keras.utils.Sequence` instance, `y` should not be specified (since + targets will be obtained from the iterator/dataset). batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` is your data is in the - form of symbolic tensors, datasets, or dataset iterators - (since they generate batches). 
+ form of symbolic tensors, dataset, dataset iterators, + generators, or `keras.utils.Sequence` instances (since they generate + batches). verbose: 0 or 1. Verbosity mode. 0 = silent, 1 = progress bar. sample_weight: Optional Numpy array of weights for @@ -1627,11 +1674,25 @@ class Model(Network): to apply a different weight to every timestep of every sample. In this case you should make sure to specify `sample_weight_mode="temporal"` in `compile()`. This argument is not - supported when `x` is a dataset or a dataset iterator. + supported when `x` is a dataset or a dataset iterator, instead pass + sample weights as the third element of `x`. steps: Integer or `None`. Total number of steps (batches of samples) before declaring the evaluation round finished. Ignored with the default value of `None`. + max_queue_size: Integer. Used for generator or `keras.utils.Sequence` + input only. Maximum size for the generator queue. + If unspecified, `max_queue_size` will default to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default + to 1. If 0, will execute the generator on the main thread. + use_multiprocessing: Boolean. Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-picklable arguments to + the generator as they can't be passed easily to children processes. Returns: Scalar test loss (if the model has a single output and no metrics) @@ -1642,6 +1703,16 @@ class Model(Network): Raises: ValueError: in case of invalid arguments. 
""" + if data_utils.is_generator_or_sequence(x): + training_utils.check_generator_arguments(y, sample_weight) + return self.evaluate_generator( + x, + steps=steps, + verbose=verbose, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing) + # Backwards compatibility. if batch_size is None and steps is None: batch_size = 32 @@ -1688,7 +1759,14 @@ class Model(Network): verbose=verbose, steps=steps) - def predict(self, x, batch_size=None, verbose=0, steps=None): + def predict(self, + x, + batch_size=None, + verbose=0, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False): """Generates output predictions for the input samples. Computation is done in batches. @@ -1700,16 +1778,32 @@ class Model(Network): - A TensorFlow tensor, or a list of tensors (in case the model has multiple inputs). - A `tf.data` dataset or a dataset iterator. + - A generator or `keras.utils.Sequence` instance. batch_size: Integer or `None`. Number of samples per gradient update. If unspecified, `batch_size` will default to 32. Do not specify the `batch_size` is your data is in the - form of symbolic tensors, dataset, or dataset iterators - (since they generate batches). + form of symbolic tensors, dataset, dataset iterators, + generators, or `keras.utils.Sequence` instances (since they generate + batches). verbose: Verbosity mode, 0 or 1. steps: Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of `None`. + max_queue_size: Integer. Used for generator or `keras.utils.Sequence` + input only. Maximum size for the generator queue. + If unspecified, `max_queue_size` will default to 10. + workers: Integer. Used for generator or `keras.utils.Sequence` input + only. Maximum number of processes to spin up when using + process-based threading. If unspecified, `workers` will default + to 1. If 0, will execute the generator on the main thread. + use_multiprocessing: Boolean. 
Used for generator or + `keras.utils.Sequence` input only. If `True`, use process-based + threading. If unspecified, `use_multiprocessing` will default to + `False`. Note that because this implementation relies on + multiprocessing, you should not pass non-picklable arguments to + the generator as they can't be passed easily to children processes. + Returns: Numpy array(s) of predictions. @@ -1720,6 +1814,15 @@ class Model(Network): or in case a stateful model receives a number of samples that is not a multiple of the batch size. """ + if data_utils.is_generator_or_sequence(x): + return self.predict_generator( + x, + steps=steps, + verbose=verbose, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing) + # Backwards compatibility. if batch_size is None and steps is None: batch_size = 32 @@ -2071,7 +2174,7 @@ class Model(Network): Arguments: generator: Generator yielding tuples (inputs, targets) or (inputs, targets, sample_weights) - or an instance of Sequence (keras.utils.Sequence) + or an instance of `keras.utils.Sequence` object in order to avoid duplicate data when using multiprocessing. steps: Total number of steps (batches of samples) @@ -2135,9 +2238,8 @@ class Model(Network): Arguments: generator: Generator yielding batches of input samples - or an instance of Sequence (keras.utils.Sequence) - object in order to avoid duplicate data - when using multiprocessing. + or an instance of `keras.utils.Sequence` object in order to + avoid duplicate data when using multiprocessing. steps: Total number of steps (batches of samples) to yield from `generator` before stopping. 
Optional for `Sequence`: if unspecified, will use diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 8938333b1a..380130095b 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1322,6 +1322,57 @@ class TestGeneratorMethods(test.TestCase): workers=0, use_multiprocessing=False) + @tf_test_util.run_in_graph_and_eager_modes + def test_generator_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + def custom_generator(): + while True: + yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + inputs = keras.layers.Input(shape=(10,)) + x = keras.layers.Dense(10, activation='relu')(inputs) + outputs = keras.layers.Dense(1, activation='sigmoid')(x) + model = keras.Model(inputs, outputs) + + model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') + model.fit( + custom_generator(), + steps_per_epoch=2, + validation_data=val_data, + epochs=2) + model.evaluate(custom_generator(), steps=2) + model.predict(custom_generator(), steps=2) + + @tf_test_util.run_in_graph_and_eager_modes + def test_sequence_input_to_fit_eval_predict(self): + val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + class CustomSequence(keras.utils.Sequence): + + def __getitem__(self, idx): + return np.ones([10, 10], np.float32), np.ones([10, 1], np.float32) + + def __len__(self): + return 2 + + inputs = keras.layers.Input(shape=(10,)) + x = keras.layers.Dense(10, activation='relu')(inputs) + outputs = keras.layers.Dense(1, activation='sigmoid')(x) + model = keras.Model(inputs, outputs) + + model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy') + model.fit(CustomSequence(), validation_data=val_data, epochs=2) + model.evaluate(CustomSequence()) + model.predict(CustomSequence()) + + with self.assertRaisesRegexp(ValueError, '`y` argument is not supported'): + 
model.fit(CustomSequence(), y=np.ones([10, 1])) + + with self.assertRaisesRegexp(ValueError, + '`sample_weight` argument is not supported'): + model.fit(CustomSequence(), sample_weight=np.ones([10, 1])) + class TestTrainingUtils(test.TestCase): diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 898e9223cb..8e9fab81d6 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -797,6 +797,18 @@ def validate_iterator_input(x, y, sample_weight, validation_split=None): 'Received: x=%s, validation_split=%f' % (x, validation_split)) +def check_generator_arguments(y=None, sample_weight=None): + """Validates arguments passed when using a generator.""" + if y is not None: + raise ValueError('`y` argument is not supported when data is' + 'a generator or Sequence instance. Instead pass targets' + ' as the second element of the generator.') + if sample_weight is not None: + raise ValueError('`sample_weight` argument is not supported when data is' + 'a generator or Sequence instance. Instead pass sample' + ' weights as the third element of the generator.') + + def check_steps_argument(input_data, steps, steps_name): """Validates `steps` argument based on input data's type. 
diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py index d93a7b6afc..b736daa46d 100644 --- a/tensorflow/python/keras/utils/data_utils.py +++ b/tensorflow/python/keras/utils/data_utils.py @@ -40,6 +40,7 @@ from six.moves.urllib.error import URLError from six.moves.urllib.request import urlopen from tensorflow.python.keras.utils.generic_utils import Progbar +from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export @@ -93,6 +94,11 @@ else: from six.moves.urllib.request import urlretrieve +def is_generator_or_sequence(x): + """Check if `x` is a Keras generator type.""" + return tf_inspect.isgenerator(x) or isinstance(x, Sequence) + + def _extract_archive(file_path, path='.', archive_format='auto'): """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. @@ -551,7 +557,7 @@ class OrderedEnqueuer(SequenceEnqueuer): self.executor_fn = lambda seqs: multiprocessing.Pool( # pylint: disable=g-long-lambda workers, initializer=init_pool, initargs=(seqs,)) else: - # We do not need the init since it's threads. + # We do not need the init since it's threads. 
self.executor_fn = lambda _: ThreadPool(workers) self.workers = workers self.queue = queue.Queue(max_queue_size) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 778121e15b..967c872c2a 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -325,6 +325,11 @@ def isfunction(object): # pylint: disable=redefined-builtin return _inspect.isfunction(tf_decorator.unwrap(object)[1]) +def isgenerator(object): # pylint: disable=redefined-builtin + """TFDecorator-aware replacement for inspect.isgenerator.""" + return _inspect.isgenerator(tf_decorator.unwrap(object)[1]) + + def ismethod(object): # pylint: disable=redefined-builtin """TFDecorator-aware replacement for inspect.ismethod.""" return _inspect.ismethod(tf_decorator.unwrap(object)[1]) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt index d843194ef0..0869de0243 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt @@ -151,7 +151,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -159,7 +159,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', 
\'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -219,7 +219,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_generator" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt index b8e9baca71..20f39fae1e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt @@ -156,7 +156,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', 
\'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -164,7 +164,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -228,7 +228,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_classes" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt index 472b9818df..4011719317 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt +++ 
b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt @@ -151,7 +151,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -159,7 +159,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -219,7 +219,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', 
\'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_generator" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt index 937516eff1..8a12ac1ad8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt @@ -156,7 +156,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -164,7 +164,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, 
defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -228,7 +228,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_classes" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt index d843194ef0..0869de0243 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt @@ -151,7 +151,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -159,7 +159,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', 
\'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -219,7 +219,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_generator" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt index b8e9baca71..20f39fae1e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt @@ -156,7 +156,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', 
\'False\'], " } member_method { name: "evaluate_generator" @@ -164,7 +164,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -228,7 +228,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_classes" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt index 472b9818df..4011719317 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt @@ -151,7 +151,7 @@ tf_class { } member_method { name: "evaluate" - argspec: 
"args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -159,7 +159,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "fit_generator" @@ -219,7 +219,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " 
} member_method { name: "predict_generator" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt index 937516eff1..8a12ac1ad8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt @@ -156,7 +156,7 @@ tf_class { } member_method { name: "evaluate" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "evaluate_generator" @@ -164,7 +164,7 @@ tf_class { } member_method { name: "fit" - argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'epochs\', \'verbose\', \'callbacks\', \'validation_split\', \'validation_data\', \'shuffle\', \'class_weight\', \'sample_weight\', \'initial_epoch\', \'steps_per_epoch\', \'validation_steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'1\', \'1\', \'None\', \'0.0\', \'None\', \'True\', \'None\', \'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], " } 
member_method { name: "fit_generator" @@ -228,7 +228,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\'], " + argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], " } member_method { name: "predict_classes" -- cgit v1.2.3 From 72410969ca8dd7f1be48672c6cb943940edb9f31 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 11 Sep 2018 14:10:31 -0700 Subject: Update defun to support extra params as function attributes. PiperOrigin-RevId: 212517784 --- tensorflow/python/eager/function.py | 79 ++++++++++++++++++++++++++++++-- tensorflow/python/eager/function_test.py | 61 ++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 4 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 8c30550708..348bf4650f 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -27,6 +27,7 @@ import threading import numpy as np import six +from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import function_pb2 from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context @@ -60,6 +61,10 @@ cond_v2_impl._function = sys.modules[__name__] # pylint: disable=protected-acce gradients_impl._function = sys.modules[__name__] # pylint: disable=protected-access +# TODO(scottzhu): Update this to allow arbitrary attribute names in future. 
+WHITELIST_FUNCTION_ATTRIBUTE_PREFIX = "experimental_" + + def _create_substitute_placeholder(value, name, dtype=None): """Creates a placeholder for `value` and propagates shape info to it.""" # Note: setting ops.control_dependencies(None) ensures we always put @@ -100,6 +105,44 @@ def _get_device_functions(ctx, graph): return tuple(graph._device_functions_outer_to_inner) # pylint: disable=protected-access +def _parse_func_attrs(attributes): + """Convert the keyword arguments into function_def attributes. + + Currently only support primitive types: bool, int, float and string. + + Args: + attributes: the dictionary of attributes. + Returns: + A dict of attributes where the key is the name of attribute and the value + is the AttrValue proto. + Raises: + ValueError: If the kwargs contains unwhitelisted name or unsupported value + types. + """ + attrs = {} + for key, value in attributes.items(): + if not key.startswith(WHITELIST_FUNCTION_ATTRIBUTE_PREFIX): + raise ValueError("Attribute name is not whitelisted. " + "Whitelisted: prefix %s, got: %s" % + (WHITELIST_FUNCTION_ATTRIBUTE_PREFIX, key)) + + if isinstance(value, attr_value_pb2.AttrValue): + attrs[key] = value + # bool type check has to happen before int since bool is a subclass of int. + elif isinstance(value, bool): + attrs[key] = attr_value_pb2.AttrValue(b=value) + elif isinstance(value, int): + attrs[key] = attr_value_pb2.AttrValue(i=value) + elif isinstance(value, float): + attrs[key] = attr_value_pb2.AttrValue(f=value) + elif isinstance(value, str): + attrs[key] = attr_value_pb2.AttrValue(s=compat.as_bytes(value)) + else: + raise ValueError("Unsupported attribute type for %s with type %s" % + (key, type(value))) + return attrs + + class FuncGraph(ops.Graph): """Graph representing a function body. 
@@ -486,7 +529,7 @@ class Function(object): self._num_outputs = len(self._func_graph.outputs) self._output_shapes = tuple( output.shape for output in self._func_graph.outputs) - self._attrs = attrs or {} + self._attrs = _parse_func_attrs(attrs) self._device_functions = tuple( self._func_graph._device_functions_outer_to_inner) # pylint: disable=protected-access @@ -909,7 +952,8 @@ class PolymorphicFunction(object): def __init__(self, python_function, name, - input_signature=None): + input_signature=None, + attributes=None): """Initializes a polymorphic function. Args: @@ -918,6 +962,8 @@ class PolymorphicFunction(object): input_signature: a possibly nested sequence of `TensorSpec` objects specifying the input signature of this function. If `None`, a separate function is instantiated for each inferred input signature. + attributes: dict, extra keyword arguments that will be added as attribute + of the function. Raises: ValueError: if `input_signature` is not None and the `python_function`'s @@ -935,6 +981,7 @@ class PolymorphicFunction(object): self._name = name self._function_cache = collections.OrderedDict() self._variables = [] + self._function_attributes = attributes or {} self._lock = threading.Lock() @@ -1149,7 +1196,8 @@ class PolymorphicFunction(object): if graph_function is None: graph_function = Function( func_graph_from_py_func(self._name, self._python_function, args, - kwds, self._input_signature)) + kwds, self._input_signature), + self._function_attributes) self._variables.extend( [v for v in graph_function.variables if v not in self._variables]) self._function_cache[cache_key] = graph_function @@ -1483,7 +1531,29 @@ def defun(func=None, input_signature=None): TypeError: If `input_signature` is neither `None` nor a sequence of `tf.contrib.eager.TensorSpec` objects. 
""" + return defun_with_attributes(func=func, input_signature=input_signature) + + +def defun_with_attributes(func=None, input_signature=None, attributes=None): + """Compiles a Python function into a callable TensorFlow graph. + + This function supports adding extra function attributes. See detailed + documentation in defun(). Currently this is not exposed in public API since we + don't expect user to directly use attributes, and attribute won't work by + itself. This assumption might change in future. + Args: + func: function to be compiled. + input_signature: same as defun()'s input_signature. + attributes: A dictionary of arguments which will be added to function def as + attributes. Currently only support primitive types as value, and only + whitelisted attribute name is allowed. Unwhitelisted attribute name or + unsupported value will result into ValueError. + + Returns: + Same as the return value of defun, with attributes added to the function in + graph. + """ if input_signature is not None: _validate_signature(input_signature) @@ -1495,7 +1565,8 @@ def defun(func=None, input_signature=None): name = "function" return tf_decorator.make_decorator( function, - PolymorphicFunction(function, name, input_signature=input_signature)) + PolymorphicFunction(function, name, input_signature=input_signature, + attributes=attributes)) # This code path is for the `foo = tfe.defun(foo, ...)` use case if func is not None: diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 6507bc6d71..e6a49b66cf 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1501,6 +1501,67 @@ class FunctionTest(test.TestCase): side_effecting_function.python_function() self.assertAllEqual(state, [0, 0]) + def testFunctionWithExtraAttributes(self): + @function.defun_with_attributes(attributes={'experimental_1': 'value1', + 'experimental_2': 2}) + def matmul(x, y): + return math_ops.matmul(x, y) + + def 
add(x, y): + return math_ops.add(x, y) + defun_add = function.defun_with_attributes( + add, attributes={'experimental_3': True, 'experimental_4': 1.0}) + + with context.graph_mode(), self.test_session(): + with ops.get_default_graph().as_default(): + t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + sq = matmul(t, t) + double = defun_add(t, t) + self.assertAllEqual(sq.eval().reshape(-1), [7, 10, 15, 22]) + self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8]) + + graph = ops.get_default_graph() + # pylint: disable=protected-access + self.assertEqual(len(graph._functions), 2) + functions = list(graph._functions.values()) + self.assertRegexpMatches( + functions[0].definition.signature.name, '.*matmul.*') + attrs = functions[0].definition.attr + self.assertEqual(len(attrs), 2) + self.assertEqual(attrs['experimental_1'].s, b'value1') + self.assertEqual(attrs['experimental_2'].i, 2) + + self.assertRegexpMatches( + functions[1].definition.signature.name, '.*add.*') + attrs = functions[1].definition.attr + self.assertEqual(len(attrs), 2) + self.assertEqual(attrs['experimental_3'].b, True) + self.assertEqual(attrs['experimental_4'].f, 1.0) + # pylint: enable=protected-access + + def testFunctionWithInvalidAttribute(self): + @function.defun_with_attributes(attributes={'attr1': 'value1'}) + def matmul(x, y): + return math_ops.matmul(x, y) + + with self.assertRaisesRegexp(ValueError, + '.*Attribute name is not whitelisted.*'): + with context.graph_mode(), self.test_session(): + with ops.get_default_graph().as_default(): + t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + matmul(t, t) + + @function.defun_with_attributes(attributes={'experimental_1': ['value1']}) + def add(x, y): + return math_ops.add(x, y) + + with self.assertRaisesRegexp(ValueError, + '.*Unsupported attribute type.*'): + with context.graph_mode(), self.test_session(): + with ops.get_default_graph().as_default(): + t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) + add(t, t) + 
@test_util.with_c_shapes class AutomaticControlDependenciesTest(test.TestCase): -- cgit v1.2.3 From 6ebe0abcc6bb3c3b50975cd2550bec2012389673 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 11 Sep 2018 14:17:07 -0700 Subject: Construct placer after the first optimization pass is run. PiperOrigin-RevId: 212518982 --- tensorflow/core/kernels/partitioned_function_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 7bb403290d..3ab7404ea9 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -127,12 +127,12 @@ class PartitionedCallOp : public AsyncOpKernel { optimization_options.graph = &graph; optimization_options.flib_def = overlay_lib; optimization_options.device_set = &device_set; - Placer placer(graph.get(), &device_set); OP_REQUIRES_OK_ASYNC( ctx, OptimizationPassRegistry::Global()->RunGrouping( OptimizationPassRegistry::PRE_PLACEMENT, optimization_options), done); + Placer placer(graph.get(), &device_set); OP_REQUIRES_OK_ASYNC(ctx, placer.Run(), done); OP_REQUIRES_OK_ASYNC( ctx, -- cgit v1.2.3 From 328aeaeec83795c7de2589ca97a0b6d8b9a873e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Sep 2018 14:31:09 -0700 Subject: Fixing broadcast pow. 
PiperOrigin-RevId: 212521825 --- .../lite/kernels/internal/reference/reference_ops.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 0abacf85e1..977367026d 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -4877,16 +4877,22 @@ inline void Pow(const RuntimeShape& input1_shape, const T* input1_data, } template -inline void BroadcastPow4DSlow(const RuntimeShape& input1_shape, +inline void BroadcastPow4DSlow(const RuntimeShape& unextended_input1_shape, const T* input1_data, - const RuntimeShape& input2_shape, + const RuntimeShape& unextended_input2_shape, const T* input2_data, - const RuntimeShape& output_shape, + const RuntimeShape& unextended_output_shape, T* output_data) { + TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4); + RuntimeShape output_shape = + RuntimeShape::ExtendedShape(4, unextended_output_shape); + NdArrayDesc<4> desc1; NdArrayDesc<4> desc2; - NdArrayDescsForElementwiseBroadcast(input1_shape, input2_shape, &desc1, - &desc2); + NdArrayDescsForElementwiseBroadcast(unextended_input1_shape, + unextended_input2_shape, &desc1, &desc2); for (int b = 0; b < output_shape.Dims(0); ++b) { for (int y = 0; y < output_shape.Dims(1); ++y) { -- cgit v1.2.3 From ba650a5c989106330519dbde0de368f580435a8b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Sep 2018 14:45:36 -0700 Subject: Fix typos in the comment for the class Categorical. 
PiperOrigin-RevId: 212524769 --- tensorflow/python/ops/distributions/categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/ops/distributions/categorical.py b/tensorflow/python/ops/distributions/categorical.py index dd25fce2ec..fbbacf2521 100644 --- a/tensorflow/python/ops/distributions/categorical.py +++ b/tensorflow/python/ops/distributions/categorical.py @@ -69,7 +69,7 @@ class Categorical(distribution.Distribution): The Categorical distribution is closely related to the `OneHotCategorical` and `Multinomial` distributions. The Categorical distribution can be intuited as generating samples according to `argmax{ OneHotCategorical(probs) }` itself - being identical to `argmax{ Multinomial(probs, total_count=1) }. + being identical to `argmax{ Multinomial(probs, total_count=1) }`. #### Mathematical Details @@ -83,7 +83,7 @@ class Categorical(distribution.Distribution): The number of classes, `K`, must not exceed: - the largest integer representable by `self.dtype`, i.e., - `2**(mantissa_bits+1)` (IEE754), + `2**(mantissa_bits+1)` (IEEE 754), - the maximum `Tensor` index, i.e., `2**31-1`. In other words, -- cgit v1.2.3 From f3242baaf10842ff4753b5974f426cf963fa8eef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 11 Sep 2018 15:02:21 -0700 Subject: Add support for populating a feature columns to output tensors dictionary in input_layer. 
PiperOrigin-RevId: 212528172 --- tensorflow/python/feature_column/feature_column.py | 25 ++++++++++++---- .../python/feature_column/feature_column_test.py | 34 ++++++++++++++++++++++ .../api/golden/v1/tensorflow.feature_column.pbtxt | 2 +- .../api/golden/v2/tensorflow.feature_column.pbtxt | 2 +- 4 files changed, 55 insertions(+), 8 deletions(-) (limited to 'tensorflow') diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 2246d2f3e9..9984379e9d 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -169,7 +169,8 @@ def _internal_input_layer(features, weight_collections=None, trainable=True, cols_to_vars=None, - scope=None): + scope=None, + cols_to_output_tensors=None): """See input_layer. `scope` is a name or variable scope to use.""" feature_columns = _normalize_feature_columns(feature_columns) @@ -202,14 +203,17 @@ def _internal_input_layer(features, trainable=trainable) num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] - output_tensors.append( - array_ops.reshape(tensor, shape=(batch_size, num_elements))) + output_tensor = array_ops.reshape( + tensor, shape=(batch_size, num_elements)) + output_tensors.append(output_tensor) if cols_to_vars is not None: # Retrieve any variables created (some _DenseColumn's don't create # variables, in which case an empty list is returned). 
cols_to_vars[column] = ops.get_collection( ops.GraphKeys.GLOBAL_VARIABLES, scope=variable_scope.get_variable_scope().name) + if cols_to_output_tensors is not None: + cols_to_output_tensors[column] = output_tensor _verify_static_batch_size_equality(output_tensors, ordered_columns) return array_ops.concat(output_tensors, 1) @@ -219,7 +223,8 @@ def input_layer(features, feature_columns, weight_collections=None, trainable=True, - cols_to_vars=None): + cols_to_vars=None, + cols_to_output_tensors=None): """Returns a dense `Tensor` as input layer based on given `feature_columns`. Generally a single example in training data is described with FeatureColumns. @@ -264,6 +269,9 @@ def input_layer(features, dimension=10): [ Date: Tue, 11 Sep 2018 15:17:57 -0700 Subject: Add a printout at the start of MetaOptimizer::Optimize() to make it easier to see the total cost of running Grappler in logs. Also add a couple of VLOG(1) statements to see breakdown between main graph and function optimization. PiperOrigin-RevId: 212531430 --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tensorflow') diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index a5fd33d28b..8c99598748 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -331,10 +331,12 @@ Status MetaOptimizer::RunOptimizer( Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { + LOG(INFO) << "Starting optimization for grappler item: " << item.id; optimization_results_.clear(); // 1. Optimize main graph TF_RETURN_IF_ERROR(OptimizeGraph(cluster, item, optimized_graph)); + VLOG(1) << "Optimized main graph."; // 2. 
Optimize function library FunctionLibraryDefinition flib(OpRegistry::Global(), @@ -398,7 +400,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } - VLOG(3) << "Optimized " << optimized_funcs.size() + VLOG(1) << "Optimized " << optimized_funcs.size() << " functions: " << str_util::Join(optimized_funcs, ", "); return Status::OK(); -- cgit v1.2.3 From 7f9f25a008369ac90e7b96c4f58a3dd1c662d89c Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 11 Sep 2018 15:28:10 -0700 Subject: Move Quantile Stream Resource to core. Allow each Resource to manage multiple streams that share the same quantile config -- number of quantiles and epsilon. Previously each resource manage only one stream, so we will have to create resources equal to the number of features, which is cumbersome when input is high dimensional. If 1000 features use 100 quantiles (which is hardcoded today), then 1000 resources is required. This cl will create the number of resources linear to the number of parameter servers, if 2 parameter servers are present, then only 2 resources is required, one for each ps. Remove time stamp token as the ops are called once. 
PiperOrigin-RevId: 212533735 --- .../base_api/api_def_BoostedTreesBucketize.pbtxt | 34 ++ ..._BoostedTreesCreateQuantileStreamResource.pbtxt | 29 ++ ...api_def_BoostedTreesMakeQuantileSummaries.pbtxt | 40 ++ ...edTreesQuantileStreamResourceAddSummaries.pbtxt | 22 + ...f_BoostedTreesQuantileStreamResourceFlush.pbtxt | 31 ++ ...QuantileStreamResourceGetBucketBoundaries.pbtxt | 27 ++ ...oostedTreesQuantileStreamResourceHandleOp.pbtxt | 5 + ...tedTreesQuantileStreamResourceInitialized.pbtxt | 20 + tensorflow/core/kernels/boosted_trees/BUILD | 16 +- .../core/kernels/boosted_trees/quantile_ops.cc | 453 +++++++++++++++++++++ .../core/kernels/boosted_trees/quantiles/BUILD | 4 +- .../quantiles/quantile_stream_resource.h | 96 +++++ tensorflow/core/ops/boosted_trees_ops.cc | 125 ++++++ tensorflow/python/kernel_tests/boosted_trees/BUILD | 13 + .../boosted_trees/quantile_ops_test.py | 140 +++++++ tensorflow/python/ops/boosted_trees_ops.py | 6 + 16 files changed, 1059 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesBucketize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesCreateQuantileStreamResource.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesMakeQuantileSummaries.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt create mode 100644 tensorflow/core/kernels/boosted_trees/quantile_ops.cc create mode 100644 
tensorflow/core/kernels/boosted_trees/quantiles/quantile_stream_resource.h create mode 100644 tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py (limited to 'tensorflow') diff --git a/tensorflow/core/api_def/base_api/api_def_BoostedTreesBucketize.pbtxt b/tensorflow/core/api_def/base_api/api_def_BoostedTreesBucketize.pbtxt new file mode 100644 index 0000000000..cdaeb5091c --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_BoostedTreesBucketize.pbtxt @@ -0,0 +1,34 @@ +op { + graph_op_name: "BoostedTreesBucketize" + visibility: HIDDEN + in_arg { + name: "float_values" + description: <