diff options
65 files changed, 1521 insertions, 268 deletions
diff --git a/configure.py b/configure.py index 186fdc9ddc..ef5051d275 100644 --- a/configure.py +++ b/configure.py @@ -685,10 +685,13 @@ def set_tf_cunn_version(environ_cp): ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', - cudnn_path_from_ldconfig).group(1) - if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)): - cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) - break + cudnn_path_from_ldconfig) + if cudnn_path_from_ldconfig: + cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, + tf_cudnn_version)): + cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) + break # Reset and Retry print( diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 6e44a72e2b..68c324f2b9 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -146,6 +146,8 @@ class TensorCApi { } }; +Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst); + TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status); Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out); diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 01e251a1ac..e70539ceef 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -151,10 +151,11 @@ TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { return TF_SessionListDevices(ctx->session, status); } -TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t) { - return new TFE_TensorHandle( - tensorflow::TensorCApi::MakeTensor(t->dtype, t->shape, t->buffer), - nullptr); +TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { + tensorflow::Tensor tensor; + status->status = tensorflow::TF_TensorToTensor(t, &tensor); + if (!status->status.ok()) return nullptr; + return new TFE_TensorHandle(tensor, nullptr); } void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; } diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 476c9288f8..88a0dd343f 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -43,7 +43,7 @@ extern TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, // placed in memory of different devices or remote address spaces. typedef struct TFE_TensorHandle TFE_TensorHandle; -extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t); +extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status); extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h); diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 6f5c21c947..72e0fe8a15 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -34,8 +34,11 @@ TFE_TensorHandle* TestMatrixTensorHandle() { TF_Tensor* t = TF_AllocateTensor( TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); - TFE_TensorHandle* th = TFE_NewTensorHandle(t); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteTensor(t); + TF_DeleteStatus(status); return th; } @@ -383,7 +386,9 @@ TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value, memcpy(TF_TensorData(t.get()), &value, TF_TensorByteSize(t.get())); std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)> - value_handle(TFE_NewTensorHandle(t.get()), TFE_DeleteTensorHandle); + value_handle(TFE_NewTensorHandle(t.get(), status), + TFE_DeleteTensorHandle); + if (TF_GetCode(status) != TF_OK) return nullptr; TFE_OpAddInput(op, value_handle.get(), status); if (TF_GetCode(status) != TF_OK) return nullptr; diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 66a943410e..82469261e5 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -78,6 +78,10 @@ class SymbolicGradientBuilder { const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs); + // Returns a list mapping whether each node in the graph is reachable + // from outputs_. Keyed by node id. + std::vector<bool> GetReachableNodes(); + const Scope& scope_; const ops::GradOpRegistry* registry_; const std::vector<Output>& outputs_; @@ -143,11 +147,36 @@ Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad, return Status::OK(); } +std::vector<bool> SymbolicGradientBuilder::GetReachableNodes() { + std::vector<bool> reachable_nodes(scope_.graph()->num_node_ids(), false); + std::deque<Node*> queue; + for (const Output& out : outputs_) { + if (!reachable_nodes[out.node()->id()]) { + queue.push_back(out.node()); + reachable_nodes[out.node()->id()] = true; + } + } + + while (!queue.empty()) { + Node* n = queue.front(); + queue.pop_front(); + for (const Edge* e : n->in_edges()) { + if (e->IsControlEdge()) continue; + queue.push_back(e->src()); + reachable_nodes[e->src()->id()] = true; + } + } + return reachable_nodes; +} + Status SymbolicGradientBuilder::Initialize() { if (outputs_.size() != grad_inputs_.size()) { return errors::InvalidArgument( "Must specify a gradient input for each output."); } + std::vector<bool> reachable_nodes = GetReachableNodes(); + // TODO(theflofly) Check that inputs_ are reachable from + // outputs_ using reachable_nodes grad_outputs_->clear(); grad_outputs_->resize(inputs_.size()); // Populate `output_nodes_` from node ids in `outputs_`. @@ -188,12 +217,15 @@ Status SymbolicGradientBuilder::Initialize() { if (output_nodes_.find(n->id()) == output_nodes_.end()) { // Internal node: continue BFS along connected outputs. for (const Edge* e : n->out_edges()) { - if (e->IsControlEdge()) continue; - ++num_expected_backprops; + // If a node is not reachable from outputs_, + // we don't expect it to receive a backpropagated gradient. + // It will not be counted in num_expected_backprops. + if (e->IsControlEdge() || !reachable_nodes[e->dst()->id()]) continue; if (visited.find(e->dst()) == visited.end()) { queue.push_back(e->dst()); visited.insert(e->dst()); } + ++num_expected_backprops; } } else { // Output node: stop BFS and update `num_expected_backprops` for diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc index 24af7d567b..032ab93623 100644 --- a/tensorflow/cc/framework/gradients_test.cc +++ b/tensorflow/cc/framework/gradients_test.cc @@ -364,6 +364,73 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) { test::AsTensor<int>({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2})); } +TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) { + auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE); + auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}); + auto x_assign = Assign(scope_test_, x, x_const); + + auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE); + auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); + auto y_assign = Assign(scope_test_, y, y_const); + + auto m1 = MatMul(scope_test_, x, y); + + auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE); + auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}}); + auto z_assign = Assign(scope_test_, z, z_const); + + auto m2 = MatMul(scope_test_, y, z); + + auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + + std::vector<Output> grad_outputs; + TF_ASSERT_OK( + AddSymbolicGradients(scope_test_, {m1}, {y}, {dm1}, &grad_outputs)); + + std::vector<Tensor> outputs; + test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, + {grad_outputs[0]}, &outputs); + // dz/dy = xT * dm1 + test::ExpectTensorNear<double>( + outputs[0], test::AsTensor<double>({2.5, 3.5, 4.5}, {3, 1}), 1e-5); +} + +TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) { + auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE); + auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}); + auto x_assign = Assign(scope_test_, x, x_const); + + auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE); + auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}}); + auto y_assign = Assign(scope_test_, y, y_const); + + auto m1 = MatMul(scope_test_, x, y); + + auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE); + auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}}); + auto z_assign = Assign(scope_test_, z, z_const); + + auto m2 = MatMul(scope_test_, y, z); + + auto dm1 = Const(scope_test_, {{0.5}, {0.5}}); + auto dm2 = + Const(scope_test_, {{0.5, 0.5, 0.5}, {0.6, 0.7, 0.8}, {0.6, 0.7, 0.9}}); + + std::vector<Output> grad_outputs; + TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {m1, m2}, {y}, {dm1, dm2}, + &grad_outputs)); + + std::vector<Tensor> outputs; + test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, + {grad_outputs[0]}, &outputs); + + // the gradients from m1 and m2 will be summed to compute the gradient + // w.r.t y + // dz/dy = xT * dm1 + dm2 * zT + test::ExpectTensorNear<double>( + outputs[0], test::AsTensor<double>({17.5, 24.7, 26.8}, {3, 1}), 1e-5); +} + // StopGradientSingleOutputMultiEdgeTest tests combinations of valid and // 'NoGradient' (induced by StopGradient op) returned along multiple edges from // a single nodes output. diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc index ca78f31db5..57d573e3c5 100644 --- a/tensorflow/cc/framework/testutil.cc +++ b/tensorflow/cc/framework/testutil.cc @@ -36,5 +36,19 @@ void GetTensor(const Scope& scope, Output tensor, Tensor* out) { *out = outputs[0]; } +void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars, + const OutputList& tensors, std::vector<Tensor>* out) { + ClientSession session(scope); + TF_CHECK_OK(session.Run(assign_vars, nullptr)); + TF_CHECK_OK(session.Run(tensors, out)); +} + +void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars, + Output tensor, Tensor* out) { + std::vector<Tensor> outputs; + GetTensors(scope, assign_vars, {std::move(tensor)}, &outputs); + *out = outputs[0]; +} + } // end namespace test } // end namespace tensorflow diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h index d027ad3744..a3e19870ec 100644 --- a/tensorflow/cc/framework/testutil.h +++ b/tensorflow/cc/framework/testutil.h @@ -26,9 +26,21 @@ namespace test { void GetTensors(const Scope& scope, OutputList tensors, std::vector<Tensor>* out); +// Computes the outputs listed in 'tensors', returns the tensors in 'out'. +// assign_vars are extra outputs that should be run +// e.g. to assign values to variables. +void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars, + const OutputList& tensors, std::vector<Tensor>* out); + /// Computes the output 'tensor', returning the resulting tensor in 'out'. void GetTensor(const Scope& scope, Output tensor, Tensor* out); +// Computes the output 'tensor', returning the resulting tensor in 'out'. +// assign_vars are extra outputs that should be run +// e.g. to assign values to variables. +void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars, + Output tensor, Tensor* out); + } // namespace test } // namespace tensorflow diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 422df3063e..c249a28556 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -33,6 +33,7 @@ option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for cont option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") +option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) if (NOT WIN32) # Threads: defines CMAKE_THREAD_LIBS_INIT and adds -pthread compile option @@ -204,6 +205,12 @@ if(tensorflow_ENABLE_JEMALLOC_SUPPORT) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES jemalloc) include_directories(${jemalloc_INCLUDE_DIRS}) endif() +if(tensorflow_ENABLE_SNAPPY_SUPPORT) + include(snappy) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${snappy_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy) + include_directories(${snappy_INCLUDE_DIR}) +endif() if(WIN32) list(APPEND tensorflow_EXTERNAL_LIBRARIES wsock32 ws2_32 shlwapi) endif() diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index 04a9664701..dc27eadaca 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(boringssl_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src/boringssl/include) #set(boringssl_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src) set(boringssl_URL https://boringssl.googlesource.com/boringssl) -set(boringssl_TAG 17cf2cb1d226b0ba2401304242df7ddd3b6f1ff2) +set(boringssl_TAG ee7aa02) set(boringssl_BUILD ${CMAKE_BINARY_DIR}/boringssl/src/boringssl-build) #set(boringssl_LIBRARIES ${boringssl_BUILD}/obj/so/libboringssl.so) set(boringssl_STATIC_LIBRARIES diff --git a/tensorflow/contrib/cmake/external/snappy.cmake b/tensorflow/contrib/cmake/external/snappy.cmake new file mode 100644 index 0000000000..a35d8654fb --- /dev/null +++ b/tensorflow/contrib/cmake/external/snappy.cmake @@ -0,0 +1,50 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +include (ExternalProject) + +set(snappy_URL https://github.com/google/snappy.git) +set(snappy_TAG "55924d11095df25ab25c405fadfe93d0a46f82eb") +set(snappy_BUILD ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) +set(snappy_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/snappy/src/snappy) + +if(WIN32) + set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/$(Configuration)/snappy.lib) +else() + set(snappy_STATIC_LIBRARIES ${snappy_BUILD}/libsnappy.a) +endif() + +set(snappy_HEADERS + "${snappy_INCLUDE_DIR}/snappy.h" +) + +ExternalProject_Add(snappy + PREFIX snappy + GIT_REPOSITORY ${snappy_URL} + GIT_TAG ${snappy_TAG} + DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 + INSTALL_COMMAND "" + LOG_DOWNLOAD ON + LOG_CONFIGURE ON + LOG_BUILD ON + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DSNAPPY_BUILD_TESTS:BOOL=OFF + -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON +) + +# actually enables snappy in the source code +add_definitions(-DSNAPPY)
\ No newline at end of file diff --git a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py index 97b4ec44fc..7240fc7422 100644 --- a/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/map_dataset_op_test.py @@ -16,6 +16,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple import os import threading @@ -481,6 +482,40 @@ class MapDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testMapNamedtuple(self, count=10): + # construct dataset of tuples + labels = dataset_ops.Dataset.range(count) + images = labels.map(lambda l: -l) + dataset_tuple = dataset_ops.Dataset.zip((labels, images)) + + # convert dataset of tuples to dataset of namedtuples + example = namedtuple("Example", ["label", "image"]) + dataset_namedtuple = dataset_tuple.map(example) + + def preprocess_tuple(label, image): + image = 2 * image + return label, image + + def preprocess_namedtuple(example): + return example._replace(image=2 * example.image) + + # preprocess both datasets + dataset_tuple = dataset_tuple.map(preprocess_tuple) + dataset_namedtuple = dataset_namedtuple.map(preprocess_namedtuple) + + next_tuple = dataset_tuple.make_one_shot_iterator().get_next() + next_namedtuple = dataset_namedtuple.make_one_shot_iterator().get_next() + + # make sure both datasets contain the same data + with self.test_session() as sess: + for i in range(count): + tuple_, namedtuple_ = sess.run([next_tuple, next_namedtuple]) + self.assertEqual(tuple_, namedtuple_) + self.assertEqual(tuple_, (i, -2 * i)) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_namedtuple) + def testUseStepContainerInMap(self): row = np.arange(6) iterator = ( diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index abf7bcb384..0ee9acfc97 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -1921,7 +1921,7 @@ class DenseToSparseBatchDataset(Dataset): def _should_unpack_args(args): """Returns `True` if `args` should be `*args` when passed to a callable.""" - return nest.is_sequence(args) and not isinstance(args, dict) + return type(args) is tuple # pylint: disable=unidiomatic-typecheck class _ResourceDataset(Dataset): @@ -2104,7 +2104,7 @@ class InterleaveDataset(Dataset): nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - if nest.is_sequence(nested_args): + if _should_unpack_args(nested_args): dataset = map_func(*nested_args) else: dataset = map_func(nested_args) @@ -2413,7 +2413,7 @@ def rejection_resample(dataset, shapes and types defined by `dataset.output_shapes` and `dataset.output_types`) to a scalar `tf.int32` tensor. Values should be in `[0, num_classes)`. - target_dist: A floating point type tensor, shaped `[num_classes]. + target_dist: A floating point type tensor, shaped `[num_classes]`. initial_dist: (Optional.) A floating point type tensor, shaped `[num_classes]`. If not provided, the true class distribution is estimated live in a streaming fashion. diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index da1cd72a6f..699cf45a73 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -150,7 +150,7 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): `N - 1` dimensions index into a batch of independent distributions and the last dimension represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. - dtype: The type of the event samples (default: int32). + dtype: The type of the event samples (default: float32). validate_args: Python `bool`, default `False`. When `True` distribution parameters are checked for validity despite possibly degrading runtime performance. When `False` invalid inputs may silently render incorrect @@ -388,7 +388,7 @@ class RelaxedOneHotCategorical( dimensions index into a batch of independent distributions and the last dimension represents a vector of probabilities for each class. Only one of `logits` or `probs` should be passed in. - dtype: The type of the event samples (default: int32). + dtype: The type of the event samples (default: float32). validate_args: Unused in this distribution. allow_nan_stats: Python `bool`, default `True`. If `False`, raise an exception if a statistic (e.g. mean/mode/etc...) is undefined for any diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index 7eb410b4c7..33db93b970 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -156,9 +156,9 @@ def optimize_loss(loss, loss = ops.convert_to_tensor(loss) contrib_framework.assert_scalar(loss) if global_step is None: - global_step = contrib_framework.get_global_step() + global_step = train.get_global_step() else: - contrib_framework.assert_global_step(global_step) + train.assert_global_step(global_step) with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]): # Update ops take UPDATE_OPS collection if not provided. if update_ops is None: diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index c31d5d2d47..861db1f89e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -24,7 +24,6 @@ import six from tensorflow.contrib import framework as framework_lib from tensorflow.contrib import layers as layers_lib -from tensorflow.contrib import lookup as lookup_lib from tensorflow.contrib.learn.python.learn.estimators import constants from tensorflow.contrib.learn.python.learn.estimators import model_fn from tensorflow.contrib.learn.python.learn.estimators import prediction_key @@ -35,6 +34,7 @@ from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import nn @@ -1070,9 +1070,8 @@ class _MultiClassHead(_SingleHead): labels_tensor = _to_labels_tensor(labels, self._label_name) _check_no_sparse_tensor(labels_tensor) if self._label_keys: - table = lookup_lib.string_to_index_table_from_tensor( - mapping=self._label_keys, - name="label_id_lookup") + table = lookup_ops.index_table_from_tensor( + self._label_keys, name="label_id_lookup") return { "labels": labels_tensor, "label_ids": table.lookup(labels_tensor), @@ -1106,9 +1105,8 @@ class _MultiClassHead(_SingleHead): class_ids = math_ops.argmax( logits, 1, name=prediction_key.PredictionKey.CLASSES) if self._label_keys: - table = lookup_lib.index_to_string_table_from_tensor( - mapping=self._label_keys, - name="class_string_lookup") + table = lookup_ops.index_to_string_table_from_tensor( + self._label_keys, name="class_string_lookup") classes = table.lookup(class_ids) else: classes = class_ids diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1915a89af7..9319928307 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2156,8 +2156,6 @@ tf_cc_tests( "platform/port_test.cc", "platform/profile_utils/cpu_utils_test.cc", "platform/subprocess_test.cc", - "platform/vmodule_benchmark_test.cc", - "platform/vmodule_test.cc", ], deps = [ ":lib", diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc index e7092f549b..f5dadf76da 100644 --- a/tensorflow/core/framework/allocator.cc +++ b/tensorflow/core/framework/allocator.cc @@ -117,16 +117,6 @@ class CPUAllocator : public Allocator { TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator); }; -namespace { -Allocator* MakeCpuAllocator() { - Allocator* allocator = new CPUAllocator; - if (cpu_allocator_collect_full_stats || LogMemory::IsEnabled()) { - allocator = new TrackingAllocator(allocator, true); - } - return allocator; -} -} // namespace - Allocator* cpu_allocator() { static Allocator* cpu_alloc = AllocatorRegistry::Global()->GetAllocator(); if (cpu_allocator_collect_full_stats && !cpu_alloc->TracksAllocationSizes()) { diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 2f9ceaa3bd..cf5d6e8baa 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1099,6 +1099,44 @@ int MklLayoutRewritePass::SetUpContiguousInputs( CHECK_NOTNULL(workspace_tensors); CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + // TODO(nhasabni): Temporary solution to connect filter input of + // BackpropInput with the converted filter from Conv2D. + bool do_connect_conv2d_backprop_input_filter = false; + Node* conv2d_node = nullptr; + // Filter node is 2nd input (slot index 1) of Conv2D. + int kConv2DFilterInputSlotIdx = 1; + int kConv2DBackpropInputFilterInputSlotIdx = 1; + int kConv2DFilterOutputSlotIdx = 1; + if (old_node->type_string() == csinfo_.conv2d_grad_input) { + // We need to find Conv2D node from Conv2DBackpropInput. + // For that let's first find filter node that is 2nd input (slot 1) + // of BackpropInput. + Node* filter_node = nullptr; + old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node); + CHECK_NOTNULL(filter_node); + + // Now check which nodes receive from filter_node. Filter feeds as + // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias. + for (const Edge* e : filter_node->out_edges()) { + if (e->dst()->type_string() == csinfo_.mkl_conv2d && + e->dst_input() == kConv2DFilterInputSlotIdx + /* filter is 2nd input of Conv2D and _MklConv2D. */) { + if (conv2d_node != nullptr) { + VLOG(1) << "MklLayoutRewritePass: unusual case of same filter" + << " feeding multiple Conv2D nodes: " + << filter_node->DebugString(); + // We will not connect filter input of Conv2DBackpropInput + // to be safe here. + do_connect_conv2d_backprop_input_filter = false; + break; + } else { + conv2d_node = e->dst(); + do_connect_conv2d_backprop_input_filter = true; + } + } + } + } + // Number of input slots to original op // Input slots are represented by .Input() calls in REGISTER_OP. int old_node_input_slots = old_node->op_def().input_arg_size(); @@ -1122,7 +1160,13 @@ int MklLayoutRewritePass::SetUpContiguousInputs( nb->Input(new_node_inputs); nn_slot_idx++; } else { - nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second); + // Special case for connecting filter input of Conv2DBackpropInput + if (do_connect_conv2d_backprop_input_filter && + iidx == kConv2DBackpropInputFilterInputSlotIdx) { + nb->Input(conv2d_node, kConv2DFilterOutputSlotIdx); + } else { + nb->Input(old_node_inputs[iidx].first, old_node_inputs[iidx].second); + } iidx++; nn_slot_idx++; } @@ -1157,9 +1201,17 @@ int MklLayoutRewritePass::SetUpContiguousInputs( } else { Node* mkl_node = nullptr; int mkl_node_output_slot = 0; - GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first, - old_node_inputs[iidx].second, - &mkl_node, &mkl_node_output_slot); + // Special case for connecting filter input of Conv2DBackpropInput + if (do_connect_conv2d_backprop_input_filter && + iidx == kConv2DBackpropInputFilterInputSlotIdx) { + GetNodeProducingMklTensor(g, old_node, conv2d_node, + kConv2DFilterOutputSlotIdx, &mkl_node, + &mkl_node_output_slot); + } else { + GetNodeProducingMklTensor(g, old_node, old_node_inputs[iidx].first, + old_node_inputs[iidx].second, &mkl_node, + &mkl_node_output_slot); + } nb->Input(mkl_node, mkl_node_output_slot); iidx++; nn_slot_idx++; diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 482e339802..bd1d74368e 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -788,7 +788,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Positive1) { "DMT/_1(Const);DMT/_2(Const);E(Mul)|A->C;A->D;" "A:control->DMT/_0:control;A:control->DMT/_1:control;" "A:control->DMT/_2:control;B->C:1;C->D:1;C->E;" - "C:1->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2"); + "C:2->D:3;D->E:1;DMT/_0->C:2;DMT/_1->C:3;DMT/_2->D:2"); } // Conv2D with INT32 which is not supported by Mkl @@ -917,7 +917,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) { "A:control->DMT/_2:control;A:control->DMT/_3:control;" "B->E:1;C->F;C:control->DMT/_0:control;C:control->DMT/_1:control;" "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:3;E->H:1;E:1->H:4;F->H:2;F:1->H:5;G->H;" + "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;" "G:control->DMT/_4:control;H->I:1"); } @@ -953,7 +953,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) { "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);" "H(_MklConcat);I(Mul)|A->E;A->I;A:control->DMT/_0:control;" "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:1->H:4;F->H:2;" + "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;" "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1"); } @@ -1023,8 +1023,8 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) { "A:control->DMT/_2:control;A:control->DMT/_3:control;B->E:1;C->F;" "C:control->DMT/_0:control;C:control->DMT/_1:control;" "D->F:1;DMT/_0->F:2;DMT/_1->F:3;DMT/_2->E:2;DMT/_3->E:3;" - "DMT/_4->H:5;E->H;E:1->H:3;E:control->DMT/_4:control;F->H:1;" - "F:1->H:4;G->H:2;H->I:1"); + "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;" + "F:2->H:4;G->H:2;H->I:1"); } // ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it @@ -1060,7 +1060,7 @@ TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) { "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Mul);G(Const);" "H(_MklConcatV2);I(Mul)|A->E;A->I;A:control->DMT/_0:control;" "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:1->H:3;" + "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;" "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;" "G->H:2;H->I:1"); } diff --git a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc index 90bef11164..b01818f746 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass_test.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass_test.cc @@ -173,13 +173,13 @@ TEST_F(MklToTfConversionPass, Positive) { EXPECT_EQ(DoRunMklToTfConversionPass(), "A(Input);B(Input);C(_MklConv2D);D(Input);E(Sub);M(_MklInput);" "Mkl2Tf/_0(_MklToTf);N(_MklInput)|A->C;B->C:1;C->Mkl2Tf/_0;" - "C:1->Mkl2Tf/_0:1;D->E:1;M->C:2;Mkl2Tf/_0->E;N->C:3"); + "C:2->Mkl2Tf/_0:1;D->E:1;M->C:2;Mkl2Tf/_0->E;N->C:3"); } } // MklConv2D followed by MklToTf op followed by Non-Mkl layer. // C=MklConv2D(A,M,B,N); D=MklToTf(C:0, C:1) F=Sub(D,E) (for interleaved) -// C=MklConv2D(A,B,M,N); D=MklToTf(C:0, C:1) F=Sub(D,E) (for contiguous) +// C=MklConv2D(A,B,M,N); D=MklToTf(C:0, C:2) F=Sub(D,E) (for contiguous) // MklToTf node should not be inserted again. TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { if (kTensorOrdering == MklTfTensorOrdering::TENSORS_INTERLEAVED) { @@ -226,7 +226,7 @@ TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { "node { name: 'D' op: '_MklToTf'" " attr { key: 'T' value { type: DT_FLOAT } }" " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['C:0', 'C:1']}" + " input: ['C:0', 'C:2']}" "node { name: 'E' op: 'Input'}" "node { name: 'F' op: 'Sub'" " attr {key: 'T' value { type: DT_FLOAT } }" @@ -234,7 +234,7 @@ TEST_F(MklToTfConversionPass, Negative_DoubleInsert) { EXPECT_EQ(DoRunMklToTfConversionPass(), "A(Input);B(Input);C(_MklConv2D);D(_MklToTf);E(Input);" "F(Sub);M(_MklInput);N(_MklInput)|" - "A->C;B->C:1;C->D;C:1->D:1;D->F;E->F:1;M->C:2;N->C:3"); + "A->C;B->C:1;C->D;C:2->D:1;D->F;E->F:1;M->C:2;N->C:3"); } } diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 082101ce11..8dd8900f28 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2590,7 +2590,9 @@ tf_kernel_library( tf_kernel_library( name = "segment_reduction_ops", prefix = "segment_reduction_ops", - deps = MATH_DEPS, + deps = MATH_DEPS + if_cuda([ + ":cuda_solvers", + ]), ) tf_kernel_library( @@ -3344,6 +3346,19 @@ tf_kernel_library( deps = PARSING_DEPS, ) +tf_cc_test( + name = "parse_tensor_test", + srcs = ["parse_tensor_test.cc"], + deps = [ + ":ops_testutil", + ":parse_tensor_op", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "string_to_number_op", prefix = "string_to_number_op", diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index eb173c7040..6adaecba04 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -18,7 +18,10 @@ limitations under the License. namespace tensorflow { REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, int64, complex64, complex128); -#if defined(__ANDROID_TYPES_SLIM__) +#if !defined(__ANDROID_TYPES_SLIM__) +// Sub op for int8, uint8, int16, uint16 +REGISTER4(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16); +#else // We only register the first type when we have multi-argument calls in the // case where we're trying to reduce executable size, but it turns out that the // int32 version of this op is needed, so explicitly include it. diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index ef7338e0e0..00884d0981 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -97,8 +97,12 @@ class MklConv2DCustomBackpropInputOp : public OpKernel { errors::InvalidArgument( "Conv2DCustomBackpropInput: size must be 4-dim")); - MklSizesToTFSizes(context, data_format, mkl_context.filter_shape, - &filter_shape); + const int64* filter_sizes = + (const int64*)mkl_context.filter_shape.GetSizes(); + const int64 filter_dims = mkl_context.filter_shape.GetDimension(); + + OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( + filter_sizes, filter_dims, &filter_shape)); } else { filter_shape = filter.shape(); } diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 203e694631..5dfce5d5c6 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -265,6 +265,28 @@ class MklConv2DOp : public OpKernel { sizeof(T)); AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape, mkl_output_mkl_shape); + // Filter output to be used in the backprop_input + TensorShape mkl_filter_output_tf_shape; + MklShape mkl_filter_output_mkl_shape; + mkl_filter_output_mkl_shape.SetMklTensor(true); + mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, + dnnResourceFilter); + + size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1), + filter.dim_size(2), filter.dim_size(3)}; + mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes, + mkl_context.filter_strides); + + mkl_filter_output_mkl_shape.SetTfDimOrder(mkl_context.filter_dims, + data_format_); + mkl_filter_output_tf_shape.AddDim( + dnnLayoutGetMemorySize_F32(static_cast<dnnLayout_t>( + mkl_filter_output_mkl_shape.GetMklLayout())) / + sizeof(T)); + AllocateOutputSetMklShape(context, 1, &mkl_context.output_filter, + mkl_filter_output_tf_shape, + mkl_filter_output_mkl_shape); + mkl_context.conv_res[dnnResourceDst] = static_cast<void*>(output->flat<T>().data()); @@ -303,6 +325,7 @@ class MklConv2DOp : public OpKernel { dnnPrimitive_t prim_fwd; void* conv_res[dnnResourceNumber]; dnnLayout_t lt_filter, lt_bias, lt_input; + Tensor* output_filter = nullptr; // Create MKL dnnLayout_t objects for tensors coming into the layer void MklCreateInputLayouts(OpKernelContext* context) { @@ -383,8 +406,8 @@ class MklConv2DOp : public OpKernel { CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_filter, lt_filter, mkl_lt_internal_filter), E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_filter_buf_tensor, - mkl_lt_internal_filter, &mkl_buf_convert_filter); + mkl_buf_convert_filter = const_cast<void*>( + static_cast<const void*>(output_filter->flat<T>().data())); CHECK_EQ( dnnConversionExecute_F32(mkl_prim_convert_filter, mkl_buf_filter, mkl_buf_convert_filter), diff --git a/tensorflow/core/kernels/mkl_reshape_op.cc b/tensorflow/core/kernels/mkl_reshape_op.cc index b3763f17bc..5e98582475 100644 --- a/tensorflow/core/kernels/mkl_reshape_op.cc +++ b/tensorflow/core/kernels/mkl_reshape_op.cc @@ -43,30 +43,26 @@ class MklReshapeOp : public OpKernel { OP_REQUIRES(context, IsLegacyVector(sizes.shape()), errors::InvalidArgument("sizes input must be 1-D, not shape ", sizes.shape().DebugString())); - const int64 num_dims = sizes.NumElements(); // Compute the output shape. Determine product of specified // dimensions, and find the index of the unspecified one. TensorShape shape; int64 product = 1; int unknown_index = -1; - auto vec_size = sizes.flat<int32>(); - for (int d = 0; d < num_dims; ++d) { - const int32 size = vec_size(d); - if (size == -1) { - OP_REQUIRES( - context, unknown_index == -1, - errors::InvalidArgument("only one input size may be -1, not both ", - unknown_index, " and ", d)); - unknown_index = d; - shape.AddDim(1); - } else { - OP_REQUIRES(context, size >= 0, - errors::InvalidArgument( - "size ", d, " must be non-negative, not ", size)); - shape.AddDim(size); - product *= size; - } + switch (sizes.dtype()) { + case DT_INT32: + OP_REQUIRES_OK(context, ValidateSizes<int32>(sizes, &product, + &unknown_index, &shape)); + break; + case DT_INT64: + OP_REQUIRES_OK(context, ValidateSizes<int64>(sizes, &product, + &unknown_index, &shape)); + break; + default: + context->CtxFailure(errors::InvalidArgument( + "desired shape must be a DT_INT32 or DT_INT64 vector, not a ", + DataTypeString(sizes.dtype()))); + return; } if (unknown_index != -1) { OP_REQUIRES( @@ -132,6 +128,35 @@ class MklReshapeOp : public OpKernel { CopyTfTensorInToOutWithShape(context, 0, 0, shape); } } + + private: + template <typename Tshape> + Status ValidateSizes(const Tensor& sizes, int64* product, int* unknown_index, + TensorShape* shape) { + *product = 1; + *unknown_index = -1; + const int64 num_dims = sizes.NumElements(); + auto Svec = sizes.flat<Tshape>(); + for (int d = 0; d < num_dims; ++d) { + const Tshape size = Svec(d); + if (size == -1) { + if (*unknown_index != -1) { + return errors::InvalidArgument( + "Only one input size may be -1, not both ", *unknown_index, + " and ", d); + } + *unknown_index = d; + shape->AddDim(1); + } else if (size < 0) { + return errors::InvalidArgument("Size ", d, + " must be non-negative, not ", size); + } else { + shape->AddDim(size); + (*product) *= size; + } + } + return Status::OK(); + } }; #define REGISTER_MKL_CPU(T) \ @@ -141,6 +166,13 @@ class MklReshapeOp : public OpKernel { .TypeConstraint<T>("T") \ .TypeConstraint<int32>("Tshape") \ .Label(mkl_op_registry::kMklOpLabel), \ + MklReshapeOp<CPUDevice, T>); \ + REGISTER_KERNEL_BUILDER(Name("_MklReshape") \ + .Device(DEVICE_CPU) \ + .HostMemory("shape") \ + .TypeConstraint<T>("T") \ + .TypeConstraint<int64>("Tshape") \ + .Label(mkl_op_registry::kMklOpLabel), \ MklReshapeOp<CPUDevice, T>); TF_CALL_float(REGISTER_MKL_CPU); #undef REGISTER_MKL_CPU diff --git a/tensorflow/core/kernels/parse_tensor_op.cc b/tensorflow/core/kernels/parse_tensor_op.cc index 79199ff5c3..8e175fe8d4 100644 --- a/tensorflow/core/kernels/parse_tensor_op.cc +++ b/tensorflow/core/kernels/parse_tensor_op.cc @@ -16,6 +16,7 @@ limitations under the License. // See docs in ../ops/parsing_ops.cc. #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.h" @@ -65,4 +66,31 @@ class ParseTensorOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("ParseTensor").Device(DEVICE_CPU), ParseTensorOp); +template <typename T> +class SerializeTensorOp : public OpKernel { + public: + using OpKernel::OpKernel; + + void Compute(OpKernelContext* context) override { + const Tensor& tensor = context->input(0); + TensorProto proto; + if (tensor.dtype() == DT_STRING) { + tensor.AsProtoField(&proto); + } else { + tensor.AsProtoTensorContent(&proto); + } + Tensor* proto_string = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({}), &proto_string)); + CHECK(proto.SerializeToString(&proto_string->scalar<string>()())); + } +}; + +#define REGISTER(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("SerializeTensor").Device(DEVICE_CPU).TypeConstraint<T>("T"), \ + SerializeTensorOp<T>); +TF_CALL_ALL_TYPES(REGISTER) +#undef REGISTER + } // namespace tensorflow diff --git a/tensorflow/core/kernels/parse_tensor_test.cc b/tensorflow/core/kernels/parse_tensor_test.cc new file mode 100644 index 0000000000..4a5fc07935 --- /dev/null +++ b/tensorflow/core/kernels/parse_tensor_test.cc @@ -0,0 +1,198 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <memory> +#include <string> +#include <vector> + +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/ops_testutil.h" + +namespace tensorflow { +namespace { + +class SerializeTensorOpTest : public OpsTestBase { + protected: + template <typename T> + void MakeOp(const TensorShape& input_shape, std::function<T(int)> functor) { + TF_ASSERT_OK(NodeDefBuilder("myop", "SerializeTensor") + .Input(FakeInput(DataTypeToEnum<T>::value)) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + AddInput<T>(input_shape, functor); + } + void ParseSerializedWithNodeDef(const NodeDef& parse_node_def, + Tensor* serialized, Tensor* parse_output) { + std::unique_ptr<Device> device( + DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0")); + gtl::InlinedVector<TensorValue, 4> inputs; + inputs.push_back({nullptr, serialized}); + Status status; + std::unique_ptr<OpKernel> op(CreateOpKernel(DEVICE_CPU, device.get(), + cpu_allocator(), parse_node_def, + TF_GRAPH_DEF_VERSION, &status)); + TF_EXPECT_OK(status); + OpKernelContext::Params params; + params.device = device.get(); + params.inputs = &inputs; + params.frame_iter = FrameAndIter(0, 0); + params.op_kernel = op.get(); + std::vector<AllocatorAttributes> attrs; + test::SetOutputAttrs(¶ms, &attrs); + OpKernelContext ctx(¶ms); + op->Compute(&ctx); + TF_EXPECT_OK(status); + *parse_output = *ctx.mutable_output(0); + } + template <typename T> + void ParseSerializedOutput(Tensor* serialized, Tensor* parse_output) { + NodeDef parse; + TF_ASSERT_OK(NodeDefBuilder("parse", "ParseTensor") + .Input(FakeInput(DT_STRING)) + .Attr("out_type", DataTypeToEnum<T>::value) + .Finalize(&parse)); + ParseSerializedWithNodeDef(parse, serialized, parse_output); + } +}; + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_half) { + MakeOp<Eigen::half>(TensorShape({10}), [](int x) -> Eigen::half { + return static_cast<Eigen::half>(x / 10.); + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<Eigen::half>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<Eigen::half>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_float) { + MakeOp<float>(TensorShape({1, 10}), + [](int x) -> float { return static_cast<float>(x / 10.); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<float>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<float>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_double) { + MakeOp<double>(TensorShape({5, 5}), + [](int x) -> double { return static_cast<double>(x / 10.); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<double>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<double>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int64) { + MakeOp<int64>(TensorShape({2, 3, 4}), + [](int x) -> int64 { return static_cast<int64>(x - 10); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<int64>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<int64>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int32) { + MakeOp<int32>(TensorShape({4, 2}), + [](int x) -> int32 { return static_cast<int32>(x + 7); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<int32>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<int32>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int16) { + MakeOp<int16>(TensorShape({8}), + [](int x) -> int16 { return static_cast<int16>(x + 18); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<int16>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<int16>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_int8) { + MakeOp<int8>(TensorShape({2}), + [](int x) -> int8 { return static_cast<int8>(x + 8); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<int8>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<int8>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint16) { + MakeOp<uint16>(TensorShape({1, 3}), + [](int x) -> uint16 { return static_cast<uint16>(x + 2); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<uint16>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<uint16>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_uint8) { + MakeOp<uint8>(TensorShape({2, 1, 1}), + [](int x) -> uint8 { return static_cast<uint8>(x + 1); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<uint8>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<uint8>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex64) { + MakeOp<complex64>(TensorShape({}), [](int x) -> complex64 { + return complex64{static_cast<float>(x / 8.), static_cast<float>(x / 2.)}; + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<complex64>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<complex64>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_complex128) { + MakeOp<complex128>(TensorShape({3}), [](int x) -> complex128 { + return complex128{x / 3., x / 2.}; + }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<complex128>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<complex128>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_bool) { + MakeOp<bool>(TensorShape({1}), + [](int x) -> bool { return static_cast<bool>(x % 2); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<bool>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<bool>(parse_output, GetInput(0)); +} + +TEST_F(SerializeTensorOpTest, SerializeTensorOpTest_string) { + MakeOp<string>(TensorShape({10}), + [](int x) -> string { return std::to_string(x / 10.); }); + TF_ASSERT_OK(RunOpKernel()); + Tensor parse_output; + ParseSerializedOutput<string>(GetOutput(0), &parse_output); + test::ExpectTensorEqual<string>(parse_output, GetInput(0)); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 9cdbe89457..5624d5cd1b 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -16,6 +16,9 @@ limitations under the License. // See docs in ../ops/math_ops.cc. #define EIGEN_USE_THREADS +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif // GOOGLE_CUDA #include "tensorflow/core/kernels/segment_reduction_ops.h" #include <vector> @@ -32,6 +35,14 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/util/util.h" +#if GOOGLE_CUDA +#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" +#include "tensorflow/core/kernels/cuda_solvers.h" +#include "tensorflow/core/platform/cuda.h" + +using ::perftools::gputools::cuda::ScopedActivateExecutorContext; +#endif // GOOGLE_CUDA + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -183,6 +194,106 @@ class SegmentReductionOp : public OpKernel { } }; +#ifdef GOOGLE_CUDA +// SegmentSumGPUOp is a segment sum operator implemented for GPU only. +// TODO: This implementation of SegmentSumGPUOp is sometimes slower than +// its unsorted counterpart (mostly when problem size is small). +// This is due to the following two main reasons and a cost-effective way +// to resolve these problems is desirable. +// 1. Sorted segment sum requires a memory transfer from device to host in +// order to know the size of the output dimension whereas unsorted segment +// sum receives the size of the output dimension as an input parameter. +// 2. Sorted segment sum is essentially a tiled version of unsorted segment +// sum and therefore such optimization comes at an inherent cost. However +// such cost may not be justified when the problem size is small. When to +// use the tiled version or the untiled version depends on many factors +// including data alignments, ratio of calculation to memory traffic and +// obviously, the problem sizes. +template <class T, class Index> +class SegmentSumGPUOp : public AsyncOpKernel { + public: + explicit SegmentSumGPUOp(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + + void ComputeAsync(OpKernelContext* context, DoneCallback done) override { + const Tensor& input = context->input(0); + const Tensor& segment_ids = context->input(1); + + OP_REQUIRES_ASYNC( + context, TensorShapeUtils::IsVector(segment_ids.shape()), + errors::InvalidArgument("segment_ids should be a vector."), done); + + const int64 num_indices = segment_ids.NumElements(); + OP_REQUIRES_ASYNC( + context, num_indices == input.dim_size(0), + errors::InvalidArgument( + "segment_ids should be the same size as dimension 0 of" + " input."), + done); + + if (num_indices == 0) { + TensorShape output_shape = input.shape(); + output_shape.set_dim(0, 0); + + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + context, context->allocate_output(0, output_shape, &output), done); + done(); + return; + } + + perftools::gputools::DeviceMemoryBase output_rows_device( + (void*)(segment_ids.template flat<Index>().data() + (num_indices - 1))); + ScratchSpace<Index> output_rows_host(context, 1, /* on_host */ true); + + auto stream = context->op_device_context()->stream(); + OP_REQUIRES_ASYNC( + context, + stream + ->ThenMemcpy(output_rows_host.mutable_data(), output_rows_device, + sizeof(Index)) + .ok(), + errors::Internal( + "SegmentSumGPUOp: failed to copy output_rows from device"), + done); + + functor::SegmentSumFunctor<T, Index> functor_; + auto create_and_check_output = [context, output_rows_host, &input, + &segment_ids, &functor_, done]() { + // Ensure that within the callback, the proper GPU settings are + // configured. + auto stream = context->op_device_context()->stream(); + ScopedActivateExecutorContext scoped_activation{stream->parent()}; + + Index output_rows = *output_rows_host.data(); + output_rows++; + OP_REQUIRES_ASYNC(context, output_rows > 0, + errors::InvalidArgument("segment ids must be >= 0"), + done); + + TensorShape output_shape = input.shape(); + output_shape.set_dim(0, output_rows); + + Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + context, context->allocate_output(0, output_shape, &output), done); + + auto output_flat = output->flat_outer_dims<T>(); + auto data_ptr = input.template flat<T>().data(); + auto segment_flat = segment_ids.flat<Index>(); + functor_(context, context->eigen_device<GPUDevice>(), output_rows, + segment_ids.shape(), segment_flat, input.NumElements(), data_ptr, + output_flat); + + done(); + }; + + context->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( + stream, create_and_check_output); + } +}; +#endif // GOOGLE_CUDA + #define REGISTER_CPU_KERNEL_SEGMENT(name, functor, type, index_type, \ default_value) \ REGISTER_KERNEL_BUILDER( \ @@ -227,6 +338,23 @@ REGISTER_COMPLEX_CPU_KERNELS_ALL(complex128); #undef REGISTER_REAL_CPU_KERNELS_ALL #undef REGISTER_COMPLEX_CPU_KERNELS_ALL +#if GOOGLE_CUDA +#define REGISTER_GPU_SORTED_KERNELS(type, index_type) \ + REGISTER_KERNEL_BUILDER(Name("SegmentSum") \ + .Device(DEVICE_GPU) \ + .TypeConstraint<type>("T") \ + .TypeConstraint<index_type>("Tindices"), \ + SegmentSumGPUOp<type, index_type>) + +#define REGISTER_GPU_SORTED_KERNELS_ALL(type) \ + REGISTER_GPU_SORTED_KERNELS(type, int32); \ + REGISTER_GPU_SORTED_KERNELS(type, int64); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_SORTED_KERNELS_ALL); +#undef REGISTER_GPU_SORTED_KERNELS +#undef REGISTER_GPU_SORTED_KERNELS_ALL +#endif // GOOGLE_CUDA + namespace functor { // UnsortedSegmentSumFunctor implementation for CPUDevice. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index ee09c213b7..412c1d601d 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -26,6 +26,28 @@ namespace tensorflow { class OpKernelContext; namespace functor { + +#ifdef GOOGLE_CUDA +typedef Eigen::GpuDevice GPUDevice; +// Functor for SegmentSumGPUOp. +// 'output_rows': the number of output segments (unique segment ids in +// 'segment_ids'). +// 'segment_ids_shape': shape of 'segment_ids' tensor. +// 'segment_ids': unsorted map from input to output segment ids at which to +// perform segment sum operation. +// 'data_size': size of input data tensor. +// 'data': input data tensor. +// 'output': output reshaped to {output_rows, output.size/output_rows} +template <typename T, typename Index> +struct SegmentSumFunctor { + void operator()(OpKernelContext* ctx, const GPUDevice& d, + const Index output_rows, const TensorShape& segment_ids_shape, + typename TTypes<Index>::ConstFlat segment_ids, + const Index data_size, const T* data, + typename TTypes<T, 2>::Tensor output); +}; +#endif + // BaseFunctor for definition of UnsorteSegmentReductionOp // for usage without templates. template <typename Device, typename T, typename Index> diff --git a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc index b132b1e8f8..159fada621 100644 --- a/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/segment_reduction_ops_gpu.cu.cc @@ -54,6 +54,77 @@ __device__ __forceinline__ void AccumulateInto( CudaAtomicAdd(dest_scalar + 1, value.imag()); } +// SortedSegmentSumFunctor kernel reduces input data just as +// UnsortedSegmentSumCustomKernel does except that input data +// is partitioned along the outer reduction dimension. This is +// because consecutive rows (elements in a row share the same +// outer dimension index) in the flattened 2D input data likely +// belong to the same segment in sorted segment sum operation. +// Therefore such partitioning strategy has two advantages over +// the UnsortedSegmentSumFunctor kernel: +// 1. Each thread reduces across multiple rows before writing +// answers to the global memory, we can therefore +// write reduction results to global memory less often. +// 2. We may know that the current thread is the only contributor +// to an output element because of the increasing nature of segment +// ids. In such cases, we do not need to use atomic operations +// to write results to global memory. +// In the flattened view of input data (with only outer and inner +// dimension), every thread processes a strip of input data of +// size OuterDimTileSize x 1. This strip runs across multiple +// rows of input data and all reduction elements share one inner +// dimension index. +template <typename T, typename Index, int OuterDimTileSize> +__global__ void SortedSegmentSumCustomKernel(const Index input_outer_dim_size, + const Index inner_dim_size, + const Index output_outer_dim_size, + const Index* segment_ids, + const T* input, T* output, + const Index total_stripe_count) { + CUDA_1D_KERNEL_LOOP(stripe_index, total_stripe_count) { + const Index segment_offset = stripe_index % inner_dim_size; + const Index input_outer_dim_index_base = + stripe_index / inner_dim_size * Index(OuterDimTileSize); + + T sum = T(0); + Index first_segment_id = segment_ids[input_outer_dim_index_base]; + Index last_output_segment_id = output_outer_dim_size; + + const Index actual_stripe_height = + min(Index(OuterDimTileSize), + input_outer_dim_size - input_outer_dim_index_base); + for (Index j = 0; j < actual_stripe_height; j++) { + Index current_output_segment_id = + segment_ids[input_outer_dim_index_base + j]; + // Decide whether to write result to global memory. + // Result is only written to global memory if we move + // to another segment. Otherwise we can keep accumulating + // locally. + if (current_output_segment_id > last_output_segment_id) { + const Index output_index = + last_output_segment_id * inner_dim_size + segment_offset; + // decide whether to write result to global memory using atomic + // operations + if (last_output_segment_id == first_segment_id) { + AccumulateInto<T>(output + output_index, sum); + } else { + *(output + output_index) = sum; + } + sum = T(0); + } + sum += ldg(input + (input_outer_dim_index_base + j) * inner_dim_size + + segment_offset); + last_output_segment_id = current_output_segment_id; + } + // For the last result in a strip, always write using atomic operations + // due to possible race conditions with threads computing + // the following strip. + const Index output_index = + last_output_segment_id * inner_dim_size + segment_offset; + AccumulateInto<T>(output + output_index, sum); + } +} + // UnsortedSegmentSumFunctor kernel processes 'input_total_size' elements. // Each element is mapped from input to output by a combination of its // 'segment_ids' mapping and 'inner_dim_size'. @@ -80,6 +151,47 @@ __global__ void UnsortedSegmentSumCustomKernel( namespace functor { +template <typename T, typename Index> +void SegmentSumFunctor<T, Index>::operator()( + OpKernelContext* ctx, const GPUDevice& d, const Index output_rows, + const TensorShape& segment_ids_shape, + typename TTypes<Index>::ConstFlat segment_ids, const Index data_size, + const T* data, typename TTypes<T, 2>::Tensor output) { + if (output.size() == 0) { + return; + } + // Set 'output' to zeros. + CudaLaunchConfig config = GetCudaLaunchConfig(output.size(), d); + SetZero<<<config.block_count, config.thread_per_block, 0, d.stream()>>>( + output.size(), output.data()); + if (data_size == 0 || segment_ids_shape.num_elements() == 0) { + return; + } + + // Launch kernel to compute sorted segment sum. + // Notes: + // *) 'input_total_size' is the total number of elements to process. + // *) 'segment_ids.shape' is a prefix of data's shape. + // *) 'input_outer_dim_size' is the total number of segments to process. + const Index input_total_size = data_size; + const Index input_outer_dim_size = segment_ids.dimension(0); + const Index input_inner_dim_size = input_total_size / input_outer_dim_size; + + const int OuterDimTileSize = 8; + + const Index input_outer_dim_num_stripe = + Eigen::divup(input_outer_dim_size, Index(OuterDimTileSize)); + + const Index total_stripe_count = + input_inner_dim_size * input_outer_dim_num_stripe; + + config = GetCudaLaunchConfig(total_stripe_count, d); + SortedSegmentSumCustomKernel<T, Index, OuterDimTileSize> + <<<config.block_count, config.thread_per_block, 0, d.stream()>>>( + input_outer_dim_size, input_inner_dim_size, output_rows, + segment_ids.data(), data, output.data(), total_stripe_count); +}; + // UnsortedSegmentSumFunctor implementation for GPUDevice. template <typename T, typename Index> struct UnsortedSegmentSumFunctor<GPUDevice, T, Index>: UnsortedSegmentBaseFunctor<GPUDevice, T, Index> { @@ -117,6 +229,15 @@ struct UnsortedSegmentSumFunctor<GPUDevice, T, Index>: UnsortedSegmentBaseFuncto } }; +#define DEFINE_SORTED_GPU_SPECS_INDEX(T, Index) \ + template struct SegmentSumFunctor<T, Index> + +#define DEFINE_SORTED_GPU_SPECS(T) \ + DEFINE_SORTED_GPU_SPECS_INDEX(T, int32); \ + DEFINE_SORTED_GPU_SPECS_INDEX(T, int64); + +TF_CALL_GPU_NUMBER_TYPES(DEFINE_SORTED_GPU_SPECS); + #define DEFINE_GPU_SPECS_INDEX(T, Index) \ template struct UnsortedSegmentSumFunctor<GPUDevice, T, Index> diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 6ff05bd2a6..6eb05874aa 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -499,7 +499,7 @@ Returns x + y element-wise. )doc"); REGISTER_OP("Sub") - .BINARY_FEWER() + .BINARY_MORE() .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn) .Doc(R"doc( Returns x - y element-wise. diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 1ab1f1a736..8a2d5e8c05 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2791,7 +2791,9 @@ REGISTER_OP("_MklConv2D") .Input("mkl_input: uint8") .Input("mkl_filter: uint8") .Output("output: T") + .Output("filter_output: T") .Output("mkl_output: uint8") + .Output("mkl_filter_output: uint8") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") @@ -2813,7 +2815,9 @@ REGISTER_OP("_MklConv2DWithBias") .Input("mkl_filter: uint8") .Input("mkl_bias: uint8") .Output("output: T") + .Output("filter_output: T") .Output("mkl_output: uint8") + .Output("mkl_filter_output: uint8") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr("use_cudnn_on_gpu: bool = true") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 3a28ce3767..35c31c6cb8 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -15805,6 +15805,25 @@ op { summary: "Transforms a serialized tensorflow.TensorProto proto into a Tensor." } op { + name: "SerializeTensor" + input_arg { + name: "tensor" + description: "A Tensor of type `T`." + type: "T" + } + output_arg { + name: "serialized" + description: "A serialized TensorProto proto of the input tensor." + type_attr: DT_STRING + } + attr { + name: "T" + type: "type" + description: "The type of the input tensor." + } + summary: "Transforms a Tensor into a serialized TensorProto proto." +} +op { name: "Placeholder" output_arg { name: "output" diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index 2e605fdffc..1f7ebe91cf 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -292,6 +292,19 @@ out_type: The type of the serialized tensor. The provided type must match the output: A Tensor of type `out_type`. )doc"); +REGISTER_OP("SerializeTensor") + .Input("tensor: T") + .Output("serialized: string") + .Attr("T: type") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Transforms a Tensor into a serialized TensorProto proto. + +tensor: A Tensor of type `T`. +T: The type of the input tensor. +serialized: A serialized TensorProto proto of the input tensor. +)doc"); + REGISTER_OP("DecodeJSONExample") .Input("json_examples: string") .Output("binary_examples: string") diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index ac0988e704..7127db3929 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -14,7 +14,6 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/platform/default/logging.h" -#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/env_time.h" #include "tensorflow/core/platform/macros.h" @@ -25,12 +24,8 @@ limitations under the License. #endif #include <stdlib.h> -#include <string.h> #include <time.h> -#include <string> -#include <unordered_map> - namespace tensorflow { namespace internal { @@ -129,48 +124,6 @@ int64 MinVLogLevelFromEnv() { return LogLevelStrToInt(tf_env_var_val); } -using VmoduleMap = std::unordered_map<StringPiece, int, StringPiece::Hasher>; - -// Returns a mapping from module name to VLOG level, derived from the -// TF_CPP_VMOUDLE environment variable; ownership is transferred to the caller. -VmoduleMap* VmoduleRecordsFromEnv() { - // The value of the env var is supposed to be of the form: - // "foo=1,bar=2,baz=3" - const char* tf_env_var_val = getenv("TF_CPP_VMODULE"); - auto* result = new VmoduleMap(); - if (tf_env_var_val == nullptr) return result; - while (true) { - const char* eq = strchr(tf_env_var_val, '='); - if (eq == nullptr) break; - const char* after_eq = eq + 1; - - // Comma either points at the next comma delimiter, or at a null terminator. - // We check that the integer we parse ends at this delimiter. - const char* comma = strchr(after_eq, ','); - const char* new_tf_env_var_val; - if (comma == nullptr) { - comma = strchr(after_eq, '\0'); - new_tf_env_var_val = comma; - } else { - new_tf_env_var_val = comma + 1; - } - - char* endptr = nullptr; - int level = strtol(after_eq, &endptr, 10); - if (endptr != comma) { - fprintf(stderr, - "warning: could not parse integer in vmodule specification in " - "\"%s\".\n", - after_eq); - break; - } - StringPiece module(tf_env_var_val, eq - tf_env_var_val); - tf_env_var_val = new_tf_env_var_val; - (*result)[module] = level; - } - return result; -} - } // namespace LogMessage::~LogMessage() { @@ -184,19 +137,6 @@ int64 LogMessage::MinVLogLevel() { return min_vlog_level; } -bool LogMessage::VmoduleActivated(const char* fname, int lvl) { - static VmoduleMap* vmodule_records = VmoduleRecordsFromEnv(); - const char* last_slash = strrchr(fname, '/'); - const char* module_start = last_slash == nullptr ? fname : last_slash + 1; - const char* dot_after = strchr(module_start, '.'); - const char* module_limit = - dot_after == nullptr ? strchr(fname, '\0') : dot_after; - StringPiece module(module_start, module_limit - module_start); - auto it = vmodule_records->find(module); - if (it == vmodule_records->end()) return false; - return it->second >= lvl; -} - LogMessageFatal::LogMessageFatal(const char* file, int line) : LogMessage(file, line, FATAL) {} LogMessageFatal::~LogMessageFatal() { diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index c8c9b2da11..d5f7350cdd 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -46,16 +46,6 @@ class LogMessage : public std::basic_ostringstream<char> { // but VLOG(3) will not. Defaults to 0. static int64 MinVLogLevel(); - // Returns whether VLOG level lvl is activated for the file fname. - // - // E.g. if the environment variable TF_CPP_VMODULE contains foo=3 and fname is - // foo.cc and lvl is <= 3, this will return true. - // - // It is expected that the result of this query will be cached in the VLOG-ing - // call site to avoid repeated lookups. This routine performs a hash-map - // access against the VLOG-ing specification provided by the env var. - static bool VmoduleActivated(const char* fname, int lvl); - protected: void GenerateLogMessage(); @@ -86,38 +76,18 @@ class LogMessageFatal : public LogMessage { #define LOG(severity) _TF_LOG_##severity -#if defined(IS_MOBILE_PLATFORM) - +#ifdef IS_MOBILE_PLATFORM // Turn VLOG off when under mobile devices for considerations of binary size. -#define _VLOG_IS_ON(lvl, file) ((lvl) <= 0) - -#elif defined(PLATFORM_WINDOWS) - -// TODO(b/64279502) The _VLOG_IS_ON definition below appears to cause MSVC to -// fatal error, so we fall back to the vmodule-less implementation for now. -#define _VLOG_IS_ON(lvl, file) \ - ((lvl) <= ::tensorflow::internal::LogMessage::MinVLogLevel()) - +#define VLOG_IS_ON(lvl) ((lvl) <= 0) #else - -// Otherwise, set TF_CPP_MIN_VLOG_LEVEL environment to update minimum log level -// of VLOG, or TF_CPP_VMODULE to set the minimum log level for individual -// translation units. -#define _VLOG_IS_ON(lvl, file) \ - (([](int level, const char* fname) { \ - if (level <= ::tensorflow::internal::LogMessage::MinVLogLevel()) \ - return true; \ - static bool vmodule_activated = \ - ::tensorflow::internal::LogMessage::VmoduleActivated(fname, level); \ - return vmodule_activated; \ - })(lvl, file)) - +// Otherwise, Set TF_CPP_MIN_VLOG_LEVEL environment to update minimum log level +// of VLOG +#define VLOG_IS_ON(lvl) \ + ((lvl) <= ::tensorflow::internal::LogMessage::MinVLogLevel()) #endif -#define VLOG_IS_ON(lvl) _VLOG_IS_ON(lvl, __FILE__) - -#define VLOG(lvl) \ - if (TF_PREDICT_FALSE(_VLOG_IS_ON(lvl, __FILE__))) \ +#define VLOG(lvl) \ + if (TF_PREDICT_FALSE(VLOG_IS_ON(lvl))) \ ::tensorflow::internal::LogMessage(__FILE__, __LINE__, tensorflow::INFO) // CHECK dies with a fatal error if condition is not true. It is *not* diff --git a/tensorflow/core/profiler/g3doc/command_line.md b/tensorflow/core/profiler/g3doc/command_line.md index e2839a682f..fb4207c784 100644 --- a/tensorflow/core/profiler/g3doc/command_line.md +++ b/tensorflow/core/profiler/g3doc/command_line.md @@ -57,7 +57,7 @@ Note: this feature is not well maintained now. ```shell # Build the tool. -bazel build --config opt third_party/tensorflow/core/profiler/... +bazel build --config opt tensorflow/core/profiler:profiler # Help information, including detail 'option' instructions. bazel-bin/tensorflow/core/profiler/profiler help diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 194649a304..4991783a53 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -37,6 +37,7 @@ Asia: * [TensorFlow Korea (TF-KR) User Group](https://www.facebook.com/groups/TensorFlowKR/) _(Korean language)_ * [TensorFlow User Group Tokyo](https://tfug-tokyo.connpass.com/) _(Japanese Language)_ * [Soleil Data Dojo](https://soleildatadojo.connpass.com/) _(Japanese language)_ +* [TensorFlow User Group Utsunomiya](https://tfug-utsunomiya.connpass.com/) Europe: diff --git a/tensorflow/docs_src/get_started/estimator.md b/tensorflow/docs_src/get_started/estimator.md index a55454f8af..4f3a438d17 100644 --- a/tensorflow/docs_src/get_started/estimator.md +++ b/tensorflow/docs_src/get_started/estimator.md @@ -273,9 +273,7 @@ Then, the code creates a `DNNClassifier` model using the following arguments: containing 10, 20, and 10 neurons, respectively. * `n_classes=3`. Three target classes, representing the three Iris species. * `model_dir=/tmp/iris_model`. The directory in which TensorFlow will save - checkpoint data during model training. For more on logging and monitoring - with TensorFlow, see - @{$monitors$Logging and Monitoring Basics with tf.estimator}. + checkpoint data and TensorBoard summaries during model training. ## Describe the training input pipeline {#train-input} @@ -315,9 +313,7 @@ classifier.train(input_fn=train_input_fn, steps=1000) However, if you're looking to track the model while it trains, you'll likely want to instead use a TensorFlow @{tf.train.SessionRunHook$`SessionRunHook`} -to perform logging operations. See the tutorial -@{$monitors$Logging and Monitoring Basics with tf.estimator} -for more on this topic. +to perform logging operations. ## Evaluate Model Accuracy {#evaluate-accuracy} diff --git a/tensorflow/docs_src/get_started/index.md b/tensorflow/docs_src/get_started/index.md index 3e700daa30..003fac1a28 100644 --- a/tensorflow/docs_src/get_started/index.md +++ b/tensorflow/docs_src/get_started/index.md @@ -24,8 +24,6 @@ To learn about the high-level API, read the following guides: API. * @{$get_started/input_fn$Building Input Functions}, which takes you into a somewhat more sophisticated use of this API. - * @{$get_started/monitors$Logging and Monitoring Basics with tf.contrib.learn}, - which explains how to audit the progress of model training. TensorBoard is a utility to visualize different aspects of machine learning. The following guides explain how to use TensorBoard: diff --git a/tensorflow/docs_src/get_started/input_fn.md b/tensorflow/docs_src/get_started/input_fn.md index 422f45c586..7706c07b1d 100644 --- a/tensorflow/docs_src/get_started/input_fn.md +++ b/tensorflow/docs_src/get_started/input_fn.md @@ -249,7 +249,7 @@ here](https://www.tensorflow.org/code/tensorflow/examples/tutorials/input_fn/bos ### Importing the Housing Data -To start, set up your imports (including `pandas` and `tensorflow`) and @{$monitors#enabling-logging-with-tensorflow$set logging verbosity} to +To start, set up your imports (including `pandas` and `tensorflow`) and set logging verbosity to `INFO` for more detailed log output: ```python diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files index b656033f7e..bb67eaddda 100644 --- a/tensorflow/docs_src/get_started/leftnav_files +++ b/tensorflow/docs_src/get_started/leftnav_files @@ -5,7 +5,6 @@ mnist/pros.md mnist/mechanics.md estimator.md input_fn.md -monitors.md summaries_and_tensorboard.md graph_viz.md tensorboard_histograms.md diff --git a/tensorflow/java/BUILD b/tensorflow/java/BUILD index 64b3767735..4680e3ba16 100644 --- a/tensorflow/java/BUILD +++ b/tensorflow/java/BUILD @@ -5,7 +5,9 @@ package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # Apache 2.0 -load("build_defs", "JAVACOPTS") +load(":build_defs.bzl", "JAVACOPTS") +load(":src/gen/gen_ops.bzl", "tf_java_op_gen_srcjar") +load("//tensorflow:tensorflow.bzl", "tf_copts") java_library( name = "tensorflow", @@ -34,12 +36,57 @@ filegroup( filegroup( name = "java_op_sources", - srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]), + srcs = glob(["src/main/java/org/tensorflow/op/**/*.java"]) + [ + ":java_op_gen_sources", + ], visibility = [ "//tensorflow/java:__pkg__", ], ) +tf_java_op_gen_srcjar( + name = "java_op_gen_sources", + gen_base_package = "org.tensorflow.op", + gen_tool = "java_op_gen_tool", + ops_libs = [ + "array_ops", + "candidate_sampling_ops", + "control_flow_ops", + "data_flow_ops", + "image_ops", + "io_ops", + "linalg_ops", + "logging_ops", + "math_ops", + "nn_ops", + "no_op", + "parsing_ops", + "random_ops", + "sparse_ops", + "state_ops", + "string_ops", + "training_ops", + "user_ops", + ], +) + +# Build the gen tool as a library, as it will be linked to a core/ops binary +# file before making it an executable. See tf_java_op_gen_srcjar(). +cc_library( + name = "java_op_gen_tool", + srcs = glob([ + "src/gen/cc/*.h", + "src/gen/cc/*.cc", + ]), + copts = tf_copts(), + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + ], +) + java_library( name = "testutil", testonly = 1, diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc new file mode 100644 index 0000000000..a7c66dda89 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -0,0 +1,84 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==============================================================================*/ + +#include <string> +#include <vector> + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/util/command_line_flags.h" +#include "tensorflow/java/src/gen/cc/op_generator.h" + +namespace tensorflow { +namespace op_gen { + +const char kUsageHeader[] = + "\n\nGenerator of operation wrappers in Java.\n\n" + "This executable generates wrappers for all registered operations it has " + "been compiled with. A wrapper exposes an intuitive and strongly-typed\n" + "interface for building its underlying operation and linking it into a " + "graph.\n\n" + "Operation wrappers are generated under the path specified by the " + "'--output_dir' argument. This path can be absolute or relative to the\n" + "current working directory and will be created if it does not exists.\n\n" + "The '--lib_name' argument is used to classify the set of operations. If " + "the chosen name contains more than one word, it must be provided in \n" + "snake_case. This value is declined into other meaningful names, such as " + "the group and package of the generated operations. For example,\n" + "'--lib_name=my_lib' generates the operations under the " + "'org.tensorflow.op.mylib' package and add them to the 'myLib()' operator\n" + "group.\n\n" + "Note that the operator group assigned to the generated wrappers is just " + "an annotation tag at this stage. Operations will not be available " + "through\n" + "the 'org.tensorflow.op.Ops' API as a group until the generated classes " + "are compiled using an appropriate annotation processor.\n\n" + "Finally, the '--base_package' overrides the default parent package " + "under which the generated subpackage and classes are to be located.\n\n"; + +} // namespace op_gen +} // namespace tensorflow + +int main(int argc, char* argv[]) { + tensorflow::string lib_name; + tensorflow::string output_dir; + tensorflow::string base_package = "org.tensorflow.op"; + std::vector<tensorflow::Flag> flag_list = { + tensorflow::Flag("output_dir", &output_dir, + "Root directory into which output files are generated"), + tensorflow::Flag( + "lib_name", &lib_name, + "A name, in snake_case, used to classify this set of operations"), + tensorflow::Flag( + "base_package", &base_package, + "Package parent to the generated subpackage and classes")}; + tensorflow::string usage = tensorflow::op_gen::kUsageHeader; + usage += tensorflow::Flags::Usage(argv[0], flag_list); + bool parsed_flags_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + tensorflow::port::InitMain(usage.c_str(), &argc, &argv); + QCHECK(parsed_flags_ok && !lib_name.empty() && !output_dir.empty()) << usage; + + tensorflow::OpGenerator generator; + tensorflow::OpList ops; + tensorflow::OpRegistry::Global()->Export(true, &ops); + tensorflow::Status status = + generator.Run(ops, lib_name, base_package, output_dir); + TF_QCHECK_OK(status); + + return 0; +} diff --git a/tensorflow/java/src/gen/cc/op_generator.cc b/tensorflow/java/src/gen/cc/op_generator.cc new file mode 100644 index 0000000000..df130c32e6 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_generator.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <string> + +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/java/src/gen/cc/op_generator.h" + +namespace tensorflow { +namespace { + +string CamelCase(const string& str, char delimiter, bool upper) { + string result; + bool cap = upper; + for (string::const_iterator it = str.begin(); it != str.end(); ++it) { + const char c = *it; + if (c == delimiter) { + cap = true; + } else if (cap) { + result += toupper(c); + cap = false; + } else { + result += c; + } + } + return result; +} + +} // namespace + +OpGenerator::OpGenerator() : env(Env::Default()) {} + +OpGenerator::~OpGenerator() {} + +Status OpGenerator::Run(const OpList& ops, const string& lib_name, + const string& base_package, const string& output_dir) { + const string package = + base_package + '.' + str_util::StringReplace(lib_name, "_", "", true); + const string package_path = + output_dir + '/' + str_util::StringReplace(package, ".", "/", true); + const string group = CamelCase(lib_name, '_', false); + + if (!env->FileExists(package_path).ok()) { + TF_CHECK_OK(env->RecursivelyCreateDir(package_path)); + } + + LOG(INFO) << "Generating Java wrappers for '" << lib_name << "' operations"; + // TODO(karllessard) generate wrappers from list of ops + + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/java/src/gen/cc/op_generator.h b/tensorflow/java/src/gen/cc/op_generator.h new file mode 100644 index 0000000000..eec1082b51 --- /dev/null +++ b/tensorflow/java/src/gen/cc/op_generator.h @@ -0,0 +1,51 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ +#define TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ + +#include <string> + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { + +/// \brief A generator of Java operation wrappers. +/// +/// Such generator is normally ran only once per executable, outputting +/// wrappers for the all registered operations it has been compiled with. +/// Nonetheless, it is designed to support multiple runs, giving a different +/// list of operations on each cycle. +class OpGenerator { + public: + OpGenerator(); + virtual ~OpGenerator(); + + /// \brief Generates wrappers for the given list of 'ops'. + /// + /// Output files are generated in <output_dir>/<base_package>/<lib_package>, + /// where 'lib_package' is derived from 'lib_name'. + Status Run(const OpList& ops, const string& lib_name, + const string& base_package, const string& output_dir); + + private: + Env* env; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_JAVA_SRC_GEN_CC_OP_GENERATOR_H_ diff --git a/tensorflow/java/src/gen/gen_ops.bzl b/tensorflow/java/src/gen/gen_ops.bzl new file mode 100644 index 0000000000..e3710c49d0 --- /dev/null +++ b/tensorflow/java/src/gen/gen_ops.bzl @@ -0,0 +1,59 @@ +# -*- Python -*- + +load("//tensorflow:tensorflow.bzl", "tf_copts") + +# Given a list of "ops_libs" (a list of files in the core/ops directory +# without their .cc extensions), generate Java wrapper code for all operations +# found in the ops files. +# Then, combine all those source files into a single archive (.srcjar). +# +# For example: +# tf_java_op_gen_srcjar("gen_sources", "gen_tool", "my.package", [ "array_ops", "math_ops" ]) +# +# will create a genrule named "gen_sources" that first generate source files: +# ops/src/main/java/my/package/array/*.java +# ops/src/main/java/my/package/math/*.java +# +# and then archive those source files in: +# ops/gen_sources.srcjar +# +def tf_java_op_gen_srcjar(name, + gen_tool, + gen_base_package, + ops_libs=[], + ops_libs_pkg="//tensorflow/core", + out_dir="ops/", + out_src_dir="src/main/java/", + visibility=["//tensorflow/java:__pkg__"]): + + gen_tools = [] + gen_cmds = ["rm -rf $(@D)"] # Always start from fresh when generating source files + + # Construct an op generator binary for each ops library. + for ops_lib in ops_libs: + gen_lib = ops_lib[:ops_lib.rfind("_")] + out_gen_tool = out_dir + ops_lib + "_gen_tool" + + native.cc_binary( + name=out_gen_tool, + copts=tf_copts(), + linkopts=["-lm"], + linkstatic=1, # Faster to link this one-time-use binary dynamically + deps=[gen_tool, ops_libs_pkg + ":" + ops_lib + "_op_lib"]) + + gen_tools += [":" + out_gen_tool] + gen_cmds += ["$(location :" + out_gen_tool + ")" + + " --output_dir=$(@D)/" + out_src_dir + + " --lib_name=" + gen_lib + + " --base_package=" + gen_base_package] + + # Generate a source archive containing generated code for these ops. + gen_srcjar = out_dir + name + ".srcjar" + gen_cmds += ["$(location @local_jdk//:jar) cMf $(location :" + gen_srcjar + ") -C $(@D) ."] + gen_tools += ["@local_jdk//:jar"] + + native.genrule( + name=name, + outs=[gen_srcjar], + tools=gen_tools, + cmd="&&".join(gen_cmds)) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index c46a3d8db3..a526856794 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -659,14 +659,25 @@ void GenEagerPythonOp::AddEagerExecute(const string& num_outputs_expr) { string GetEagerPythonOps(const OpList& ops, const std::vector<string>& hidden_ops, - bool require_shapes) { + bool require_shapes, + const string& source_file_name = "") { string result; // Header // TODO(josh11b): Mention the library for which wrappers are being generated. - strings::StrAppend(&result, R"("""Python wrappers for TensorFlow ops. + strings::StrAppend(&result, R"("""Python wrappers around TensorFlow ops. This file is MACHINE GENERATED! Do not edit. -""" +)"); + + // Mention the original source file so someone tracing back through generated + // Python code will know where to look next. + if (!source_file_name.empty()) { + strings::StrAppend(&result, "Original C++ source file: "); + strings::StrAppend(&result, source_file_name); + strings::StrAppend(&result, "\n"); + } + + strings::StrAppend(&result, R"(""" import collections as _collections @@ -747,8 +758,10 @@ from tensorflow.python.framework import op_def_library as _op_def_library void PrintEagerPythonOps(const OpList& ops, const std::vector<string>& hidden_ops, - bool require_shapes) { - printf("%s", GetEagerPythonOps(ops, hidden_ops, require_shapes).c_str()); + bool require_shapes, const string& source_file_name) { + printf("%s", + GetEagerPythonOps(ops, hidden_ops, require_shapes, source_file_name) + .c_str()); } string GetEagerPythonWrappers(const char* op_list_buf, size_t op_list_len) { diff --git a/tensorflow/python/eager/python_eager_op_gen.h b/tensorflow/python/eager/python_eager_op_gen.h index 9a7ed28cf9..250623850f 100644 --- a/tensorflow/python/eager/python_eager_op_gen.h +++ b/tensorflow/python/eager/python_eager_op_gen.h @@ -24,9 +24,12 @@ namespace tensorflow { // hidden_ops should be a list of Op names that should get a leading _ // in the output. Prints the output to stdout. +// Optional fourth argument is the name of the original C++ source file +// where the ops' REGISTER_OP() calls reside. void PrintEagerPythonOps(const OpList& ops, const std::vector<string>& hidden_ops, - bool require_shapes); + bool require_shapes, + const string& source_file_name = ""); // Get the python wrappers for a list of ops in a OpList. // `op_list_buf` should be a pointer to a buffer containing diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index a8434d0c99..965b35bc4c 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2474,6 +2474,9 @@ class _IndicatorColumn(_DenseColumn, sp_ids=id_tensor, sp_values=weight_tensor, vocab_size=int(self._variable_shape[-1])) + # Remove (?, -1) index + weighted_column = sparse_ops.sparse_slice(weighted_column, [0, 0], + weighted_column.dense_shape) return sparse_ops.sparse_tensor_to_dense(weighted_column) dense_id_tensor = sparse_ops.sparse_tensor_to_dense( diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 3057776391..626879f76a 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -3213,13 +3213,39 @@ class IndicatorColumnTest(test.TestCase): weights = fc.weighted_categorical_column(ids, 'weights') indicator = fc.indicator_column(weights) features = { - 'ids': constant_op.constant(['c', 'b', 'a'], shape=(1, 3)), - 'weights': constant_op.constant([2., 4., 6.], shape=(1, 3)) + 'ids': constant_op.constant([['c', 'b', 'a']]), + 'weights': constant_op.constant([[2., 4., 6.]]) } indicator_tensor = _transform_features(features, [indicator])[indicator] with _initialized_session(): self.assertAllEqual([[6., 4., 2.]], indicator_tensor.eval()) + def test_transform_with_missing_value_in_weighted_column(self): + # Github issue 12583 + ids = fc.categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + weights = fc.weighted_categorical_column(ids, 'weights') + indicator = fc.indicator_column(weights) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + 'weights': constant_op.constant([[2., 4., 6.]]) + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 4., 2.]], indicator_tensor.eval()) + + def test_transform_with_missing_value_in_categorical_column(self): + # Github issue 12583 + ids = fc.categorical_column_with_vocabulary_list( + key='ids', vocabulary_list=('a', 'b', 'c')) + indicator = fc.indicator_column(ids) + features = { + 'ids': constant_op.constant([['c', 'b', 'unknown']]), + } + indicator_tensor = _transform_features(features, [indicator])[indicator] + with _initialized_session(): + self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval()) + def test_linear_model(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) diff --git a/tensorflow/python/framework/python_op_gen_main.cc b/tensorflow/python/framework/python_op_gen_main.cc index 8366542288..f681daa7e4 100644 --- a/tensorflow/python/framework/python_op_gen_main.cc +++ b/tensorflow/python/framework/python_op_gen_main.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/inputbuffer.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/init_main.h" @@ -80,7 +81,29 @@ Status ParseOpListCommandLine(const char* arg, std::vector<string>* op_list) { return Status::OK(); } -void PrintAllPythonOps(const std::vector<string>& op_list, bool require_shapes, +// Use the name of the current executable to infer the C++ source file +// where the REGISTER_OP() call for the operator can be found. +// Returns the name of the file. +// Returns an empty string if the current executable's name does not +// follow a known pattern. +string InferSourceFileName(const char* argv_zero) { + StringPiece command_str = io::Basename(argv_zero); + + // For built-in ops, the Bazel build creates a separate executable + // with the name gen_<op type>_ops_py_wrappers_cc containing the + // operators defined in <op type>_ops.cc + const char* kExecPrefix = "gen_"; + const char* kExecSuffix = "_py_wrappers_cc"; + if (command_str.Consume(kExecPrefix) && command_str.ends_with(kExecSuffix)) { + command_str.remove_suffix(strlen(kExecSuffix)); + return strings::StrCat(command_str, ".cc"); + } else { + return string(""); + } +} + +void PrintAllPythonOps(const std::vector<string>& op_list, + const string& source_file_name, bool require_shapes, bool op_list_is_whitelist) { OpList ops; OpRegistry::Global()->Export(false, &ops); @@ -93,9 +116,9 @@ void PrintAllPythonOps(const std::vector<string>& op_list, bool require_shapes, *pruned_ops.mutable_op()->Add() = op_def; } } - PrintEagerPythonOps(pruned_ops, {}, require_shapes); + PrintEagerPythonOps(pruned_ops, {}, require_shapes, source_file_name); } else { - PrintEagerPythonOps(ops, op_list, require_shapes); + PrintEagerPythonOps(ops, op_list, require_shapes, source_file_name); } } @@ -105,20 +128,26 @@ void PrintAllPythonOps(const std::vector<string>& op_list, bool require_shapes, int main(int argc, char* argv[]) { tensorflow::port::InitMain(argv[0], &argc, &argv); + tensorflow::string source_file_name = + tensorflow::InferSourceFileName(argv[0]); + // Usage: // gen_main [ @FILENAME | OpName[,OpName]* ] (0 | 1) [0 | 1] if (argc == 2) { - tensorflow::PrintAllPythonOps({}, {}, tensorflow::string(argv[1]) == "1"); + tensorflow::PrintAllPythonOps({}, source_file_name, + tensorflow::string(argv[1]) == "1", + false /* op_list_is_whitelist */); } else if (argc == 3) { std::vector<tensorflow::string> hidden_ops; TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &hidden_ops)); - tensorflow::PrintAllPythonOps(hidden_ops, + tensorflow::PrintAllPythonOps(hidden_ops, source_file_name, tensorflow::string(argv[2]) == "1", false /* op_list_is_whitelist */); } else if (argc == 4) { std::vector<tensorflow::string> op_list; TF_CHECK_OK(tensorflow::ParseOpListCommandLine(argv[1], &op_list)); - tensorflow::PrintAllPythonOps(op_list, tensorflow::string(argv[2]) == "1", + tensorflow::PrintAllPythonOps(op_list, source_file_name, + tensorflow::string(argv[2]) == "1", tensorflow::string(argv[3]) == "1"); } else { return -1; diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index eea3d28a7e..745428e530 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -236,7 +236,8 @@ def _FilterTuple(v): def _FilterInt(v): if isinstance(v, (list, tuple)): return _FirstNotNone([_FilterInt(x) for x in v]) - return None if isinstance(v, compat.integral_types) else _NotNone(v) + return None if isinstance(v, (compat.integral_types, + tensor_shape.Dimension)) else _NotNone(v) def _FilterFloat(v): diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index 2760f98a6b..f66af3adc6 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -314,6 +314,17 @@ class TensorUtilTest(test.TestCase): shape=[3, 4], dtype=dtype))) + def testIntMixedWithDimension(self): + # Github issue: 11974 + dtype = dtypes.int32 + nptype = np.int32 + t = tensor_util.make_tensor_proto( + [10, tensor_shape.Dimension(20), 30], dtype=dtype) + self.assertEquals(dtype, t.dtype) + a = tensor_util.MakeNdarray(t) + self.assertEquals(nptype, a.dtype) + self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a) + def testLong(self): t = tensor_util.make_tensor_proto(10, dtype=dtypes.int64) self.assertProtoEquals(""" diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 43827b0d10..d9c5f3bce9 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -684,13 +684,15 @@ cuda_py_test( tf_py_test( name = "segment_reduction_ops_test", - size = "small", + size = "medium", srcs = ["segment_reduction_ops_test.py"], additional_deps = [ "//third_party/py/numpy", + "//tensorflow/python:client", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", + "//tensorflow/python:variables", "//tensorflow/python:nn_grad", ], ) diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 33269c9123..bf20f5d1a9 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -18,13 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools + import numpy as np +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib +from tensorflow.python.framework import ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops -import tensorflow.python.ops.nn_grad # pylint: disable=unused-import +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -107,19 +111,19 @@ class SegmentReductionOpTest(SegmentReductionHelper): curr_ops_list = complex_ops_list else: curr_ops_list = ops_list - - with self.test_session(use_gpu=False): - tf_x, np_x = self._input(shape, dtype=dtype) - for np_op1, np_op2, tf_op in curr_ops_list: - np_ans = self._segmentReduce(indices, np_x, np_op1, np_op2) - s = tf_op(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) - # NOTE(mrry): The static shape inference that computes - # `tf_ans.shape` can only infer that sizes from dimension 1 - # onwards, because the size of dimension 0 is data-dependent - # and may therefore vary dynamically. - self.assertAllEqual(np_ans.shape[1:], tf_ans.shape[1:]) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtype) + for np_op1, np_op2, tf_op in curr_ops_list: + np_ans = self._segmentReduce(indices, np_x, np_op1, np_op2) + s = tf_op(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) + # NOTE(mrry): The static shape inference that computes + # `tf_ans.shape` can only infer that sizes from dimension 1 + # onwards, because the size of dimension 0 is data-dependent + # and may therefore vary dynamically. + self.assertAllEqual(np_ans.shape[1:], tf_ans.shape[1:]) def testSegmentIdsShape(self): shape = [4, 4] @@ -130,41 +134,45 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsSize(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 1] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment_ids should be the same size"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape) + indices = [0, 1] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment_ids should be the same size"): + s.eval() def testSegmentIdsValid(self): # This is a baseline for the following SegmentIdsInvalid* tests. shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, 1] - result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval() - self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, 1] + result = math_ops.segment_sum(data=tf_x, segment_ids=indices).eval() + self.assertAllEqual([[15, 18, 21, 24], [13, 14, 15, 16]], result) def testSegmentIdsGreaterThanZero(self): shape = [4, 4] - with self.test_session(): - tf_x, np_x = self._input(shape) - indices = [1, 1, 2, 2] - np_ans = self._segmentReduce(indices, np_x, np.add) - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32) + indices = [1, 1, 2, 2] + np_ans = self._segmentReduce(indices, np_x, np.add) + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) def testSegmentIdsHole(self): shape = [4, 4] - with self.test_session(): - tf_x, np_x = self._input(shape) - indices = [0, 0, 3, 3] - np_ans = self._segmentReduce(indices, np_x, np.add) - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - tf_ans = s.eval() - self.assertAllClose(np_ans, tf_ans) + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, np_x = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 3, 3] + np_ans = self._segmentReduce(indices, np_x, np.add) + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + tf_ans = s.eval() + self.assertAllClose(np_ans, tf_ans) def testSegmentIdsInvalid1(self): shape = [4, 4] @@ -199,21 +207,23 @@ class SegmentReductionOpTest(SegmentReductionHelper): def testSegmentIdsInvalid4(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, -1] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment ids must be >= 0"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, -1] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment ids must be >= 0"): + s.eval() def testSegmentIdsInvalid5(self): shape = [4, 4] - with self.test_session(): - tf_x, _ = self._input(shape) - indices = [0, 0, 0, -2] - s = math_ops.segment_sum(data=tf_x, segment_ids=indices) - with self.assertRaisesOpError("segment ids must be >= 0"): - s.eval() + for use_gpu in [True, False]: + with self.test_session(use_gpu=use_gpu): + tf_x, _ = self._input(shape, dtype=dtypes_lib.float32) + indices = [0, 0, 0, -2] + s = math_ops.segment_sum(data=tf_x, segment_ids=indices) + with self.assertRaisesOpError("segment ids must be >= 0"): + s.eval() def testGradient(self): shape = [4, 4] @@ -340,8 +350,8 @@ class UnsortedSegmentSumTest(SegmentReductionHelper): shape = indices.shape + (num_cols,) with self.test_session(use_gpu=True): tf_x, np_x = self._input(shape, dtype=dtypes_lib.float64) - s = math_ops.unsorted_segment_max(data=tf_x, segment_ids=indices, - num_segments=num_segments) + s = math_ops.unsorted_segment_max( + data=tf_x, segment_ids=indices, num_segments=num_segments) jacob_t, jacob_n = gradient_checker.compute_gradient( tf_x, shape, @@ -636,5 +646,67 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): s.eval() +class SegmentReductionOpBenchmark(test.Benchmark): + outer_dim_options = [2**x for x in range(9, 14, 2)] + ratio_options = [2**x for x in range(1, 6, 2)] + inner_dim_options = [2**x for x in range(9, 14, 2)] + # randomly generated sizes with less alignments + inner_dim_options += [ + 1120, 1215, 1856, 1302, 1329, 1531, 1313, 1672, 1851, 1584 + ] + dtype_options = [np.float32, np.float64] + options = (outer_dim_options, ratio_options, inner_dim_options, dtype_options) + # pylint: disable=g-long-lambda + op_functors = [lambda vc, vs, seg_ids: + ("sorted", math_ops.segment_sum(vc, vs)), + lambda vc, vs, seg_ids: + ("unsorted", + math_ops.unsorted_segment_sum(vc, vs, seg_ids[-1]+1))] + # pylint: enable=g-long-lambda + repeat = 10 + + def _npTypeToStr(self, t): + if t == np.float32: + return "fp32" + if t == np.float64: + return "fp64" + + def _runGraph(self, op_functor, outer_dim, ratio, inner_dim, dtype): + output_outer_dim = int(outer_dim / ratio) + const = np.random.randint(5, size=(outer_dim, inner_dim)) + seg_ids = np.sort(np.random.randint(output_outer_dim, size=outer_dim)) + vs = variables.Variable(seg_ids.astype(np.int32)) + with ops.device("/gpu:0"): + vc = variables.Variable(const.astype(dtype)) + name, op = op_functor(vc, vs, seg_ids) + with session.Session() as sess: + variables.global_variables_initializer().run() + r = self.run_op_benchmark( + sess, + op, + min_iters=self.repeat, + name="_".join( + map(str, + [name, outer_dim, ratio, inner_dim, + self._npTypeToStr(dtype)]))) + return name, r["wall_time"] + + def benchmarkSegmentSumGPU(self): + if not test.is_gpu_available(cuda_only=True): + return + for outer_dim, ratio, inner_dim, dtype in itertools.product(*self.options): + op_functor = self.op_functors[0] + with ops.Graph().as_default(): + self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype) + + def benchmarkUnsortedSegmentSumGPU(self): + if not test.is_gpu_available(cuda_only=True): + return + for outer_dim, ratio, inner_dim, dtype in itertools.product(*self.options): + op_functor = self.op_functors[1] + with ops.Graph().as_default(): + self._runGraph(op_functor, outer_dim, ratio, inner_dim, dtype) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py index 5cd5d7ba2f..bd879ac423 100644 --- a/tensorflow/python/ops/io_ops.py +++ b/tensorflow/python/ops/io_ops.py @@ -37,6 +37,7 @@ See the @{$python/io_ops} guide. @@parse_example @@parse_single_example @@parse_tensor +@@serialize_tensor @@decode_json_example @@QueueBase @@FIFOQueue diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index 803e0e7a1e..c5fd15bae4 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -40,6 +40,7 @@ from tensorflow.python.platform import tf_logging ops.NotDifferentiable("DecodeRaw") ops.NotDifferentiable("ParseTensor") +ops.NotDifferentiable("SerializeTensor") ops.NotDifferentiable("StringToNumber") diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index 5345949664..a1fe47982f 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -117,7 +117,7 @@ class Profiler(object): ```python Typical use case: # Currently we are only allowed to create 1 profiler per process. - profiler = Profile(sess.graph) + profiler = Profiler(sess.graph) for i in xrange(total_steps): if i % 10000 == 0: @@ -174,7 +174,7 @@ class Profiler(object): """Add statistics of a step. Args: - step: A step uint64 used to identify the RunMetadata. Must be different + step: int, A step used to identify the RunMetadata. Must be different across different AddStep() calls. run_meta: RunMetadata proto that contains statistics of a session run. """ diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py index a8712fc37e..00de044505 100644 --- a/tensorflow/python/tools/import_pb_to_tensorboard.py +++ b/tensorflow/python/tools/import_pb_to_tensorboard.py @@ -51,7 +51,7 @@ def import_to_tensorboard(model_dir, log_dir): pb_visual_writer = summary.FileWriter(log_dir) pb_visual_writer.add_graph(sess.graph) print("Model Imported. Visualize by running: " - "> tensorboard --logdir={}".format(log_dir)) + "tensorboard --logdir={}".format(log_dir)) def main(unused_args): diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index ca867dbe3c..8935bcda3d 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1689,6 +1689,10 @@ tf_module { argspec: "args=[\'sp_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { + name: "serialize_tensor" + argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { name: "set_random_seed" argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=None" } diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e5342cba77..ef342fe127 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -573,11 +573,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): patched_http_archive( name = "boringssl", urls = [ - "http://mirror.bazel.build/github.com/google/boringssl/archive/bbcaa15b0647816b9a1a9b9e0d209cd6712f0105.tar.gz", - "https://github.com/google/boringssl/archive/bbcaa15b0647816b9a1a9b9e0d209cd6712f0105.tar.gz", # 2016-07-11 + "http://mirror.bazel.build/github.com/google/boringssl/archive/e3860009a091cd1bd2bc189cdbc3c6d095abde84.tar.gz", + "https://github.com/google/boringssl/archive/e3860009a091cd1bd2bc189cdbc3c6d095abde84.tar.gz", # 2017-07-07 ], - sha256 = "025264d6e9a7ad371f2f66d17a28b6627de0c9592dc2eb54afd062f68f1f9aa3", - strip_prefix = "boringssl-bbcaa15b0647816b9a1a9b9e0d209cd6712f0105", + sha256 = "02f5950f93c4fd3691771c07c9d04cf2999ab01383ff99da345249e93b0fcfb2", + strip_prefix = "boringssl-e3860009a091cd1bd2bc189cdbc3c6d095abde84", # Add patch to boringssl code to support s390x patch_file = str(Label("//third_party/boringssl:add_boringssl_s390x.patch")), ) diff --git a/third_party/boringssl/add_boringssl_s390x.patch b/third_party/boringssl/add_boringssl_s390x.patch index 9a34a59a1d..8b42d10e68 100644 --- a/third_party/boringssl/add_boringssl_s390x.patch +++ b/third_party/boringssl/add_boringssl_s390x.patch @@ -3,9 +3,9 @@ index 7a3adfb..88012ad 100644 --- a/src/include/openssl/base.h +++ b/src/include/openssl/base.h @@ -94,6 +94,8 @@ extern "C" { - #elif defined(__pnacl__) - #define OPENSSL_32_BIT #define OPENSSL_PNACL + #elif defined(__myriad2__) + #define OPENSSL_32_BIT +#elif defined(__s390x__) +#define OPENSSL_64_BIT #else |