diff options
270 files changed, 10234 insertions, 3353 deletions
@@ -34,8 +34,8 @@ new_http_archive( new_http_archive( name = "mobile_multibox", build_file = "models.BUILD", - url = "https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1.zip", - sha256 = "b4c178fd6236dcf0a20d25d07c45eebe85281263978c6a6f1dfc49d75befc45f" + url = "https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip", + sha256 = "859edcddf84dddb974c36c36cfc1f74555148e9c9213dedacf1d6b613ad52b96" ) new_http_archive( @@ -9,6 +9,23 @@ SOURCE_BASE_DIR=`pwd -P` popd > /dev/null PLATFORM="$(uname -s | tr 'A-Z' 'a-z')" + +function is_linux() { + if [[ "${PLATFORM}" == "linux" ]]; then + true + else + false + fi +} + +function is_macos() { + if [[ "${PLATFORM}" == "darwin" ]]; then + true + else + false + fi +} + function is_windows() { # On windows, the shell script is actually running in msys if [[ "${PLATFORM}" =~ msys_nt*|mingw*|cygwin*|uwin* ]]; then @@ -65,16 +82,20 @@ if is_windows; then TF_NEED_OPENCL=0 fi -while [ "$TF_NEED_JEMALLOC" == "" ]; do - read -p "Do you wish to use jemalloc as the malloc implementation? "\ -"(Linux only) [Y/n] " INPUT - case $INPUT in - [Yy]* ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;; - [Nn]* ) echo "jemalloc disabled on Linux"; TF_NEED_JEMALLOC=0;; - "" ) echo "jemalloc enabled on Linux"; TF_NEED_JEMALLOC=1;; - * ) echo "Invalid selection: " $INPUT;; - esac -done +if is_linux; then + while [ "$TF_NEED_JEMALLOC" == "" ]; do + read -p "Do you wish to use jemalloc as the malloc implementation? 
[Y/n] "\ + INPUT + case $INPUT in + [Yy]* ) echo "jemalloc enabled"; TF_NEED_JEMALLOC=1;; + [Nn]* ) echo "jemalloc disabled"; TF_NEED_JEMALLOC=0;; + "" ) echo "jemalloc enabled"; TF_NEED_JEMALLOC=1;; + * ) echo "Invalid selection: " $INPUT;; + esac + done +else + TF_NEED_JEMALLOC=0 +fi if [ "$TF_NEED_JEMALLOC" == "1" ]; then sed -i -e "s/WITH_JEMALLOC = False/WITH_JEMALLOC = True/" tensorflow/core/platform/default/build_config.bzl @@ -99,7 +120,7 @@ done if [ "$TF_NEED_GCP" == "1" ]; then ## Verify that libcurl header files are available. # Only check Linux, since on MacOS the header files are installed with XCode. - if [[ $(uname -a) =~ Linux ]] && [[ ! -f "/usr/include/curl/curl.h" ]]; then + if is_linux && [[ ! -f "/usr/include/curl/curl.h" ]]; then echo "ERROR: It appears that the development version of libcurl is not "\ "available. Please install the libcurl3-dev package." exit 1 @@ -226,8 +247,6 @@ while ! is_windows && true; do done # Find out where the CUDA toolkit is installed -OSNAME=`uname -s` - while true; do # Configure the Cuda SDK version to use. 
if [ -z "$TF_CUDA_VERSION" ]; then @@ -259,9 +278,9 @@ while true; do if is_windows; then CUDA_RT_LIB_PATH="lib/x64/cudart.lib" - elif [ "$OSNAME" == "Linux" ]; then + elif is_linux; then CUDA_RT_LIB_PATH="lib64/libcudart.so${TF_CUDA_EXT}" - elif [ "$OSNAME" == "Darwin" ]; then + elif is_macos; then CUDA_RT_LIB_PATH="lib/libcudart${TF_CUDA_EXT}.dylib" fi @@ -307,10 +326,10 @@ while true; do if is_windows; then cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib/x64/cudnn.lib" cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/lib/x64/cudnn.lib" - elif [ "$OSNAME" == "Linux" ]; then + elif is_linux; then cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib64/libcudnn.so" cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/libcudnn.so" - elif [ "$OSNAME" == "Darwin" ]; then + elif is_macos; then cudnn_lib_path="${CUDNN_INSTALL_PATH}/lib/libcudnn.dylib" cudnn_alt_lib_path="${CUDNN_INSTALL_PATH}/libcudnn.dylib" fi @@ -337,7 +356,7 @@ while true; do echo "libcudnn.dylib resolves to libcudnn${TF_CUDNN_EXT}" fi else - if [ "$OSNAME" == "Darwin" ]; then + if is_macos; then TF_CUDNN_EXT=".${TF_CUDNN_VERSION}.dylib" else TF_CUDNN_EXT=".$TF_CUDNN_VERSION" @@ -347,10 +366,10 @@ while true; do if is_windows; then CUDA_DNN_LIB_PATH="lib/x64/cudnn.lib" CUDA_DNN_LIB_ALT_PATH="lib/x64/cudnn.lib" - elif [ "$OSNAME" == "Linux" ]; then + elif is_linux; then CUDA_DNN_LIB_PATH="lib64/libcudnn.so${TF_CUDNN_EXT}" CUDA_DNN_LIB_ALT_PATH="libcudnn.so${TF_CUDNN_EXT}" - elif [ "$OSNAME" == "Darwin" ]; then + elif is_macos; then CUDA_DNN_LIB_PATH="lib/libcudnn${TF_CUDNN_EXT}" CUDA_DNN_LIB_ALT_PATH="libcudnn${TF_CUDNN_EXT}" fi @@ -361,7 +380,7 @@ while true; do break fi - if [ "$OSNAME" == "Linux" ]; then + if is_linux; then CUDNN_PATH_FROM_LDCONFIG="$(ldconfig -p | sed -n 's/.*libcudnn.so .* => \(.*\)/\1/p')" if [ -e "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" ]; then export TF_CUDNN_VERSION @@ -372,7 +391,7 @@ while true; do echo "Invalid path to cuDNN ${CUDNN_VERSION} toolkit. 
Neither of the following two files can be found:" echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_PATH}" echo "${CUDNN_INSTALL_PATH}/${CUDA_DNN_LIB_ALT_PATH}" - if [ "$OSNAME" == "Linux" ]; then + if is_linux; then echo "${CUDNN_PATH_FROM_LDCONFIG}${TF_CUDNN_EXT}" fi @@ -499,7 +518,7 @@ while true; do fi fi - if [ "$OSNAME" == "Linux" ]; then + if is_linux; then SYCL_RT_LIB_PATH="lib/libComputeCpp.so" fi diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index a6bc8fdc49..9e8ea84baf 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -20,6 +20,12 @@ load( # ----------------------------------------------------------------------------- # Public targets +filegroup( + name = "headers", + srcs = ["c_api.h"], + visibility = ["//tensorflow:__subpackages__"], +) + tf_cuda_library( name = "c_api", srcs = ["c_api.cc"], diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index d3a16c57f6..38117d388f 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -355,6 +355,7 @@ tf_cc_test( tf_gen_op_wrappers_cc( name = "sendrecv_ops", + include_internal_ops = 1, op_lib_names = [ "sendrecv_ops", ], @@ -363,6 +364,7 @@ tf_gen_op_wrappers_cc( tf_gen_op_wrappers_cc( name = "function_ops", + include_internal_ops = 1, op_lib_names = [ "function_ops", ], diff --git a/tensorflow/cc/client/client_session.cc b/tensorflow/cc/client/client_session.cc index 5a98deb259..b407d3ab03 100644 --- a/tensorflow/cc/client/client_session.cc +++ b/tensorflow/cc/client/client_session.cc @@ -45,20 +45,20 @@ SessionOptions ClientSession::MakeDefaultSessionOptions( return options; } -Status ClientSession::Run(const std::vector<ops::Output>& fetch_outputs, +Status ClientSession::Run(const std::vector<Output>& fetch_outputs, std::vector<Tensor>* outputs) const { return Run(FeedType{}, fetch_outputs, {}, outputs); } Status ClientSession::Run(const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, + const std::vector<Output>& fetch_outputs, std::vector<Tensor>* outputs) const { 
return Run(inputs, fetch_outputs, {}, outputs); } Status ClientSession::Run(const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, - const std::vector<ops::Operation>& run_outputs, + const std::vector<Output>& fetch_outputs, + const std::vector<Operation>& run_outputs, std::vector<Tensor>* outputs) const { return Run(RunOptions(), inputs, fetch_outputs, run_outputs, outputs, nullptr); @@ -77,8 +77,8 @@ Status ClientSession::MaybeExtendGraph() const { } Status ClientSession::Run(const RunOptions& run_options, const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, - const std::vector<ops::Operation>& run_outputs, + const std::vector<Output>& fetch_outputs, + const std::vector<Operation>& run_outputs, std::vector<Tensor>* outputs, RunMetadata* run_metadata) const { std::vector<std::pair<string, Tensor>> feeds; diff --git a/tensorflow/cc/client/client_session.h b/tensorflow/cc/client/client_session.h index 9d480477f6..28ff3ec964 100644 --- a/tensorflow/cc/client/client_session.h +++ b/tensorflow/cc/client/client_session.h @@ -31,62 +31,59 @@ limitations under the License. namespace tensorflow { -// A `ClientSession` object lets the caller drive the evaluation of the -// TensorFlow graph constructed with the C++ API. -// -// Example: -// -// Scope root = Scope::NewRootScope(); -// auto a = Placeholder(root, DT_INT32); -// auto c = Add(root, a, {41}); -// -// ClientSession session(root); -// std::vector<Tensor> outputs; -// -// Status s = session.Run({{a, {1}}}, {c}, &outputs); -// if (!s.ok()) { /* Handle error */ } +/// A `ClientSession` object lets the caller drive the evaluation of the +/// TensorFlow graph constructed with the C++ API. 
+/// +/// Example: +/// +/// Scope root = Scope::NewRootScope(); +/// auto a = Placeholder(root, DT_INT32); +/// auto c = Add(root, a, {41}); +/// +/// ClientSession session(root); +/// std::vector<Tensor> outputs; +/// +/// Status s = session.Run({ {a, {1}} }, {c}, &outputs); +/// if (!s.ok()) { ... } class ClientSession { public: - // A data type to represent feeds to a Run call. - // This is a map of `Output` objects returned by op-constructors to the value - // to feed them with. See `ops::Input::Initializer` for details on what can be - // used as feed values. - typedef std::unordered_map<ops::Output, ops::Input::Initializer, - ops::OutputHash> - FeedType; - - // Create a new session to evaluate the graph contained in `scope` by - // connecting to the TensorFlow runtime specified by `target`. + /// A data type to represent feeds to a Run call. + /// + /// This is a map of `Output` objects returned by op-constructors to the value + /// to feed them with. See `Input::Initializer` for details on what can be + /// used as feed values. + typedef std::unordered_map<Output, Input::Initializer, OutputHash> FeedType; + + /// Create a new session to evaluate the graph contained in `scope` by + /// connecting to the TensorFlow runtime specified by `target`. ClientSession(const Scope& scope, const string& target); - // Same as above, but use the empty string ("") as the target specification. + /// Same as above, but use the empty string ("") as the target specification. ClientSession(const Scope& scope); - // Create a new session, configuring it with `session_options`. + /// Create a new session, configuring it with `session_options`. ClientSession(const Scope& scope, const SessionOptions& session_options); - // Evaluate the tensors in `fetch_outputs`. The values are returned as - // `Tensor` objects in `outputs`. The number and order of `outputs` will match - // `fetch_outputs`. 
- Status Run(const std::vector<ops::Output>& fetch_outputs, + /// Evaluate the tensors in `fetch_outputs`. The values are returned as + /// `Tensor` objects in `outputs`. The number and order of `outputs` will + /// match `fetch_outputs`. + Status Run(const std::vector<Output>& fetch_outputs, std::vector<Tensor>* outputs) const; - // Same as above, but use the mapping in `inputs` as feeds. - Status Run(const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, + /// Same as above, but use the mapping in `inputs` as feeds. + Status Run(const FeedType& inputs, const std::vector<Output>& fetch_outputs, std::vector<Tensor>* outputs) const; - // Same as above. Additionally runs the operations ins `run_outputs`. - Status Run(const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, - const std::vector<ops::Operation>& run_outputs, + /// Same as above. Additionally runs the operations ins `run_outputs`. + Status Run(const FeedType& inputs, const std::vector<Output>& fetch_outputs, + const std::vector<Operation>& run_outputs, std::vector<Tensor>* outputs) const; - // Use `run_options` to turn on performance profiling. `run_metadata`, if not - // null, is filled in with the profiling results. + /// Use `run_options` to turn on performance profiling. `run_metadata`, if not + /// null, is filled in with the profiling results. Status Run(const RunOptions& run_options, const FeedType& inputs, - const std::vector<ops::Output>& fetch_outputs, - const std::vector<ops::Operation>& run_outputs, + const std::vector<Output>& fetch_outputs, + const std::vector<Operation>& run_outputs, std::vector<Tensor>* outputs, RunMetadata* run_metadata) const; // TODO(keveman): Add support for partial run. 
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index d191a73547..a4da3aa8e2 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -76,9 +76,9 @@ string ToGuard(const std::string& path) { } // Change: Into: -// ABC // ABC -// // -// DEF // DEF +// ABC /// ABC +// /// +// DEF /// DEF string MakeComment(StringPiece text, StringPiece indent) { string ret; while (!text.empty()) { @@ -89,9 +89,9 @@ string MakeComment(StringPiece text, StringPiece indent) { if (text[newline] != ' ') last_non_space = newline; } if (last_non_space == -1) { - strings::StrAppend(&ret, indent, "//\n"); + strings::StrAppend(&ret, indent, "///\n"); } else { - strings::StrAppend(&ret, indent, "// ", + strings::StrAppend(&ret, indent, "/// ", text.substr(0, last_non_space + 1), "\n"); } text.remove_prefix(newline + 1); @@ -406,7 +406,7 @@ OpInfo::OpInfo(const OpDef& op_def) : op_def(op_def) { for (int i = 0; i < op_def.input_arg_size(); ++i) { const auto& arg(op_def.input_arg(i)); arg_types.push_back(strings::StrCat( - "::tensorflow::ops::", ArgIsList(arg) ? "InputList" : "Input")); + "::tensorflow::", ArgIsList(arg) ? "InputList" : "Input")); arg_names.push_back(AvoidCPPKeywords(arg.name())); // TODO(keveman): Include input type information. @@ -445,8 +445,8 @@ OpInfo::OpInfo(const OpDef& op_def) : op_def(op_def) { for (int i = 0; i < op_def.output_arg_size(); ++i) { const auto& arg = op_def.output_arg(i); bool is_list = ArgIsList(arg); - output_types.push_back(strings::StrCat("::tensorflow::ops::", - is_list ? "OutputList" : "Output")); + output_types.push_back( + strings::StrCat("::tensorflow::", is_list ? "OutputList" : "Output")); output_names.push_back(AvoidCPPKeywords(arg.name())); is_list_output.push_back(is_list); } @@ -537,26 +537,26 @@ void OpInfo::WriteClassDecl(WritableFile* h) const { if (output_types.empty()) { // Allow casting this class to Operation. 
strings::StrAppend(&class_decl, - " operator ::tensorflow::ops::Operation() const { " + " operator ::tensorflow::Operation() const { " "return operation; }\n"); } else if (output_types.size() == 1) { if (is_list_output[0]) { // Write the subscript operator, allowing out[i] for the list-typed // output. strings::StrAppend(&class_decl, - " ::tensorflow::ops::Output operator[](size_t index) " + " ::tensorflow::Output operator[](size_t index) " "const { return ", output_names[0], "[index]; }\n\n"); } else { // Write type cast functions, allowing casting this class to Input and // Output. - strings::StrAppend( - &class_decl, " operator ::tensorflow::ops::Output() const { return ", - output_names[0], "; }\n"); - strings::StrAppend( - &class_decl, " operator ::tensorflow::ops::Input() const { return ", - output_names[0], "; }\n"); + strings::StrAppend(&class_decl, + " operator ::tensorflow::Output() const { return ", + output_names[0], "; }\n"); + strings::StrAppend(&class_decl, + " operator ::tensorflow::Input() const { return ", + output_names[0], "; }\n"); // Write node() to get the Node* directly. strings::StrAppend(&class_decl, " ::tensorflow::Node* node() const { return ", diff --git a/tensorflow/cc/framework/cc_op_gen.h b/tensorflow/cc/framework/cc_op_gen.h index d1e83a87c3..3d35d0ef32 100644 --- a/tensorflow/cc/framework/cc_op_gen.h +++ b/tensorflow/cc/framework/cc_op_gen.h @@ -20,7 +20,7 @@ limitations under the License. namespace tensorflow { -// Result is written to files dot_h and dot_cc. +/// Result is written to files dot_h and dot_cc. void WriteCCOps(const OpList& ops, const std::string& dot_h_fname, const std::string& dot_cc_fname); diff --git a/tensorflow/cc/framework/grad_op_registry.h b/tensorflow/cc/framework/grad_op_registry.h index e4da8570f1..190b96f685 100644 --- a/tensorflow/cc/framework/grad_op_registry.h +++ b/tensorflow/cc/framework/grad_op_registry.h @@ -24,30 +24,30 @@ limitations under the License. 
namespace tensorflow { namespace ops { -// GradFunc is the signature for all gradient functions in GradOpRegistry. -// Implementations should add operations to compute the gradient outputs of 'op' -// (returned in 'grad_outputs') using 'scope' and 'grad_inputs'. +/// GradFunc is the signature for all gradient functions in GradOpRegistry. +/// Implementations should add operations to compute the gradient outputs of +/// 'op' (returned in 'grad_outputs') using 'scope' and 'grad_inputs'. typedef Status (*GradFunc)(const Scope& scope, const Operation& op, const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs); -// GradOpRegistry maintains a static registry of gradient functions. -// Gradient functions are indexed in the registry by the forward op name (i.e. -// "MatMul" -> MatMulGrad func). +/// GradOpRegistry maintains a static registry of gradient functions. +/// Gradient functions are indexed in the registry by the forward op name (i.e. +/// "MatMul" -> MatMulGrad func). class GradOpRegistry { public: - // Registers 'func' as the gradient function for 'op'. - // Returns true if registration was successful, check fails otherwise. + /// Registers 'func' as the gradient function for 'op'. + /// Returns true if registration was successful, check fails otherwise. bool Register(const string& op, GradFunc func); - // Sets 'func' to the gradient function for 'op' and returns Status OK if - // the gradient function for 'op' exists in the registry. - // Note that 'func' can be null for ops that have registered no-gradient with - // the registry. - // Returns error status otherwise. + /// Sets 'func' to the gradient function for 'op' and returns Status OK if + /// the gradient function for 'op' exists in the registry. + /// Note that 'func' can be null for ops that have registered no-gradient with + /// the registry. + /// Returns error status otherwise. 
Status Lookup(const string& op, GradFunc* func) const; - // Returns a pointer to the global gradient function registry. + /// Returns a pointer to the global gradient function registry. static GradOpRegistry* Global(); private: diff --git a/tensorflow/cc/framework/gradient_checker.cc b/tensorflow/cc/framework/gradient_checker.cc index 89baa1a5bb..849a8eed6f 100644 --- a/tensorflow/cc/framework/gradient_checker.cc +++ b/tensorflow/cc/framework/gradient_checker.cc @@ -35,20 +35,20 @@ namespace { template <typename T> Status ComputeTheoreticalJacobianTranspose( - const Scope& scope, const ops::OutputList& xs, + const Scope& scope, const OutputList& xs, const std::vector<TensorShape>& x_shapes, - const std::vector<Tensor>& x_datas, const ops::OutputList& ys, + const std::vector<Tensor>& x_datas, const OutputList& ys, const std::vector<TensorShape>& y_shapes, std::vector<Tensor>& jacobian_ts) { int y_num = y_shapes.size(); int x_num = x_shapes.size(); // Call AddSymbolicGradients to get 'dxs' (we will feed 'dys'). - ops::OutputList dys; + OutputList dys; for (const auto& y_shape : y_shapes) { // TODO(suharshs): This currently assumes that all x's are the same type. dys.push_back(Cast(scope, Const(scope, 1.0, y_shape), xs[0].type())); } - ops::OutputList dxs; + OutputList dxs; TF_RETURN_IF_ERROR(AddSymbolicGradients(scope, ys, xs, dys, &dxs)); // Initialize 'dy_data' to zeros. @@ -97,8 +97,8 @@ Status ComputeTheoreticalJacobianTranspose( return Status::OK(); } -Status EvaluateGraph(ClientSession& session, const ops::OutputList& xs, - const ops::OutputList& ys, std::vector<Tensor>& x_datas, +Status EvaluateGraph(ClientSession& session, const OutputList& xs, + const OutputList& ys, std::vector<Tensor>& x_datas, std::vector<Tensor>* y_datas) { // Create the feed list. 
ClientSession::FeedType feed_list; @@ -123,11 +123,13 @@ Status EvaluateGraph(ClientSession& session, const ops::OutputList& xs, } template <typename T> -Status ComputeNumericJacobianTranspose( - const Scope& scope, const ops::OutputList& xs, - const std::vector<TensorShape>& x_shapes, const ops::OutputList& ys, - const std::vector<TensorShape>& y_shapes, const T delta, - std::vector<Tensor>& x_datas, std::vector<Tensor>& jacobian_ts) { +Status ComputeNumericJacobianTranspose(const Scope& scope, const OutputList& xs, + const std::vector<TensorShape>& x_shapes, + const OutputList& ys, + const std::vector<TensorShape>& y_shapes, + const T delta, + std::vector<Tensor>& x_datas, + std::vector<Tensor>& jacobian_ts) { int y_num = y_shapes.size(); int x_num = x_shapes.size(); @@ -170,7 +172,7 @@ Status ComputeNumericJacobianTranspose( } template <typename T> -void InitJacobians(const ops::OutputList& xs, +void InitJacobians(const OutputList& xs, const std::vector<TensorShape>& x_shapes, const std::vector<TensorShape>& y_shapes, std::vector<Tensor>& jacobians) { @@ -191,10 +193,9 @@ void InitJacobians(const ops::OutputList& xs, } template <typename T> -Status ComputeGradientErrorInternal(const Scope& scope, - const ops::OutputList& xs, +Status ComputeGradientErrorInternal(const Scope& scope, const OutputList& xs, const std::vector<TensorShape>& x_shapes, - const ops::OutputList& ys, + const OutputList& ys, const std::vector<TensorShape>& y_shapes, std::vector<Tensor>& x_datas, T* max_error) { @@ -231,9 +232,9 @@ Status ComputeGradientErrorInternal(const Scope& scope, } // namespace template <typename T> -Status ComputeGradientError(const Scope& scope, const ops::OutputList& xs, +Status ComputeGradientError(const Scope& scope, const OutputList& xs, const std::vector<TensorShape>& x_shapes, - const ops::OutputList& ys, + const OutputList& ys, const std::vector<TensorShape>& y_shapes, T* max_error) { if (xs.size() != x_shapes.size()) { @@ -259,8 +260,8 @@ Status 
ComputeGradientError(const Scope& scope, const ops::OutputList& xs, } template <typename T> -Status ComputeGradientError(const Scope& scope, const ops::Output& x, - const Tensor& x_init_value, const ops::Output& y, +Status ComputeGradientError(const Scope& scope, const Output& x, + const Tensor& x_init_value, const Output& y, const TensorShape& y_shape, T* max_error) { // Initialize 'x_data' from 'x_init_value'. std::vector<Tensor> x_datas(1, Tensor(x_init_value)); @@ -269,14 +270,14 @@ Status ComputeGradientError(const Scope& scope, const ops::Output& x, {y_shape}, x_datas, max_error); } -#define INSTANTIATE_GRAD_ERR_TYPE(T) \ - template Status ComputeGradientError<T>( \ - const Scope& scope, const ops::OutputList& xs, \ - const std::vector<TensorShape>& x_shapes, const ops::OutputList& ys, \ - const std::vector<TensorShape>& y_shapes, T* max_error); \ - template Status ComputeGradientError<T>( \ - const Scope& scope, const ops::Output& x, const Tensor& x_init_value, \ - const ops::Output& y, const TensorShape& y_shape, T* max_error); +#define INSTANTIATE_GRAD_ERR_TYPE(T) \ + template Status ComputeGradientError<T>( \ + const Scope& scope, const OutputList& xs, \ + const std::vector<TensorShape>& x_shapes, const OutputList& ys, \ + const std::vector<TensorShape>& y_shapes, T* max_error); \ + template Status ComputeGradientError<T>( \ + const Scope& scope, const Output& x, const Tensor& x_init_value, \ + const Output& y, const TensorShape& y_shape, T* max_error); INSTANTIATE_GRAD_ERR_TYPE(float); INSTANTIATE_GRAD_ERR_TYPE(double); diff --git a/tensorflow/cc/framework/gradient_checker.h b/tensorflow/cc/framework/gradient_checker.h index 66a2b3040c..2e61213615 100644 --- a/tensorflow/cc/framework/gradient_checker.h +++ b/tensorflow/cc/framework/gradient_checker.h @@ -22,20 +22,20 @@ limitations under the License. 
namespace tensorflow { -// Returns in 'max_error' the maximum element-wise error for dy/dx between the -// computed and numeric Jacobian matrices where 'xs' and 'ys' are tensors. -// This function adds operations to the graph associated with 'scope'. +/// Returns in 'max_error' the maximum element-wise error for dy/dx between the +/// computed and numeric Jacobian matrices where 'xs' and 'ys' are tensors. +/// This function adds operations to the graph associated with 'scope'. template <typename T> -Status ComputeGradientError(const Scope& scope, const ops::OutputList& xs, +Status ComputeGradientError(const Scope& scope, const OutputList& xs, const std::vector<TensorShape>& x_shapes, - const ops::OutputList& ys, + const OutputList& ys, const std::vector<TensorShape>& y_shapes, T* max_error); -// Overload of ComputeGradientError which takes an initial value for 'x'. +/// Overload of ComputeGradientError which takes an initial value for 'x'. template <typename T> -Status ComputeGradientError(const Scope& scope, const ops::Output& x, - const Tensor& x_init_value, const ops::Output& y, +Status ComputeGradientError(const Scope& scope, const Output& x, + const Tensor& x_init_value, const Output& y, const TensorShape& y_shape, T* max_error); } // namespace tensorflow diff --git a/tensorflow/cc/framework/gradients.cc b/tensorflow/cc/framework/gradients.cc index 0059bdd6d1..2c60f947a5 100644 --- a/tensorflow/cc/framework/gradients.cc +++ b/tensorflow/cc/framework/gradients.cc @@ -29,8 +29,6 @@ limitations under the License. 
#include "tensorflow/core/platform/macros.h" namespace tensorflow { -using namespace ops; // NOLINT(build/namespaces) - namespace { struct OutputHash { @@ -48,7 +46,7 @@ struct OutputEq { class SymbolicGradientBuilder { public: SymbolicGradientBuilder(const Scope& scope, - const GradOpRegistry* registry, + const ops::GradOpRegistry* registry, const std::vector<Output>& outputs, const std::vector<Output>& inputs, const std::vector<Output>& grad_inputs, @@ -81,7 +79,7 @@ class SymbolicGradientBuilder { std::vector<Output>* grad_outputs); const Scope& scope_; - const GradOpRegistry* registry_; + const ops::GradOpRegistry* registry_; const std::vector<Output>& outputs_; const std::vector<Output>& inputs_; const std::vector<Output>& grad_inputs_; @@ -119,19 +117,15 @@ class SymbolicGradientBuilder { }; SymbolicGradientBuilder::SymbolicGradientBuilder( - const Scope& scope, - const GradOpRegistry* registry, - const std::vector<Output>& outputs, - const std::vector<Output>& inputs, - const std::vector<Output>& grad_inputs, - std::vector<Output>* grad_outputs) + const Scope& scope, const ops::GradOpRegistry* registry, + const std::vector<Output>& outputs, const std::vector<Output>& inputs, + const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs) : scope_(scope), registry_(registry), outputs_(outputs), inputs_(inputs), grad_inputs_(grad_inputs), - grad_outputs_(grad_outputs) { -} + grad_outputs_(grad_outputs) {} Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad, const Output& src) { @@ -249,14 +243,14 @@ Status SymbolicGradientBuilder::SumGradients(const Output& src, Output* grad) { } else { // Otherwise, adds backprop-ed gradients. // TODO(andydavis) Use a better accumulator here. 
- *grad = AddN(scope_, grads_to_keep); + *grad = ops::AddN(scope_, grads_to_keep); } return Status::OK(); } bool SymbolicGradientBuilder::IsPrimitiveOpWithNoGrad(const string& opname) { - GradFunc grad_fn; + ops::GradFunc grad_fn; Status s = registry_->Lookup(opname, &grad_fn); return s.ok() && (grad_fn == nullptr); } @@ -265,7 +259,7 @@ Status SymbolicGradientBuilder::CallGradFunction( const Operation& op, const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs) { - GradFunc grad_fn; + ops::GradFunc grad_fn; TF_RETURN_IF_ERROR(registry_->Lookup(op.node()->type_string(), &grad_fn)); TF_RETURN_IF_ERROR(grad_fn(scope_, op, grad_inputs, grad_outputs)); TF_RETURN_IF_ERROR(scope_.status()); @@ -333,7 +327,7 @@ Status SymbolicGradientBuilder::AddGradients() { // TODO(andydavis) If static shapes are known, replace 'ZerosLike' with // zero-filled Constant node of appropriate shape. for (const int dy_index : no_grad_dy_indices) { - dy[dy_index] = ZerosLike(scope_, Output(n, dy_index)); + dy[dy_index] = ops::ZerosLike(scope_, Output(n, dy_index)); } } @@ -368,7 +362,7 @@ Status AddSymbolicGradients(const Scope& scope, const std::vector<Output>& inputs, const std::vector<Output>& grad_inputs, std::vector<Output>* grad_outputs) { - SymbolicGradientBuilder builder(scope, GradOpRegistry::Global(), outputs, + SymbolicGradientBuilder builder(scope, ops::GradOpRegistry::Global(), outputs, inputs, grad_inputs, grad_outputs); return builder.AddGradients(); } diff --git a/tensorflow/cc/framework/gradients.h b/tensorflow/cc/framework/gradients.h index fa5e608bd4..d076bc43b4 100644 --- a/tensorflow/cc/framework/gradients.h +++ b/tensorflow/cc/framework/gradients.h @@ -21,28 +21,28 @@ limitations under the License. namespace tensorflow { -// NOTE: This API is a work in progress and will likely be changing frequently. 
-// -// Given initial gradients 'grad_inputs' (which represent the symbolic partial -// derivatives of some loss function 'L' w.r.t 'outputs'), adds gradient nodes -// to the graph associated with 'scope', which compute (and return in -// 'grad_outputs') the symbolic partial derivatives of 'L' w.r.t 'inputs'. -// +/// NOTE: This API is a work in progress and will likely be changing frequently. +/// +/// Given initial gradients 'grad_inputs' (which represent the symbolic partial +/// derivatives of some loss function 'L' w.r.t 'outputs'), adds gradient nodes +/// to the graph associated with 'scope', which compute (and return in +/// 'grad_outputs') the symbolic partial derivatives of 'L' w.r.t 'inputs'. +/// // TODO(andydavis) Add overload of this function with no 'grad_inputs' arg. // Implementation will fill in 'OnesLike' for all shapes in 'outputs'. Status AddSymbolicGradients(const Scope& scope, - const std::vector<ops::Output>& outputs, - const std::vector<ops::Output>& inputs, - const std::vector<ops::Output>& grad_inputs, - std::vector<ops::Output>* grad_outputs); - -// Returns a sentinel Output that represents 'no gradient' (i.e. no gradient -// flows along some graph edge during backpropagation). -// Can be returned in 'grad_outputs' by an invocation of 'AddSymbolicGradients' -// (note that gradient flow through an Output can be stopped through the use of -// the StopGradient node). -ops::Output NoGradient(); + const std::vector<Output>& outputs, + const std::vector<Output>& inputs, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs); + +/// Returns a sentinel Output that represents 'no gradient' (i.e. no gradient +/// flows along some graph edge during backpropagation). +/// Can be returned in 'grad_outputs' by an invocation of 'AddSymbolicGradients' +/// (note that gradient flow through an Output can be stopped through the use of +/// the StopGradient node). 
+Output NoGradient(); } // namespace tensorflow diff --git a/tensorflow/cc/framework/gradients_test.cc b/tensorflow/cc/framework/gradients_test.cc index 9ae927a762..6e9ff3e01c 100644 --- a/tensorflow/cc/framework/gradients_test.cc +++ b/tensorflow/cc/framework/gradients_test.cc @@ -90,7 +90,7 @@ TEST_F(GradientsTest, OneMatMul) { } else { // Call AddSymbolicGradients. auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}}); - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK( AddSymbolicGradients(scope, {z}, {x, y}, {dz}, &grad_outputs)); } @@ -123,7 +123,7 @@ TEST_F(GradientsTest, TwoMatMuls_Chained) { } else { // Call AddSymbolicGradients. auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}}); - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK( AddSymbolicGradients(scope, {z}, {u, v}, {dz}, &grad_outputs)); } @@ -160,7 +160,7 @@ TEST_F(GradientsTest, TwoMatMuls_Independent) { // Call AddSymbolicGradients. auto dv = Const(scope, {{1.0, 1.0}, {1.0, 1.0}}); auto dz = Const(scope, {{1.0, 1.0}, {1.0, 1.0}}); - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK(AddSymbolicGradients(scope, {v, z}, {t, u, x, y}, {dv, dz}, &grad_outputs)); } @@ -191,7 +191,7 @@ TEST_F(GradientsTest, PackUnpack_Chained) { auto pack_grad = Unpack(scope, unpack_grad.output, 3); } else { // Call AddSymbolicGradients. - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {a, b, c}, {dx, dy, dz}, &grad_outputs)); } @@ -225,7 +225,7 @@ TEST_F(GradientsTest, PackUnpack_StopBackprop) { auto unpack_grad = Pack(scope, {dx, dy, dz}); } else { // Call AddSymbolicGradients. 
- std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK(AddSymbolicGradients(scope, unpack.output, {pack}, {dx, dy, dz}, &grad_outputs)); } @@ -252,7 +252,7 @@ TEST_F(GradientsTest, DependentGradOutputs) { // The gradient w.r.t to 'v' (returned in grad_outputs[0]) is dependent on // the gradient w.r.t. to 'x' (returned in grad_outputs[1]). auto dz = Const(scope_test_, {{5}}); - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK( AddSymbolicGradients(scope_test_, {z}, {v, x}, {dz}, &grad_outputs)); @@ -281,7 +281,7 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) { {3, 4, 2}); // clang-format on - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {pack}, unpack.output, {dx}, &grad_outputs)); @@ -333,7 +333,7 @@ class StopGradientSingleOutputMultiEdgeTest : public ::testing::Test { auto g2 = Const(scope_, {{9, 10}, {11, 12}}); // Call AddSymbolicGradients and compare against 'expected_grad'. - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_EXPECT_OK(AddSymbolicGradients(scope_, {out0, out1, out2}, {z}, {g0, g1, g2}, &grad_outputs)); @@ -410,7 +410,7 @@ class StopGradientMultiOutputTest : public ::testing::Test { auto g2 = Const(scope_, {17, 18, 19, 20, 21, 22, 23, 24}, {2, 4}); // Call AddSymbolicGradients and compare against 'expected_grad'. - std::vector<ops::Output> grad_outputs; + std::vector<Output> grad_outputs; TF_EXPECT_OK(AddSymbolicGradients(scope_, {out0, out1, out2}, {x}, {g0, g1, g2}, &grad_outputs)); diff --git a/tensorflow/cc/framework/ops.h b/tensorflow/cc/framework/ops.h index 82ba9c68f0..32086d4123 100644 --- a/tensorflow/cc/framework/ops.h +++ b/tensorflow/cc/framework/ops.h @@ -28,7 +28,7 @@ namespace tensorflow { class Output; -// Represents a node in the computation graph. +/// Represents a node in the computation graph. 
class Operation { public: Operation() : node_(nullptr) {} @@ -56,7 +56,7 @@ class Operation { Node* node_; }; -// Represents a tensor value produced by an Operation. +/// Represents a tensor value produced by an Operation. class Output { public: Output() = default; @@ -87,18 +87,18 @@ struct OutputHash { } }; -// Represents a tensor value that can be used as an operand to an Operation. +/// Represents a tensor value that can be used as an operand to an Operation. class Input { public: - // Initializer enables constructing an Input object from various kinds of C++ - // constants such as simple primitive constants and nested initializer lists - // representing a multi-dimensional array. Initializer constructors are all - // templates, so the aforementioned kinds of C++ constants can be used to - // construct an Initializer. Initializer stores the value it got constructed - // with in a Tensor object. + /// Initializer enables constructing an Input object from various kinds of C++ + /// constants such as simple primitive constants and nested initializer lists + /// representing a multi-dimensional array. Initializer constructors are all + /// templates, so the aforementioned kinds of C++ constants can be used to + /// construct an Initializer. Initializer stores the value it got constructed + /// with in a Tensor object. struct Initializer { - // Construct from a scalar value of an arithmetic type or a type that can be - // converted to a string (eg. a string literal). + /// Construct from a scalar value of an arithmetic type or a type that can + /// be converted to a string (eg. a string literal). 
template <typename T, typename = typename std::enable_if< std::is_arithmetic<T>::value || std::is_convertible<T, string>::value>::type> @@ -111,7 +111,7 @@ class Input { Initializer(const Tensor& t) : tensor(t) {} // NOLINT(runtime/explicit) - // Construct from a scalar value and an explicit shape + /// Construct from a scalar value and an explicit shape template <typename T, typename = typename std::enable_if< std::is_arithmetic<T>::value || std::is_convertible<T, string>::value>::type> @@ -124,7 +124,7 @@ class Input { tensor = t; } - // Construct from a initializer list of scalars (a one-dimensional tensor). + /// Construct from a initializer list of scalars (a one-dimensional tensor). template <typename T, typename = typename std::enable_if< std::is_arithmetic<T>::value || std::is_convertible<T, string>::value>::type> @@ -137,7 +137,7 @@ class Input { tensor = t; } - // Construct from a initializer list of scalars and an explicit shape. + /// Construct from a initializer list of scalars and an explicit shape. template <typename T, typename = typename std::enable_if< std::is_arithmetic<T>::value || std::is_convertible<T, string>::value>::type> @@ -154,11 +154,11 @@ class Input { tensor = t; } - // Construct a multi-dimensional tensor from a nested initializer list. Note - // that C++ syntax allows nesting of arbitrarily typed initializer lists, so - // such invalid initializers cannot be disallowed at compile time. This - // function performs checks to make sure that the nested initializer list is - // indeed a valid multi-dimensional tensor. + /// Construct a multi-dimensional tensor from a nested initializer + /// list. Note that C++ syntax allows nesting of arbitrarily typed + /// initializer lists, so such invalid initializers cannot be disallowed at + /// compile time. This function performs checks to make sure that the nested + /// initializer list is indeed a valid multi-dimensional tensor. 
Initializer(const std::initializer_list<Initializer>& v); template <typename T, bool = std::is_convertible<T, string>::value> @@ -185,14 +185,14 @@ class Input { Tensor tensor; }; - // All of Input's constructors are implicit. Input can be implicitly - // constructed from the following objects : - // * Output: This is so that the output of an Operation can be directly used - // as the input to a op wrapper, which takes Inputs. - // * A scalar, or a multi-dimensional tensor specified as a recursive - // initializer list. This enables directly passing constants as - // inputs to op wrappers. - // * A Tensor object. + /// All of Input's constructors are implicit. Input can be implicitly + /// constructed from the following objects : + /// * Output: This is so that the output of an Operation can be directly used + /// as the input to a op wrapper, which takes Inputs. + /// * A scalar, or a multi-dimensional tensor specified as a recursive + /// initializer list. This enables directly passing constants as + /// inputs to op wrappers. + /// * A Tensor object. Input(const Output& o) : output_(o) {} // NOLINT(runtime/explicit) template <typename T, typename = typename std::enable_if< @@ -220,8 +220,8 @@ class Input { tensor_ = Initializer(init).tensor; } - // Constructor specifying a node name, index and datatype. This should only be - // used for specifying a backward edge, needed by control flow. + /// Constructor specifying a node name, index and datatype. This should only + /// be used for specifying a backward edge, needed by control flow. Input(const string& name, int i, DataType dt) : node_name_(name), index_(i), data_type_(dt) {} @@ -241,15 +241,15 @@ class Input { DataType data_type_ = DT_INVALID; }; -// A type for representing the output of ops that produce more than one output, -// or a list of tensors. +/// A type for representing the output of ops that produce more than one output, +/// or a list of tensors. 
typedef std::vector<Output> OutputList; -// A type for representing the input to ops that require a list of tensors. +/// A type for representing the input to ops that require a list of tensors. class InputList { public: - // Implicitly convert a list of outputs to a list of inputs. This is useful to - // write code such as ops::Concat(ops::Split(x, 4)). + /// Implicitly convert a list of outputs to a list of inputs. This is useful + /// to write code such as ops::Concat(ops::Split(x, 4)). InputList(const OutputList& out) { // NOLINT(runtime/explicit) for (auto const& x : out) { inputs_.push_back(x); diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc index 2bce24f2fc..e1af5b36e8 100644 --- a/tensorflow/cc/framework/scope.cc +++ b/tensorflow/cc/framework/scope.cc @@ -70,14 +70,14 @@ Scope::Scope(const Scope& other, Scope::Tags::OpName, const string& name, colocation_constraints_(other.colocation_constraints_) {} Scope::Scope(const Scope& other, Scope::Tags::ControlDeps, - std::vector<ops::Operation> control_deps, bool clear_control_deps) + std::vector<Operation> control_deps, bool clear_control_deps) : graph_(other.graph_), status_(other.status_), name_map_(other.name_map_), refiner_(other.refiner_), scope_used_(other.scope_used_), control_deps_(clear_control_deps - ? std::vector<ops::Operation>() + ? 
std::vector<Operation>() : (control_deps.insert(control_deps.begin(), other.control_deps_.begin(), other.control_deps_.end()), @@ -148,7 +148,7 @@ Scope::Scope(const Scope& other, Scope::Tags::KernelLabel, colocation_constraints_(other.colocation_constraints_) {} Scope::Scope(const Scope& other, Scope::Tags::Colocate, - const ops::Operation& colocate_with_op, bool clear_colocations) + const Operation& colocate_with_op, bool clear_colocations) : graph_(other.graph_), status_(other.status_), name_map_(other.name_map_), @@ -166,7 +166,7 @@ Scope::Scope(const Scope& other, Scope::Tags::Colocate, : other.GetColocationConstraints(colocate_with_op)) {} std::unordered_set<string> Scope::GetColocationConstraints( - const ops::Operation& colocate_with_op) const { + const Operation& colocate_with_op) const { std::unordered_set<string> current_constraints(colocation_constraints_); const NodeDef& node_def = colocate_with_op.node()->def(); std::vector<string> node_constraints; @@ -298,21 +298,20 @@ Scope Scope::WithOpName(const string& op_name) const { } Scope Scope::WithControlDependencies( - const gtl::ArraySlice<ops::Operation>& control_deps) const { - return Scope( - *this, Scope::Tags::ControlDeps(), - std::vector<ops::Operation>(control_deps.begin(), control_deps.end()), - /* clear_control_deps */ false); + const gtl::ArraySlice<Operation>& control_deps) const { + return Scope(*this, Scope::Tags::ControlDeps(), + std::vector<Operation>(control_deps.begin(), control_deps.end()), + /* clear_control_deps */ false); } -Scope Scope::WithControlDependencies(const ops::Output& control_dep) const { +Scope Scope::WithControlDependencies(const Output& control_dep) const { return Scope(*this, Scope::Tags::ControlDeps(), - std::vector<ops::Operation>(1, control_dep.op()), + std::vector<Operation>(1, control_dep.op()), /* clear_control_deps */ false); } Scope Scope::WithNoControlDependencies() const { - return Scope(*this, Scope::Tags::ControlDeps(), std::vector<ops::Operation>(), + 
return Scope(*this, Scope::Tags::ControlDeps(), std::vector<Operation>(), /* clear_control_deps */ true); } @@ -320,13 +319,13 @@ Scope Scope::WithDevice(const string& device) const { return Scope(*this, Scope::Tags::Device(), device); } -Scope Scope::ColocateWith(const ops::Operation& op) const { +Scope Scope::ColocateWith(const Operation& op) const { return Scope(*this, Scope::Tags::Colocate(), op, /* clear_colocations */ false); } Scope Scope::ClearColocation() const { - return Scope(*this, Scope::Tags::Colocate(), ops::Operation(), + return Scope(*this, Scope::Tags::Colocate(), Operation(), /* clear_colocations */ true); } diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h index edf25e2227..47d1026bb2 100644 --- a/tensorflow/cc/framework/scope.h +++ b/tensorflow/cc/framework/scope.h @@ -33,129 +33,136 @@ class GraphDef; class NodeBuilder; struct CompositeOpScopes; -// A `Scope` object represents a set of related TensorFlow ops that have the -// same properties such as a common name prefix. -// A Scope object is a container for TensorFlow Op properties. Op constructors -// get a Scope object as a mandatory first argument and the constructed op -// acquires the properties in the object. -// -// A simple example: -// -// using namespace ops; -// Scope root = Scope::NewRootScope(); -// auto c1 = Const(root, {{1, 1}}); -// auto m = MatMul(root, c1, {{41}, {1}}); -// GraphDef gdef; -// Status s = root.ToGraphDef(&gdef); -// if (!s.ok()) { /* Handle error */ } -// -// Scope hierarchy: -// The Scope class provides various With<> functions that create a new scope. -// The new scope typically has one property changed while other properties are -// inherited from the parent scope. -// NewSubScope(name) method appends `name` to the prefix of names for ops -// created within the scope, and WithOpName() changes the suffix which -// otherwise defaults to the type of the op. 
-// -// Name examples: -// Scope root = Scope::NewRootScope(); -// Scope linear = root.NewSubScope("linear"); -// /* W will be named "linear/W" */ -// auto W = Variable(linear.WithOpName("W"), -// {2, 2}, DT_FLOAT); -// /* b will be named "linear/b" */ -// auto b = Variable(linear.WithOpName("b"), -// {2}, DT_FLOAT); -// auto x = Const(linear, {...}); // name: "linear/Const" -// auto m = MatMul(linear, x, W); // name: "linear/MatMul" -// auto r = BiasAdd(linear, m, b); // name: "linear/BiasAdd" -// -// Scope lifetime: -// A new scope is created by calling Scope::NewRootScope. This creates some -// resources that are shared by all the child scopes that inherit from this -// scope, directly or transitively. For instance, a new scope creates a new -// Graph object to which operations are added when the new scope or its children -// are used by an Op constructor. The new scope also has a Status object which -// will be used to indicate errors by Op-constructor functions called on any -// child scope. The Op-constructor functions have to check the scope's status by -// calling the ok() method before proceeding to construct the op. -// -// Thread safety: -// A `Scope` object is NOT thread-safe. Threads cannot concurrently call -// op-constructor functions on the same `Scope` object. +/// A `Scope` object represents a set of related TensorFlow ops that have the +/// same properties such as a common name prefix. +/// +/// A Scope object is a container for TensorFlow Op properties. Op constructors +/// get a Scope object as a mandatory first argument and the constructed op +/// acquires the properties in the object. +/// +/// A simple example: +/// +/// using namespace ops; +/// Scope root = Scope::NewRootScope(); +/// auto c1 = Const(root, { {1, 1} }); +/// auto m = MatMul(root, c1, { {41}, {1} }); +/// GraphDef gdef; +/// Status s = root.ToGraphDef(&gdef); +/// if (!s.ok()) { ... 
} +/// +/// Scope hierarchy: +/// +/// The Scope class provides various With<> functions that create a new scope. +/// The new scope typically has one property changed while other properties are +/// inherited from the parent scope. +/// NewSubScope(name) method appends `name` to the prefix of names for ops +/// created within the scope, and WithOpName() changes the suffix which +/// otherwise defaults to the type of the op. +/// +/// Name examples: +/// +/// Scope root = Scope::NewRootScope(); +/// Scope linear = root.NewSubScope("linear"); +/// // W will be named "linear/W" +/// auto W = Variable(linear.WithOpName("W"), +/// {2, 2}, DT_FLOAT); +/// // b will be named "linear/b" +/// auto b = Variable(linear.WithOpName("b"), +/// {2}, DT_FLOAT); +/// auto x = Const(linear, {...}); // name: "linear/Const" +/// auto m = MatMul(linear, x, W); // name: "linear/MatMul" +/// auto r = BiasAdd(linear, m, b); // name: "linear/BiasAdd" +/// +/// Scope lifetime: +/// +/// A new scope is created by calling Scope::NewRootScope. This creates some +/// resources that are shared by all the child scopes that inherit from this +/// scope, directly or transitively. For instance, a new scope creates a new +/// Graph object to which operations are added when the new scope or its +/// children are used by an Op constructor. The new scope also has a Status +/// object which will be used to indicate errors by Op-constructor functions +/// called on any child scope. The Op-constructor functions have to check the +/// scope's status by calling the ok() method before proceeding to construct the +/// op. +/// +/// Thread safety: +/// +/// A `Scope` object is NOT thread-safe. Threads cannot concurrently call +/// op-constructor functions on the same `Scope` object. class Scope { public: // The following functions are for users making graphs. They return brand new // scopes, or scopes derived from an existing scope object. - // Return a new scope. 
- // This creates a new graph and all operations constructed in this graph - // should use the returned object as the "root" scope. + /// Return a new scope. + /// This creates a new graph and all operations constructed in this graph + /// should use the returned object as the "root" scope. static Scope NewRootScope(); - // Return a new scope. Ops created with this scope will have - // <name>/<child_scope_name> as the prefix. The actual name will be unique - // in the current scope. All other properties are inherited from the current - // scope. If child_scope_name is empty, the '/' is elided. + /// Return a new scope. Ops created with this scope will have + /// <name>/<child_scope_name> as the prefix. The actual name will be unique + /// in the current scope. All other properties are inherited from the current + /// scope. If child_scope_name is empty, the '/' is elided. Scope NewSubScope(const string& child_scope_name) const; - // Return a new scope. All ops created within the returned scope will have - // names of the form <name>/<op_name>[_<suffix]. + /// Return a new scope. All ops created within the returned scope will have + /// names of the form <name>/<op_name>[_<suffix]. Scope WithOpName(const string& op_name) const; - // Return a new scope. All ops created within the returned scope will have as - // control dependencies the union of operations in the control_deps vector and - // the control dependencies of the current scope. + /// Return a new scope. All ops created within the returned scope will have as + /// control dependencies the union of operations in the control_deps vector + /// and the control dependencies of the current scope. Scope WithControlDependencies( - const gtl::ArraySlice<ops::Operation>& control_deps) const; - // Same as above, but convenient to add control dependency on the operation - // producing the control_dep output. 
- Scope WithControlDependencies(const ops::Output& control_dep) const; + const gtl::ArraySlice<Operation>& control_deps) const; + /// Same as above, but convenient to add control dependency on the operation + /// producing the control_dep output. + Scope WithControlDependencies(const Output& control_dep) const; - // Return a new scope. All ops created within the returned scope will have no - // control dependencies on other operations. + /// Return a new scope. All ops created within the returned scope will have no + /// control dependencies on other operations. Scope WithNoControlDependencies() const; - // Return a new scope. All ops created within the returned scope will have the - // device field set to 'device'. + /// Return a new scope. All ops created within the returned scope will have + /// the device field set to 'device'. Scope WithDevice(const string& device) const; - // Return a new scope. All ops created within the returned scope will be - // co-located on the device where op is placed. - // NOTE: This function is intended to be use internal libraries only for - // controlling placement of ops on to devices. Public use is not encouraged - // because the implementation of device placement is subject to change. - Scope ColocateWith(const ops::Operation& op) const; - // Convenience function for above. - Scope ColocateWith(const ops::Output& out) const { - return ColocateWith(out.op()); - } - // Clear all colocation constraints. + /// Return a new scope. All ops created within the returned scope will be + /// co-located on the device where op is placed. + /// NOTE: This function is intended to be use internal libraries only for + /// controlling placement of ops on to devices. Public use is not encouraged + /// because the implementation of device placement is subject to change. + Scope ColocateWith(const Operation& op) const; + /// Convenience function for above. 
+ Scope ColocateWith(const Output& out) const { return ColocateWith(out.op()); } + /// Clear all colocation constraints. Scope ClearColocation() const; - // Return a new scope. The op-constructor functions taking the returned scope - // as the scope argument will exit as soon as an error is detected, instead of - // setting the status on the scope. + /// Return a new scope. The op-constructor functions taking the returned scope + /// as the scope argument will exit as soon as an error is detected, instead + /// of setting the status on the scope. Scope ExitOnError() const; - // Return a new scope. All ops created with the new scope will have - // kernel_label as the value for their '_kernel' attribute; + /// Return a new scope. All ops created with the new scope will have + /// kernel_label as the value for their '_kernel' attribute; Scope WithKernelLabel(const string& kernel_label) const; // The following functions are for scope object consumers. - // Return a unique name, using default_name if an op name has not been - // specified. + /// Return a unique name, using default_name if an op name has not been + /// specified. string GetUniqueNameForOp(const string& default_name) const; - // Update the status on this scope. - // Note: The status object is shared between all children of this scope. - // If the resulting status is not Status::OK() and exit_on_error_ is set on - // this scope, this function exits by calling LOG(FATAL). + /// Update the status on this scope. + /// Note: The status object is shared between all children of this scope. + /// If the resulting status is not Status::OK() and exit_on_error_ is set on + /// this scope, this function exits by calling LOG(FATAL). void UpdateStatus(const Status s) const; - // Update the builder with properties accumulated in this scope. + // START_SKIP_DOXYGEN + + /// Update the builder with properties accumulated in this scope. 
void UpdateBuilder(NodeBuilder* builder) const; + // END_SKIP_DOXYGEN CompositeOpScopes GetCompositeOpScopes(const string& composite_op_name) const; @@ -169,23 +176,24 @@ class Scope { Status status() const { return *status_; } - // If status() is Status::OK(), convert the Graph object stored in this scope - // to a GraphDef proto and return Status::OK(). Otherwise, return the error - // status as is without performing GraphDef conversion. + /// If status() is Status::OK(), convert the Graph object stored in this scope + /// to a GraphDef proto and return Status::OK(). Otherwise, return the error + /// status as is without performing GraphDef conversion. Status ToGraphDef(GraphDef* gdef) const; - // If status() is Status::OK(), construct a Graph object using the default - // GraphConstructorOptions, and return Status::OK if graph construction was - // successful. Otherwise, return the error status. + // START_SKIP_DOXYGEN + + /// If status() is Status::OK(), construct a Graph object using the default + /// GraphConstructorOptions, and return Status::OK if graph construction was + /// successful. Otherwise, return the error status. // TODO(josh11b, keveman): Make this faster; right now it converts // Graph->GraphDef->Graph. This cleans up the graph (e.g. adds // edges from the source and to the sink node, resolves back edges // by name), and makes sure the resulting graph is valid. Status ToGraph(Graph* g) const; + // END_SKIP_DOXYGEN - const std::vector<ops::Operation>& control_deps() const { - return control_deps_; - } + const std::vector<Operation>& control_deps() const { return control_deps_; } private: // Tag types to choose the constructor to dispatch. 
@@ -214,16 +222,16 @@ class Scope { Scope(const Scope& other, Tags::OpName, const string& name, const string& op_name); Scope(const Scope& other, Tags::ControlDeps, - std::vector<ops::Operation> control_deps, bool clear_control_deps); + std::vector<Operation> control_deps, bool clear_control_deps); Scope(const Scope& other, Tags::Device, const string& device); Scope(const Scope& other, Tags::SingleUseScope, const string& op_name); Scope(const Scope& other, Tags::ExitOnError); Scope(const Scope& other, Tags::KernelLabel, const string& kernel_label); - Scope(const Scope& other, Tags::Colocate, - const ops::Operation& colocate_with_op, bool clear_colocations); + Scope(const Scope& other, Tags::Colocate, const Operation& colocate_with_op, + bool clear_colocations); std::unordered_set<string> GetColocationConstraints( - const ops::Operation& colocate_with_op) const; + const Operation& colocate_with_op) const; // Helper functions to get a unique names. string GetUniqueName(const string& prefix, bool check_single_use) const; @@ -245,7 +253,7 @@ class Scope { // GetUniqueNameForOp will cause an error status to be set on this scope. std::shared_ptr<bool> scope_used_ = nullptr; - const std::vector<ops::Operation> control_deps_; + const std::vector<Operation> control_deps_; const string name_ = ""; const string op_name_ = ""; @@ -255,13 +263,13 @@ class Scope { const std::unordered_set<string> colocation_constraints_; }; -// A helper struct to hold the scopes that would be used by a function -// constructing a composite op. +/// A helper struct to hold the scopes that would be used by a function +/// constructing a composite op. struct CompositeOpScopes { - // Scope to be used for creating the local ops (primitive or other composite - // ops). + /// Scope to be used for creating the local ops (primitive or other composite + /// ops). Scope child; - // Scope to be used for creating the last op. + /// Scope to be used for creating the last op. 
Scope last; }; diff --git a/tensorflow/cc/framework/scope_test.cc b/tensorflow/cc/framework/scope_test.cc index 3882b5623b..9eca9d3fac 100644 --- a/tensorflow/cc/framework/scope_test.cc +++ b/tensorflow/cc/framework/scope_test.cc @@ -127,11 +127,11 @@ TEST(ScopeTest, SingleUseScope) { TEST(ScopeTest, ControlDeps) { Scope root = Scope::NewRootScope(); - auto c1 = ops::Operation(); - auto c2 = ops::Operation(); + auto c1 = Operation(); + auto c2 = Operation(); Scope c = root.WithControlDependencies({c1, c2}); EXPECT_EQ(c.control_deps().size(), 2); - Scope c_c = c.WithControlDependencies({ops::Operation()}); + Scope c_c = c.WithControlDependencies({Operation()}); EXPECT_EQ(c_c.control_deps().size(), 3); } diff --git a/tensorflow/cc/framework/testutil.cc b/tensorflow/cc/framework/testutil.cc index 58afc6b979..b0746913a1 100644 --- a/tensorflow/cc/framework/testutil.cc +++ b/tensorflow/cc/framework/testutil.cc @@ -20,8 +20,6 @@ limitations under the License. #include "tensorflow/core/graph/default_device.h" namespace tensorflow { -using namespace ops; // NOLINT(build/namespaces) - namespace test { void GetTensors(const Scope& scope, OutputList tensors, diff --git a/tensorflow/cc/framework/testutil.h b/tensorflow/cc/framework/testutil.h index 5e67ede6ab..d027ad3744 100644 --- a/tensorflow/cc/framework/testutil.h +++ b/tensorflow/cc/framework/testutil.h @@ -22,12 +22,12 @@ limitations under the License. namespace tensorflow { namespace test { -// Computes the outputs listed in 'tensors', returns the tensors in 'out'. -void GetTensors(const Scope& scope, ops::OutputList tensors, +/// Computes the outputs listed in 'tensors', returns the tensors in 'out'. +void GetTensors(const Scope& scope, OutputList tensors, std::vector<Tensor>* out); -// Computes the output 'tensor', returning the resulting tensor in 'out'. -void GetTensor(const Scope& scope, ops::Output tensor, Tensor* out); +/// Computes the output 'tensor', returning the resulting tensor in 'out'. 
+void GetTensor(const Scope& scope, Output tensor, Tensor* out); } // namespace test } // namespace tensorflow diff --git a/tensorflow/cc/gradients/grad_testutil.h b/tensorflow/cc/gradients/grad_testutil.h index 7a925f9b0e..d31f412754 100644 --- a/tensorflow/cc/gradients/grad_testutil.h +++ b/tensorflow/cc/gradients/grad_testutil.h @@ -22,12 +22,12 @@ limitations under the License. namespace tensorflow { namespace test { -// Calls the gradient function registered for 'op', adding gradient operations -// to the graph associated with 'scope'. Gradient outputs for each 'op' input -// are returned in 'grad_outputs'. -Status CallGradFunction(const Scope& scope, const ops::Operation& op, - const std::vector<ops::Output>& grad_inputs, - std::vector<ops::Output>* grad_outputs); +/// Calls the gradient function registered for 'op', adding gradient operations +/// to the graph associated with 'scope'. Gradient outputs for each 'op' input +/// are returned in 'grad_outputs'. +Status CallGradFunction(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs); } // namespace test } // namespace tensorflow diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h index 654e765170..7f2d560978 100644 --- a/tensorflow/cc/saved_model/constants.h +++ b/tensorflow/cc/saved_model/constants.h @@ -18,25 +18,25 @@ limitations under the License. namespace tensorflow { -// SavedModel assets directory. +/// SavedModel assets directory. constexpr char kSavedModelAssetsDirectory[] = "assets"; -// SavedModel assets key for graph collection-def. +/// SavedModel assets key for graph collection-def. constexpr char kSavedModelAssetsKey[] = "saved_model_assets"; -// SavedModel proto filename. +/// SavedModel proto filename. constexpr char kSavedModelFilenamePb[] = "saved_model.pb"; -// SavedModel text format proto filename. +/// SavedModel text format proto filename. 
constexpr char kSavedModelFilenamePbTxt[] = "saved_model.pbtxt"; -// SavedModel legacy init op key. +/// SavedModel legacy init op key. constexpr char kSavedModelLegacyInitOpKey[] = "legacy_init_op"; -// Directory in which to save the SavedModel variables. +/// Directory in which to save the SavedModel variables. constexpr char kSavedModelVariablesDirectory[] = "variables"; -// SavedModel variables filename. +/// SavedModel variables filename. constexpr char kSavedModelVariablesFilename[] = "variables"; } // namespace tensorflow diff --git a/tensorflow/cc/saved_model/loader.h b/tensorflow/cc/saved_model/loader.h index 10157b0a99..9b9abdbb1f 100644 --- a/tensorflow/cc/saved_model/loader.h +++ b/tensorflow/cc/saved_model/loader.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// SavedModel loading functions and SavedModelBundle struct. +/// SavedModel loading functions and SavedModelBundle struct. #ifndef THIRD_PARTY_TENSORFLOW_CC_SAVED_MODEL_LOADER_H_ #define THIRD_PARTY_TENSORFLOW_CC_SAVED_MODEL_LOADER_H_ @@ -27,13 +27,13 @@ limitations under the License. namespace tensorflow { -// SavedModel representation once the SavedModel is loaded from storage. +/// SavedModel representation once the SavedModel is loaded from storage. struct SavedModelBundle { std::unique_ptr<Session> session; MetaGraphDef meta_graph_def; - // A TensorFlow Session does not Close itself on destruction. To avoid - // resource leaks, we explicitly call Close on Sessions that we create. + /// A TensorFlow Session does not Close itself on destruction. To avoid + /// resource leaks, we explicitly call Close on Sessions that we create. ~SavedModelBundle() { if (session) { session->Close(); @@ -43,20 +43,20 @@ struct SavedModelBundle { SavedModelBundle() = default; }; -// Loads a SavedModel from the specified export directory. 
The meta graph def to -// be loaded is identified by the supplied tags, corresponding exactly to the -// set of tags used at SavedModel build time. Returns a SavedModel bundle with a -// session and the requested meta graph def, if found. +/// Loads a SavedModel from the specified export directory. The meta graph def +/// to be loaded is identified by the supplied tags, corresponding exactly to +/// the set of tags used at SavedModel build time. Returns a SavedModel bundle +/// with a session and the requested meta graph def, if found. Status LoadSavedModel(const SessionOptions& session_options, const RunOptions& run_options, const string& export_dir, const std::unordered_set<string>& tags, SavedModelBundle* const bundle); -// Checks whether the provided directory could contain a SavedModel. Note that -// the method does not load any data by itself. If the method returns `false`, -// the export directory definitely does not contain a SavedModel. If the method -// returns `true`, the export directory may contain a SavedModel but provides no -// guarantee that it can be loaded. +/// Checks whether the provided directory could contain a SavedModel. Note that +/// the method does not load any data by itself. If the method returns `false`, +/// the export directory definitely does not contain a SavedModel. If the method +/// returns `true`, the export directory may contain a SavedModel but provides +/// no guarantee that it can be loaded. bool MaybeSavedModelDirectory(const string& export_dir); } // namespace tensorflow diff --git a/tensorflow/cc/saved_model/signature_constants.h b/tensorflow/cc/saved_model/signature_constants.h index 5a784874cd..b2d39bd55b 100644 --- a/tensorflow/cc/saved_model/signature_constants.h +++ b/tensorflow/cc/saved_model/signature_constants.h @@ -18,48 +18,48 @@ limitations under the License. namespace tensorflow { -// Key in the signature def map for `default` serving signatures. 
The default -// signature is used in inference requests where a specific signature was not -// specified. +/// Key in the signature def map for `default` serving signatures. The default +/// signature is used in inference requests where a specific signature was not +/// specified. static constexpr char kDefaultServingSignatureDefKey[] = "serving_default"; //////////////////////////////////////////////////////////////////////////////// -// Classification API constants. +/// Classification API constants. -// Classification inputs. +/// Classification inputs. static constexpr char kClassifyInputs[] = "inputs"; -// Classification method name used in a SignatureDef. +/// Classification method name used in a SignatureDef. static constexpr char kClassifyMethodName[] = "tensorflow/serving/classify"; -// Classification classes output. +/// Classification classes output. static constexpr char kClassifyOutputClasses[] = "classes"; -// Classification scores output. +/// Classification scores output. static constexpr char kClassifyOutputScores[] = "scores"; //////////////////////////////////////////////////////////////////////////////// -// Predict API constants. +/// Predict API constants. -// Predict inputs. +/// Predict inputs. static constexpr char kPredictInputs[] = "inputs"; -// Predict method name used in a SignatureDef. +/// Predict method name used in a SignatureDef. static constexpr char kPredictMethodName[] = "tensorflow/serving/predict"; -// Predict outputs. +/// Predict outputs. static constexpr char kPredictOutputs[] = "outputs"; //////////////////////////////////////////////////////////////////////////////// -// Regression API constants. +/// Regression API constants. -// Regression inputs. +/// Regression inputs. static constexpr char kRegressInputs[] = "inputs"; -// Regression method name used in a SignatureDef. +/// Regression method name used in a SignatureDef. static constexpr char kRegressMethodName[] = "tensorflow/serving/regress"; -// Regression outputs. 
+/// Regression outputs. static constexpr char kRegressOutputs[] = "outputs"; //////////////////////////////////////////////////////////////////////////////// diff --git a/tensorflow/cc/saved_model/tag_constants.h b/tensorflow/cc/saved_model/tag_constants.h index 8c4d12a57f..48ab1158e4 100644 --- a/tensorflow/cc/saved_model/tag_constants.h +++ b/tensorflow/cc/saved_model/tag_constants.h @@ -18,10 +18,10 @@ limitations under the License. namespace tensorflow { -// Tag for the `serving` graph. +/// Tag for the `serving` graph. constexpr char kSavedModelTagServe[] = "serve"; -// Tag for the `training` graph.` +/// Tag for the `training` graph. constexpr char kSavedModelTagTrain[] = "train"; } // namespace tensorflow diff --git a/tensorflow/cc/training/coordinator.h b/tensorflow/cc/training/coordinator.h index 58e95f40f6..dbcf072015 100644 --- a/tensorflow/cc/training/coordinator.h +++ b/tensorflow/cc/training/coordinator.h @@ -28,77 +28,77 @@ limitations under the License. namespace tensorflow { -// The abstract interface for runners which must implement the Join function. +/// The abstract interface for runners which must implement the Join function. class RunnerInterface { public: virtual ~RunnerInterface() {} virtual Status Join() = 0; - // Returns true iff the runner is running, i.e. if it is trying to populate - // its queue. + /// Returns true iff the runner is running, i.e. if it is trying to populate + /// its queue. virtual bool IsRunning() const = 0; }; -// Coordinator class manages the termination of a collection of QueueRunners. -// Without a coordinator, QueueRunners have to be joined in a specific order; -// otherwise the QueueRunner::Join() could sometimes hang. The -// Coordinator::RequestStop() plays the key role which notifies all running -// threads under a coordinator to stop. This function could be called by any -// thread or any client. 
-// Usage, in the client: -// Coordinator coord; -// std::unique_ptr<QueueRunner> qr(&coord, ...); -// qr.Start(session); -// coord.RegisterRunner(std::move(qr)); -// // do some work -// TF_CHECK_OK(coord.Join()); -// In each thread of QueueRunner, the coordinator needs to be used as: -// void Run() { -// while (!coord->ShouldStop()) { -// // do some work -// if (error) { -// coord->RequestStop(); -// coord->ReportStatus(error_status); -// } -// } -// } +/// Coordinator class manages the termination of a collection of QueueRunners. +/// Without a coordinator, QueueRunners have to be joined in a specific order; +/// otherwise the QueueRunner::Join() could sometimes hang. The +/// Coordinator::RequestStop() plays the key role which notifies all running +/// threads under a coordinator to stop. This function could be called by any +/// thread or any client. +/// Usage, in the client: +/// Coordinator coord; +/// std::unique_ptr<QueueRunner> qr(&coord, ...); +/// qr.Start(session); +/// coord.RegisterRunner(std::move(qr)); +/// // do some work +/// TF_CHECK_OK(coord.Join()); +/// In each thread of QueueRunner, the coordinator needs to be used as: +/// void Run() { +/// while (!coord->ShouldStop()) { +/// // do some work +/// if (error) { +/// coord->RequestStop(); +/// coord->ReportStatus(error_status); +/// } +/// } +/// } class Coordinator { public: Coordinator(); - // Constructor with a list of error codes which would not be taken as errors - // in status reporting. + /// Constructor with a list of error codes which would not be taken as errors + /// in status reporting. Coordinator(const std::vector<error::Code>& clean_stop_errors); - // In the destructor, RequestStop() and Join() would be called. + /// In the destructor, RequestStop() and Join() would be called. ~Coordinator(); - // Registers a runner, i.e. a unit of running threads which is usually a - // QueueRunner. It takes the ownership of runner to avoid lifecycle-related - // problems. 
Note, the coordinator would not start these threads; they are - // supposed to be in running state when they are registered here. + /// Registers a runner, i.e. a unit of running threads which is usually a + /// QueueRunner. It takes the ownership of runner to avoid lifecycle-related + /// problems. Note, the coordinator would not start these threads; they are + /// supposed to be in running state when they are registered here. Status RegisterRunner(std::unique_ptr<RunnerInterface> runner); - // Returns true iff all the registered runners have been stopped. + /// Returns true iff all the registered runners have been stopped. bool AllRunnersStopped(); - // Requests all running threads to stop. + /// Requests all running threads to stop. Status RequestStop(); - // Returns true if its RequestStop() has been called. + /// Returns true if its RequestStop() has been called. bool ShouldStop(); - // Joins all threads, returns OK or the first reported and unexpected status. + /// Joins all threads, returns OK or the first reported and unexpected status. Status Join(); - // Reports status to the coordinator. This is usually called by threads. + /// Reports status to the coordinator. This is usually called by threads. void ReportStatus(const Status& status); - // Returns the latest status. + /// Returns the latest status. Status GetStatus(); - // Returns immediately if the coordinator is stopped or blocks until - // RequestStop() is called. + /// Returns immediately if the coordinator is stopped or blocks until + /// RequestStop() is called. void WaitForStop(); private: diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h index e5aae8219f..bfe6a30593 100644 --- a/tensorflow/cc/training/queue_runner.h +++ b/tensorflow/cc/training/queue_runner.h @@ -32,46 +32,46 @@ limitations under the License. 
namespace tensorflow { -// QueueRunner class imitates the behavior of the python version of QueueRunner -// which creates a thread for each enqueue op, runs close op on completion. +/// QueueRunner class imitates the behavior of the python version of QueueRunner +/// which creates a thread for each enqueue op, runs close op on completion. class QueueRunner : public RunnerInterface { public: - // Creates a new QueueRunner from proto. + /// Creates a new QueueRunner from proto. // TODO(yuefengz): we may want to initialize from queues and ops in the // future. static Status New(const QueueRunnerDef& queue_runner_def, std::unique_ptr<QueueRunner>* result); - // Creates a new QueueRunner with a coordinator, see coordinator.h for usage. + /// Creates a new QueueRunner with a coordinator, see coordinator.h for usage. static Status New(const QueueRunnerDef& queue_runner_def, Coordinator* coord, std::unique_ptr<QueueRunner>* result); - // Adds a callback that the queue runner will call when it detects an error. + /// Adds a callback that the queue runner will call when it detects an error. void AddErrorCallback(const std::function<void(Status)>& cb); - // Delete the previously registered callbacks. + /// Delete the previously registered callbacks. void ClearErrorCallbacks(); - // The destructor would join all the threads. + /// The destructor would join all the threads. ~QueueRunner(); - // Starts the queue runner with the given session. + /// Starts the queue runner with the given session. Status Start(Session* sess); - // Starts the queue runner with the given session, and wait for up to the - // specified time (in milliseconds) for the queues to start to fill up. + /// Starts the queue runner with the given session, and wait for up to the + /// specified time (in milliseconds) for the queues to start to fill up. Status Start(Session* sess, int wait_for_ms); - // Requests to stop and runs the cancel op. It would be called in a separate - // thread when coordinator is set. 
If there is no coordinator it should be - // called before calling Join. + /// Requests to stop and runs the cancel op. It would be called in a separate + /// thread when coordinator is set. If there is no coordinator it should be + /// called before calling Join. void Stop(Session* sess); - // Joins all the threads. Returns okay if all threads run successfully; - // otherwise returns the first captured failure status. + /// Joins all the threads. Returns okay if all threads run successfully; + /// otherwise returns the first captured failure status. Status Join() final; - // Returns the latest status. + /// Returns the latest status. Status GetStatus(); private: diff --git a/tensorflow/compiler/jit/graph_to_functiondef_test.cc b/tensorflow/compiler/jit/graph_to_functiondef_test.cc index df45f455a9..04b2385c9c 100644 --- a/tensorflow/compiler/jit/graph_to_functiondef_test.cc +++ b/tensorflow/compiler/jit/graph_to_functiondef_test.cc @@ -50,8 +50,7 @@ TEST(GraphToFunctionDefTest, Basics) { auto d = ops::Add(root.WithOpName("D"), a, b); auto e = ops::Add(root.WithOpName("b"), d, c); auto f = ops::Neg(root.WithOpName("h"), e); - auto g = - ops::AddN(root.WithOpName("G"), std::initializer_list<ops::Output>{e, f}); + auto g = ops::AddN(root.WithOpName("G"), std::initializer_list<Output>{e, f}); auto h = ops::_Retval(root.WithOpName("H"), g, 0); GraphDef graph_def; diff --git a/tensorflow/compiler/jit/xla_local_launch_op.cc b/tensorflow/compiler/jit/xla_local_launch_op.cc index 7945e057cf..acf2ccb890 100644 --- a/tensorflow/compiler/jit/xla_local_launch_op.cc +++ b/tensorflow/compiler/jit/xla_local_launch_op.cc @@ -45,6 +45,9 @@ REGISTER_OP("_XlaLaunch") .Output("results: Tresults") .Attr("Tresults: list(type) >= 0") .Attr("function: func") + // XLA random-number generation ops are stateful. + // TODO(phawkins): create stateful and non-stateful variants of _XlaLaunch. + .SetIsStateful() .Doc("XLA Launch Op. 
For use by the XLA JIT only."); // Adapter class that wraps a Tensorflow allocator as an XLA allocator. @@ -313,9 +316,10 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { } Tensor output_tensor; // Looks up the owning Tensor by buffer address. - OP_REQUIRES_OK(ctx, xla_allocator.MakeTensorFromBuffer( - buffer, ctx->expected_output_dtype(i), shape, - &output_tensor)); + OP_REQUIRES_OK( + ctx, + xla_allocator.MakeTensorFromBuffer( + buffer, ctx->expected_output_dtype(i), shape, &output_tensor)); ctx->set_output(i, output_tensor); ++output_num; } diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index b4f01de4f2..5c78ab7061 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -180,6 +180,20 @@ tf_xla_py_test( ) tf_xla_py_test( + name = "random_ops_test", + size = "small", + srcs = ["random_ops_test.py"], + # TODO(b/31361304): enable RNG ops on GPU when parallelized. + disabled_backends = ["gpu"], + deps = [ + ":xla_test", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + "//tensorflow/python:random_ops", + ], +) + +tf_xla_py_test( name = "reduce_ops_test", size = "medium", srcs = ["reduce_ops_test.py"], diff --git a/tensorflow/compiler/tests/build_defs.bzl b/tensorflow/compiler/tests/build_defs.bzl index 7fb8e0a26d..820db13d0b 100644 --- a/tensorflow/compiler/tests/build_defs.bzl +++ b/tensorflow/compiler/tests/build_defs.bzl @@ -9,7 +9,7 @@ def all_backends(): return ["cpu"] def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None, - backends=None, **kwargs): + disabled_backends=None, **kwargs): """Generates py_test targets, one per XLA backend. This rule generates py_test() targets named name_backend, for each backend @@ -31,15 +31,16 @@ def tf_xla_py_test(name, srcs=[], deps=[], tags=[], data=[], main=None, tags: Tags to apply to the generated targets. data: Data dependencies of the target. main: Same as py_test's main attribute. 
- backends: A list of backends to test. Supported values include "cpu" and - "gpu". If not specified, defaults to all backends. + disabled_backends: A list of backends that should not be tested. Supported + values include "cpu" and "gpu". If not specified, defaults to None. **kwargs: keyword arguments passed onto the generated py_test() rules. """ - if backends == None: - backends = all_backends() + if disabled_backends == None: + disabled_backends = [] + enabled_backends = [b for b in all_backends() if b not in disabled_backends] test_names = [] - for backend in backends: + for backend in enabled_backends: test_name = "{}_{}".format(name, backend) backend_tags = ["tf_xla_{}".format(backend)] backend_args = [] diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py new file mode 100644 index 0000000000..31173c717d --- /dev/null +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -0,0 +1,67 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for random-number generation ops in the XLA JIT compiler.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import googletest + + +class RandomOpsTest(XLATestCase): + """Test cases for random-number generating operators.""" + + def _testRngIsNotConstant(self, rng, dtype): + # Tests that 'rng' does not always return the same value. + with self.test_session() as sess: + with self.test_scope(): + x = rng(dtype) + + # The random-number generator, if working correctly, should produce the + # same output multiple times with low probability. + y = sess.run(x) + z = sess.run(x) + w = sess.run(x) + + # We use exact equality here. If the random-number generator is producing + # deterministic output, all three outputs will be bitwise identical. + self.assertTrue((not np.array_equal(y, z)) or + (not np.array_equal(z, w)) or + (not np.array_equal(y, w))) + + def testRandomUniformIsNotConstant(self): + def rng(dtype): + return random_ops.random_uniform(shape=[2], dtype=dtype, + maxval=1000000) + for dtype in self.numeric_types: + self._testRngIsNotConstant(rng, dtype) + + def testRandomNormalIsNotConstant(self): + def rng(dtype): + return random_ops.random_normal(shape=[2], dtype=dtype) + + # TODO(b/34339814): implement inverse erf support for non-F32 types. 
+ dtype = dtypes.float32 + self._testRngIsNotConstant(rng, dtype) + + +if __name__ == '__main__': + googletest.main() diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 299b5e98c0..10b4a6d054 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -55,8 +55,6 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:stream_executor_no_cuda", - "//tensorflow/core:tensorflow_opensource", - "//tensorflow/core/kernels:cwise_op", ], alwayslink = 1, ) diff --git a/tensorflow/compiler/tf2xla/op_registrations.cc b/tensorflow/compiler/tf2xla/op_registrations.cc index d8a4dad4b3..d1a7abb22c 100644 --- a/tensorflow/compiler/tf2xla/op_registrations.cc +++ b/tensorflow/compiler/tf2xla/op_registrations.cc @@ -59,9 +59,10 @@ REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Ceil").TypeConstraint("T", kCpuFloatTypes)); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Concat").TypeConstraint("T", kCpuAllTypes)); -REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("ConcatV2") - .TypeConstraint("T", kCpuAllTypes) - .TypeConstraint("Tidx", DT_INT32)); +REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, + Name("ConcatV2") + .TypeConstraint("T", kCpuAllTypes) + .TypeConstraint("Tidx", DT_INT32)); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("ConcatOffset")); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Conv2D").TypeConstraint("T", kCpuFloatTypes)); @@ -165,8 +166,11 @@ REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Prod").TypeConstraint("T", kCpuNumericTypes)); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Range").TypeConstraint("Tidx", kCpuNumericTypes)); -// TODO(b/31361304): disabled because of XLA bugs. -// REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomStandardNormal")); +// TODO(b/34339814): implement inverse erf for double types and update the +// type constraint. 
+REGISTER_XLA_KERNEL( + DEVICE_CPU_XLA_JIT, + Name("RandomStandardNormal").TypeConstraint("dtype", DT_FLOAT)); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomUniform")); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("RandomUniformInt")); REGISTER_XLA_KERNEL(DEVICE_CPU_XLA_JIT, Name("Rank")); diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index d291888a75..517eae2f5d 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -330,6 +330,8 @@ Status XlaCompiler::CompileGraph(string const& name, &result->computation, &result->requires_runtime_context, &compile_time_constants, &num_nonconst_outputs)); + VLOG(2) << "Outputs: constant: " << compile_time_constants.size() + << " nonconstant: " << num_nonconst_outputs; result->outputs.resize(compile_time_constants.size() + num_nonconst_outputs); for (const auto& c : compile_time_constants) { if (!c.status.ok()) { diff --git a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc index 7a966ce241..07bbcd802f 100644 --- a/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc +++ b/tensorflow/compiler/xla/legacy_flags/parse_flags_from_env_test.cc @@ -127,7 +127,6 @@ static const char* binary_name; // Test that when we use both the environment variable and actual // commend line flags (when the latter is possible), the latter win. TEST(ParseFlagsFromEnv, EnvAndFlag) { - // TODO(m3b): convert to Subprocess when CL 137771604 is finished. 
static struct { const char* env; const char* arg; diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index f03b158fa7..2465edc498 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -93,6 +93,38 @@ namespace xla { ComputationBuilder::CreateDefaultConvDimensionNumbers()); } +/* static */ std::unique_ptr<Array4D<float>> +ReferenceUtil::SeparableConvArray4D(const Array4D<float>& input, + const Array4D<float>& depthwise_weights, + const Array4D<float>& pointwise_weights, + std::pair<int64, int64> kernel_stride, + Padding padding) { + const int64 depth_multiplier = depthwise_weights.planes(); + CHECK_EQ(pointwise_weights.depth(), input.depth() * depth_multiplier); + + // Combine the two weights by reducing the depth_multiplier, so that we can + // apply a single convolution on the combined weights. + Array4D<float> weights(pointwise_weights.planes(), input.depth(), + depthwise_weights.height(), depthwise_weights.width()); + for (int64 kx = 0; kx < depthwise_weights.width(); ++kx) { + for (int64 ky = 0; ky < depthwise_weights.height(); ++ky) { + for (int64 kz = 0; kz < input.depth(); ++kz) { + for (int64 out = 0; out < pointwise_weights.planes(); ++out) { + float weight = 0.0; + for (int64 depth = 0; depth < depth_multiplier; ++depth) { + weight += + depthwise_weights(depth, kz, ky, kx) * + pointwise_weights(out, depth + kz * depth_multiplier, 0, 0); + } + weights(out, kz, ky, kx) = weight; + } + } + } + } + + return ConvArray4D(input, weights, kernel_stride, padding); +} + /* static */ int64 ReferenceUtil::WindowCount(int64 unpadded_width, int64 window_len, int64 stride, Padding padding) { diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 27421b2ac4..d19d5f9dbb 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -73,6 +73,15 @@ class ReferenceUtil { std::pair<int64, 
int64> lhs_dilation, std::pair<int64, int64> rhs_dilation, ConvolutionDimensionNumbers dnums); + // Returns the result of a separable convolution with the given parameters. + // kernel_stride and padding applies to the depthwise convolution during + // the separable convolution. pointwise_weights.depth() must be equal to + // input.depth() * depthwise_weights.planes(). + static std::unique_ptr<Array4D<float>> SeparableConvArray4D( + const Array4D<float>& input, const Array4D<float>& depthwise_weights, + const Array4D<float>& pointwise_weights, + std::pair<int64, int64> kernel_stride, Padding padding); + // Returns the result of reducing a matrix to a column vector. init is the // initial value for the reduce operation, and reduce_function is the function // to apply for each reduction step. diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 1a87a0043a..4d118d2e4e 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -281,7 +281,7 @@ llvm::Value* ElementalIrEmitter::EmitFloatMin(llvm::Value* lhs_value, StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type, llvm::Value* x) const { if (prim_type != F32) { - return Unimplemented("inverse erf"); + return Unimplemented("inverse erf only implemented for F32 (b/34339814)"); } auto getFloat = [&](const float f) { return llvm::ConstantFP::get(ir_builder_->getFloatTy(), f); diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 373ab79ab2..ac478afabc 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -115,6 +115,9 @@ class Executable { const HloModuleConfig& module_config() const { return *module_config_; } + // Returns whether this executable has an associated HloModuleConfig. 
+ bool has_module_config() const { return module_config_ != nullptr; } + // Returns the versioned computation handle of the computation computed by // this executable. const VersionedComputationHandle& entry_computation_handle() const { diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 9aeebe42f8..8353731fdd 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -365,6 +365,38 @@ cc_library( ) cc_library( + name = "fusion_merger", + srcs = ["fusion_merger.cc"], + hdrs = ["fusion_merger.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_cost_analysis", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/core:lib", + ], +) + +cc_test( + name = "fusion_merger_test", + srcs = ["fusion_merger_test.cc"], + deps = [ + ":fusion_merger", + ":instruction_fusion", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/core:test_main", + ], +) + +cc_library( name = "pad_insertion", srcs = ["pad_insertion.cc"], hdrs = ["pad_insertion.h"], @@ -386,6 +418,7 @@ cc_library( deps = [ ":convolution_folding", ":copy_insertion", + ":fusion_merger", ":gpu_executable", ":hlo_schedule", ":instruction_fusion", diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc new file mode 100644 index 0000000000..caa919b688 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -0,0 +1,270 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" + +#include <algorithm> + +#include "tensorflow/compiler/xla/service/hlo_cost_analysis.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/str_util.h" + +namespace xla { +namespace gpu { + +namespace { + +// Traverses users of tuple shape, adding leaf instructions to 'instructions'. +void MaybeResolveTupleElements(HloInstruction* instruction, + std::vector<HloInstruction*>* instructions) { + if (ShapeUtil::IsTuple(instruction->shape())) { + for (auto tuple_user : instruction->users()) { + MaybeResolveTupleElements(tuple_user, instructions); + } + } else { + instructions->push_back(instruction); + } +} + +// Returns the bytes read by fusion parameter 'param', by returning the byte +// size of 'param' shape (or the cumulative byte sizes of all leaf tuple +// elements if 'param' is tuple-shaped). +// In the special case where all users of 'param' (or all users of a leaf +// tuple element if 'param' is tuple-shaped) are Slice instructions, the size +// of each slice instruction is accumulated instead, to give a more accurate +// value for bytes read. 
+double CalculateBytesReadByFusionParameter(HloInstruction* param) { + CHECK_EQ(HloOpcode::kParameter, param->opcode()); + + // Adds all leaf tuple elements to 'instructions' if 'param' is tuple-shaped. + // Adds 'param' to 'instructions' otherwise. + std::vector<HloInstruction*> instructions; + MaybeResolveTupleElements(param, &instructions); + + // Iterate through 'instructions' accumulating byte sizes of each instruction + // shape. For each 'instruction' in 'instructions', if all users of + // 'instruction' are Slice instructions, accumulates the byte sizes of each + // Slice for a more accurate estimate of bytes read. + double bytes = 0.0; + for (auto& instruction : instructions) { + if (std::all_of(instruction->users().begin(), instruction->users().end(), + [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kSlice || + instruction->opcode() == HloOpcode::kDynamicSlice; + })) { + // All users are slice: accumulate bytes of all user slice instructions. + for (auto& user : instruction->users()) { + bytes += ShapeUtil::ByteSizeOf(user->shape()); + } + } else { + // Some users are not slice: accumulate full size of 'instruction'. + bytes += ShapeUtil::ByteSizeOf(instruction->shape()); + } + } + return bytes; +} + +// Returns the bytes read by all fusion parameters of instruction 'fusion'. +double CalculateBytesReadByFusionInstruction(HloInstruction* fusion) { + double bytes = 0.0; + for (const auto& fused_instruction : fusion->fused_instructions()) { + if (fused_instruction->opcode() != HloOpcode::kParameter) { + continue; + } + bytes += CalculateBytesReadByFusionParameter(fused_instruction.get()); + } + return bytes; +} + +// Returns the flops to bytes transferred ratio of instruction 'fusion'. +double CalculateFlopsToBytesRatio(HloInstruction* fusion) { + CHECK_EQ(HloOpcode::kFusion, fusion->opcode()); + // Calculate total bytes transferred in/out. 
+ double bytes = CalculateBytesReadByFusionInstruction(fusion); + // Add bytes written to root instructions buffer. + bytes += ShapeUtil::ByteSizeOf(fusion->fused_expression_root()->shape()); + // Calculate flops for all fused instructions. + HloCostAnalysis analysis; + TF_CHECK_OK(fusion->fused_expression_root()->Accept(&analysis)); + // Return flops / bytes. + return bytes > 0.0 ? analysis.flop_count() / bytes : analysis.flop_count(); +} + +// Returns bytes transferred by instruction 'fusion', including the bytes +// that would be read by all users. +double GetCurrentBytesTransferred(HloInstruction* fusion) { + CHECK_EQ(HloOpcode::kFusion, fusion->opcode()); + const double bytes_read = CalculateBytesReadByFusionInstruction(fusion); + const double bytes_written = + ShapeUtil::ByteSizeOf(fusion->fused_expression_root()->shape()); + // Current bytes transferred (ignoring non 'fusion' user operands) is bytes + // read and written by 'fusion', plus reads of size 'bytes_written' for each + // user. + return bytes_read + bytes_written * (fusion->user_count() + 1); +} + +// Returns bytes transferred if 'fusion' were to be merged into its users. +double GetMergedBytesTransferred(HloInstruction* fusion) { + CHECK_EQ(HloOpcode::kFusion, fusion->opcode()); + return CalculateBytesReadByFusionInstruction(fusion) * fusion->user_count(); +} + +} // anonymous namespace + +// FusionInstructionMerger visits all fusion instructions in 'computation' +// in post order, attempting to merge each into all of its users. +// Accumulates and reports stats on successful/failed merge attempts. +class FusionInstructionMerger { + public: + explicit FusionInstructionMerger(HloComputation* computation) + : computation_(computation) {} + + Status Run(); + + bool changed() const { return changed_; } + + private: + Status HandleFusion(HloInstruction* fusion); + + HloComputation* computation_; + bool changed_ = false; + + // Fusion instruction merge stats. 
+ int total_visited_ = 0; + int total_merged_ = 0; + int num_fail_no_users_ = 0; + int num_fail_not_loop_fusion_ = 0; + int num_fail_merge_all_users_ = 0; + int num_fail_flops_to_byte_ratio_ = 0; + int num_fail_net_bytes_transferred_ratio_ = 0; + + TF_DISALLOW_COPY_AND_ASSIGN(FusionInstructionMerger); +}; + +Status FusionInstructionMerger::Run() { + for (auto* instruction : computation_->MakeInstructionPostOrder()) { + if (instruction->opcode() == HloOpcode::kFusion) { + TF_RETURN_IF_ERROR(HandleFusion(instruction)); + } + } + + VLOG(1) << "FusionInstructionMerger EXIT" + << " computation: " << computation_->name() + << " total_visited: " << total_visited_ + << " total_merged: " << total_merged_ << " merge failures { " + << " no_users: " << num_fail_no_users_ + << " not_loop_fusion: " << num_fail_not_loop_fusion_ + << " merge_all_users: " << num_fail_merge_all_users_ + << " flops_to_byte_ratio: " << num_fail_flops_to_byte_ratio_ + << " net_bytes_transferred: " << num_fail_net_bytes_transferred_ratio_ + << " }"; + return Status::OK(); +} + +Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { + VLOG(3) << "FusionInstructionMerger ENTRY fusion: " << fusion->name() + << " flops_to_bytes_ratio: " << CalculateFlopsToBytesRatio(fusion); + ++total_visited_; + // Skip 'fusion' instruction if there are no users into which we can merge. + if (fusion->users().empty()) { + ++num_fail_no_users_; + return Status::OK(); + } + + // Skip 'fusion' instruction if it is not a loop fusion. Library fusion + // instructions match specific patterns, so they shouldn't be further fused. + // Input fusion instructions need to be rooted at a particular HLO (e.g. + // kReduce), so they shouldn't be further fused either. + if (fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) { + ++num_fail_not_loop_fusion_; + return Status::OK(); + } + // Skip 'fusion' instruction if we cannot merge into all of its users. 
+ // Merging into all users enables the removal of 'fusion' from the + // computation. + if (!std::all_of(fusion->users().begin(), fusion->users().end(), + [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kFusion && + instruction->fusion_kind() == + HloInstruction::FusionKind::kLoop; + })) { + ++num_fail_merge_all_users_; + return Status::OK(); + } + // Skip 'fusion' instruction if its flops to bytes transferred ratio + // exceeds the threshold value. + if (CalculateFlopsToBytesRatio(fusion) > + FusionMerger::GetThresholdFlopsToBytesRatio()) { + ++num_fail_flops_to_byte_ratio_; + return Status::OK(); + } + // Skip 'fusion' instruction if merging it into all users would result in a + // net increase in bytes transferred (currently allowing the net bytes + // transferred to be exceeded up to ~10% in exchange for eliminating the + // overhead from a GPU kernel launch). + const double current_bytes_transferred = GetCurrentBytesTransferred(fusion); + const double merged_bytes_transferred = GetMergedBytesTransferred(fusion); + const double merged_to_current_bytes_ratio = + merged_bytes_transferred / std::max(1.0, current_bytes_transferred); + if (merged_to_current_bytes_ratio > 1.10) { + ++num_fail_net_bytes_transferred_ratio_; + return Status::OK(); + } + // Merge fused instructions from 'fusion' into each user. + std::set<HloInstruction*> users = fusion->users(); + for (HloInstruction* user : users) { + user->MergeFusionInstruction(fusion); + changed_ = true; + } + ++total_merged_; + VLOG(2) << "Merged fusion instruction: " << fusion->name() + << " flops_to_bytes_ratio: " << CalculateFlopsToBytesRatio(fusion) + << " merged_to_current_bytes_ratio: " << merged_to_current_bytes_ratio + << " into users { " + << tensorflow::str_util::Join(users, ", ", + [](string* out, HloInstruction* user) { + tensorflow::strings::StrAppend( + out, user->name()); + }) + << " }"; + // Remove 'fusion' instruction. 
+ CHECK_EQ(0, fusion->user_count()); + computation_->RemoveInstruction(fusion); + return Status::OK(); +} + +StatusOr<bool> FusionMerger::Run(HloModule* module) { + bool changed = false; + VLOG(2) << "FusionMerger for module: " << module->name(); + for (auto& computation : module->computations()) { + VLOG(1) << "Before running FusionInstructionMerger for computation: " + << computation->name(); + XLA_VLOG_LINES(3, computation->ToString()); + + FusionInstructionMerger fusion_merger(computation.get()); + TF_RETURN_IF_ERROR(fusion_merger.Run()); + changed |= fusion_merger.changed(); + + VLOG(1) << "After running FusionInstructionMerger for computation: " + << computation->name() << " changed: " << changed; + XLA_VLOG_LINES(3, computation->ToString()); + } + return changed; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.h b/tensorflow/compiler/xla/service/gpu/fusion_merger.h new file mode 100644 index 0000000000..717eb15b85 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.h @@ -0,0 +1,47 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass.h" + +namespace xla { +namespace gpu { + +// An HLO pass that attempts to merge fusion instructions to reduce kernel +// launch overhead and improve data locality. +// +// Fusion instructions are merged into their users if two conditions are met: +// +// 1) The flops_to_bytes ratio of the fusion instruction is below the threshold +// value of 1.0. +// 2) The result of merging the fusion instruction into its users would not +// increase bytes transferred by more than ~10%. +// +class FusionMerger : public HloPass { + public: + FusionMerger() : HloPass("fusion merger") {} + + StatusOr<bool> Run(HloModule* module) override; + + static double GetThresholdFlopsToBytesRatio() { return 1.0; } +}; + +} // namespace gpu +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_GPU_FUSION_MERGER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc new file mode 100644 index 0000000000..a87e66ca86 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -0,0 +1,456 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" + +#include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" + +namespace xla { +namespace gpu { +namespace { + +class FusionMergerTest : public HloTestBase { + protected: + FusionMergerTest() : module_(TestName()) {} + + // Builds the following computation: + // + // Param + // / | \ + // / | \ + // OnesVec GTE(0) GTE(1) GTE(2) + // \ / \ / + // Add Add OnesVec + // \ / \ / + // \ Add Mul OnesVec + // \ | | / + // \ Mul Add + // \ | / + // \ | / + // Tuple + // + HloComputation* BuildComputation0() { + auto builder = HloComputation::Builder(TestName() + ".Computation0"); + // Create param instruction to access computation state. + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape3_, "param")); + + // Create GetTupleElement instructions for each tuple element. + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, param, 1)); + auto gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, param, 2)); + + // Create const vector of ones to be used in element-wise computations. + auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f}))); + + // Create simple fusable computation for tuple element 0 (wont get merged). 
+ auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, one_vec, gte0)); + + // Create fusable computation which is dependent on second and third tuple + // elements (will initially be fused on its own). + auto add1 = builder.AddInstruction( + HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte1, gte2)); + + // Create two sub-computations, both of which are users of 'add1'. + + // First sub-computation: out1 = Mul(Add(add1, one_vec), one_vec) + auto add2 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, add1, one_vec)); + auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, add2, one_vec)); + + // Second sub-computation: out2 = Add(Mul(add1, one_vec), one_vec) + auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, add1, one_vec)); + auto out2 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, mul0, one_vec)); + + // Create output Tuple. + builder.AddInstruction(HloInstruction::CreateTuple({out0, out1, out2})); + return module_.AddEntryComputation(builder.Build()); + } + + // Builds the following computation: + // + // Param + // / \ + // GTE(0) GTE(1) + // | | \ / + // | | Mul + // \ \ | + // \ Mul + // \ | + // OnesVec Mul OnesVec + // \ / \ / + // OnesVec Add Mul OnesVec + // \ | | / + // Mul Add + // \ / + // \ / + // Tuple + // + HloComputation* BuildComputation1() { + auto builder = HloComputation::Builder(TestName() + ".Computation1"); + Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_}); + // Create param instruction to access computation state. + auto state = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape2_, "state")); + + // Create shared sub-computation (will initially be fused on its own). 
+ auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 1)); + // Calculate the flops we need to generate for this shared computation + // to exceed the threshold flops_to_bytes_ratio. + // Note that bytes transferred is multiplied by 3 because there are two + // operands and one output of size 'data_shape_'. + const int64 flops_needed = FusionMerger::GetThresholdFlopsToBytesRatio() * + ShapeUtil::ByteSizeOf(data_shape_) * 3; + const int64 vec_elements = ShapeUtil::ElementsIn(data_shape_); + const int64 iters = (flops_needed + vec_elements - 1) / vec_elements; + + auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, gte0, gte1)); + for (int i = 0; i < iters; ++i) { + mul0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, gte0, mul0)); + } + + // Create two sub-computations, both of which are users of 'mul0'. + auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f}))); + + // First sub-computation: out0 = Mul(Add(mul0, one_vec), one_vec) + auto add0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, mul0, one_vec)); + auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, add0, one_vec)); + + // Second sub-computation: out1 = Add(Mul(mul0, one_vec), one_vec) + auto mul1 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, mul0, one_vec)); + auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, mul1, one_vec)); + + // Create output Tuple.
+ builder.AddInstruction(HloInstruction::CreateTuple({out0, out1})); + return module_.AddEntryComputation(builder.Build()); + } + + // Builds the following computation: + // + // Param + // / | | \ + // / | | \ + // / | | \ + // GTE(0) GTE(1) GTE(2) GTE(3) + // \ / / / + // Add / / + // \ / / + // Add / + // \ / + // \ / + // OnesVec Add OnesVec + // \ / \ / + // OnesVec Add Mul OnesVec + // \ | | / + // Mul Add + // \ / + // \ / + // Tuple + // + HloComputation* BuildComputation2(bool add_extra_input) { + auto builder = HloComputation::Builder(TestName() + ".Computation2"); + Shape state_shape = add_extra_input ? tuple_shape4_ : tuple_shape3_; + // Create param instruction to access computation state. + auto state = builder.AddInstruction( + HloInstruction::CreateParameter(0, state_shape, "state")); + + // Create GetTupleElement instructions for each tuple element. + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 1)); + auto gte2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 2)); + + // Create shared fusable computation that reduces its operands. + auto reduce0 = builder.AddInstruction( + HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte0, gte1)); + auto reduce_out = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, reduce0, gte2)); + if (add_extra_input) { + auto gte3 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(data_shape_, state, 3)); + reduce_out = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, reduce_out, gte3)); + } + + // Create two fusable sub-computations which are dependent on shared + // computation 'reduce_out'. 
+ auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( + LiteralUtil::CreateR1<float>({1.f, 1.f, 1.f, 1.f}))); + + // First sub-computation: out0 = Mul(Add(reduce_out, one_vec), one_vec) + auto add2 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, reduce_out, one_vec)); + auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, add2, one_vec)); + + // Second sub-computation: out1 = Add(Mul(reduce_out, one_vec), one_vec) + auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kMultiply, reduce_out, one_vec)); + auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( + data_shape_, HloOpcode::kAdd, mul0, one_vec)); + + // Create output Tuple. + builder.AddInstruction(HloInstruction::CreateTuple({out0, out1})); + return module_.AddEntryComputation(builder.Build()); + } + + Shape data_shape_ = ShapeUtil::MakeShape(F32, {4}); + Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_}); + Shape tuple_shape3_ = + ShapeUtil::MakeTupleShape({data_shape_, data_shape_, data_shape_}); + Shape tuple_shape4_ = ShapeUtil::MakeTupleShape( + {data_shape_, data_shape_, data_shape_, data_shape_}); + + HloModule module_; +}; + +// Tests that we can merge a fusion instruction that is below threshold. 
+// +// Original computation: +// +// Param +// / | \ +// / | \ +// OnesVec GTE(0) GTE(1) GTE(2) +// \ / \ / +// Add Add OnesVec +// \ / \ / +// \ Add Mul OnesVec +// \ | | / +// \ Mul Add +// \ | / +// \ | / +// Tuple +// +// Computation after fusion passes: +// +// Param +// / \ +// Fusion3 Fusion2 +// | / \ +// \ Fusion0 Fusion1 +// \ | / +// \ | / +// Tuple +// +// Computation after fusion merger pass (Fusion2 is merged into Fusion0 and +// Fusion1): +// Param +// / | \ +// Fusion3 Fusion0 Fusion1 +// \ | / +// Tuple +// +TEST_F(FusionMergerTest, MergeSharedFusionInstruction) { + auto computation = BuildComputation0(); + // Run standard fusion passes. + EXPECT_TRUE( + GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie()); + EXPECT_FALSE( + GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie()); + // Run fusion merger pass, which should merge the shared fusion instruction + // into its two users. + EXPECT_TRUE(FusionMerger().Run(&module_).ValueOrDie()); + + auto* root = computation->root_instruction(); + EXPECT_EQ(HloOpcode::kTuple, root->opcode()); + // Check operand 0 (not merged). Should have 4 instructions. + auto* operand0 = root->operand(0); + EXPECT_EQ(HloOpcode::kFusion, operand0->opcode()); + EXPECT_EQ(4, operand0->fused_instructions().size()); + // Check operand 1 (should have merged in its operand fusion instruction). + auto* operand1 = root->operand(1); + EXPECT_EQ(HloOpcode::kFusion, operand1->opcode()); + EXPECT_EQ(7, operand1->fused_instructions().size()); + // Check operand 2 (should have merged in its operand fusion instruction). + auto* operand2 = root->operand(2); + EXPECT_EQ(HloOpcode::kFusion, operand2->opcode()); + EXPECT_EQ(7, operand2->fused_instructions().size()); +} + +// Tests that we do not merge a fusion instruction that above flops to bytes +// threshold. 
+// +// Original computation: +// +// Param +// / \ +// GTE(0) GTE(1) +// | | \ / +// | | Mul +// \ \ | +// \ Mul +// \ | +// OnesVec Mul OnesVec +// \ / \ / +// OnesVec Add Mul OnesVec +// \ | | / +// Mul Add +// \ / +// \ / +// Tuple +// +// Computation after fusion passes and fusion merger pass (Fusion2 is not +// merged because it exceeds the threshold flops to bytes ratio). +// +// Param +// | +// Fusion2 +// / \ +// Fusion0 Fusion1 +// \ / +// Tuple +// +TEST_F(FusionMergerTest, FlopsToBytesRatioThresholdExceeded) { + BuildComputation1(); + // Run standard fusion passes. + EXPECT_TRUE( + GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie()); + EXPECT_FALSE( + GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie()); + // Run fusion merger pass, which should detect that the flops/bytes of the + // shared fusion instruction exceeds the threshold ratio, and therefore + // cannot be merged with other fusion instructions. + EXPECT_FALSE(FusionMerger().Run(&module_).ValueOrDie()); +} + +// Tests that threshold for bytes transferred if merged is exceeded. +// +// Original computation: +// +// Param +// / | | \ +// / | | \ +// / | | \ +// GTE(0) GTE(1) GTE(2) GTE(3) +// \ / / / +// Add / / +// \ / / +// Add / +// \ / +// \ / +// OnesVec Add OnesVec +// \ / \ / +// OnesVec Add Mul OnesVec +// \ | | / +// Mul Add +// \ / +// \ / +// Tuple +// +// Computation after fusion passes and fusion merger pass. Fusion2 is not +// merged because it exceeds the threshold bytes transferred. This is because +// the bytes read by Fusion2 (when replicated if the instruction is merged +// into Fusion0 and Fusion1) would exceed the bytes transferred threshold. +// +// Param +// | +// Fusion2 +// / \ +// Fusion0 Fusion1 +// \ / +// Tuple +// +TEST_F(FusionMergerTest, BytesTransferredThresholdExeceeded) { + BuildComputation2(/*add_extra_input=*/true); + // Run standard fusion passes. 
+ EXPECT_TRUE( + GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie()); + EXPECT_FALSE( + GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie()); + // Run fusion merger pass, which should detect that the net bytes transferred + // (if merged) would increase. + EXPECT_FALSE(FusionMerger().Run(&module_).ValueOrDie()); +} + +// Tests that threshold for bytes transferred if merged is not exceeded. +// +// Original computation: +// +// Param +// / | \ +// / | \ +// / | \ +// GTE(0) GTE(1) GTE(2) +// \ / / +// Add / +// \ / +// OnesVec Add OnesVec +// \ / \ / +// OnesVec Add Mul OnesVec +// \ / \ / +// Mul Add +// \ / +// \ / +// Tuple +// +// Computation after fusion passes: +// +// Param +// | +// Fusion2 +// / \ +// Fusion0 Fusion1 +// \ / +// Tuple +// +// Computation after fusion merger pass (Fusion2 is merged into Fusion0 and +// Fusion1, because bytes read from Param by Fusion2 is reduced for this test +// which makes the merge operation into its operand below the bytes +// transferred threshold. +// +// Param +// / \ +// Fusion0 Fusion1 +// \ / +// Tuple +// +TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { + BuildComputation2(/*add_extra_input=*/false); + // Run standard fusion passes. + EXPECT_TRUE( + GpuInstructionFusion(/*may_duplicate=*/false).Run(&module_).ValueOrDie()); + EXPECT_FALSE( + GpuInstructionFusion(/*may_duplicate=*/true).Run(&module_).ValueOrDie()); + // Run fusion merger pass, which should detect that the net bytes transferred + // (if merged) would not increase. 
+ EXPECT_TRUE(FusionMerger().Run(&module_).ValueOrDie()); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 2f95446e6c..b5d7ba48d2 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/gpu/convolution_folding.h" #include "tensorflow/compiler/xla/service/gpu/copy_insertion.h" +#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/hlo_schedule.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" @@ -132,6 +133,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, HloPassFix<HloPassPipeline> fusion("fusion", dump_hlo); fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/false); fusion.AddPass<GpuInstructionFusion>(/*may_duplicate=*/true); + fusion.AddPass<FusionMerger>(); return fusion.Run(hlo_module).status(); } } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 7ae0a995af..48be0bd2c0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -420,6 +420,37 @@ HloInstruction::CreateFusionForBackwardConvolution( return fusion; } +void HloInstruction::MergeFusionInstruction( + HloInstruction* instruction_to_merge) { + CHECK_EQ(opcode_, HloOpcode::kFusion); + CHECK_EQ(instruction_to_merge->opcode(), HloOpcode::kFusion); + // Clone the instruction from which to merge fused instructions. 
+ std::unique_ptr<HloInstruction> clone = instruction_to_merge->Clone(); + // Replace uses of fused parameters with the corresponding operand of the + // fusion. + // Add all non-parameter fused instructions to 'unfused_instructions' to be + // merged into 'this'. + std::vector<HloInstruction*> unfused_instructions; + for (auto& fused_instruction : clone->fused_instructions()) { + if (fused_instruction->opcode() == HloOpcode::kParameter) { + fused_instruction->ReplaceAllUsesWith( + clone->mutable_operand(fused_instruction->parameter_number())); + } else { + unfused_instructions.push_back(fused_instruction.get()); + } + } + CHECK(unfused_instructions.front() == clone->fused_expression_root()); + // Replace instruction_to_merge use of 'this' with unfused_root. + instruction_to_merge->ReplaceUseWith(this, unfused_instructions.front()); + // Fuse 'unfused_instructions' into 'this'. + for (auto& instruction : unfused_instructions) { + FuseInstruction(instruction); + instruction->DetachFromOperands(); + } + CHECK_EQ(0, clone->user_count()); + clone->DetachFromOperands(); +} + HloInstruction* HloInstruction::FuseInstruction( HloInstruction* instruction_to_fuse) { CHECK_EQ(opcode_, HloOpcode::kFusion); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 8e7a253578..ecf29a476d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -79,11 +79,6 @@ class HloInstruction { const Shape& shape, RandomDistribution distribution, tensorflow::gtl::ArraySlice<HloInstruction*> parameters); - // Creates an n-ary elementwise operation. - static std::unique_ptr<HloInstruction> CreateNary( - const Shape& shape, HloOpcode opcode, - tensorflow::gtl::ArraySlice<HloInstruction*> operands); - // Creates a unary instruction (one operand). // Precondition: opcode must be a legitimate unary operation. 
static std::unique_ptr<HloInstruction> CreateUnary(const Shape& shape, @@ -492,6 +487,13 @@ class HloInstruction { return fusion_kind_; } + // Merges the fused instructions from 'instruction_to_merge' into the + // fused instruction set of 'this', updating operands as necessary. + // + // Precondition: opcode() == HloOpcode::kFusion + // Precondition: 'instruction_to_merge' must be an operand of 'this'. + void MergeFusionInstruction(HloInstruction* instruction_to_merge); + // Fuses the given instruction in this fusion instruction. instruction_to_fuse // is cloned and the clone is placed in the fusion // instruction. instruction_to_fuse is unchanged. Instruction is cloned rather @@ -636,6 +638,11 @@ class HloInstruction { private: enum class UseKind { kNoUse, kReuse, kUsePermutingElements, kUse }; + // Creates an n-ary elementwise operation. + static std::unique_ptr<HloInstruction> CreateNary( + const Shape& shape, HloOpcode opcode, + tensorflow::gtl::ArraySlice<HloInstruction*> operands); + // Appends operand to the list of operands and adds this instruction as a user // of the operand.
void AppendOperand(HloInstruction* operand); diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 7f86a3cbb5..30bf450c5b 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -247,10 +247,9 @@ LocalService::CompileAheadOfTime( *instance.result_layout)); } - return execute_backend_->compiler() - ->CompileAheadOfTime(std::move(hlo_modules), std::move(module_configs), - MakeHloDumper(), options) - .ConsumeValueOrDie(); + return execute_backend_->compiler()->CompileAheadOfTime( + std::move(hlo_modules), std::move(module_configs), MakeHloDumper(), + options); } tensorflow::Status LocalService::ValidateExecuteOptions( diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index ab2c43cd3d..6626fe5af8 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -37,32 +37,62 @@ limitations under the License. namespace xla { -/* static */ bool ShapeUtil::CompareShapes(const Shape& lhs, const Shape& rhs, - bool compare_layouts) { - if (IsTuple(lhs)) { - return IsTuple(rhs) && - ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), - [=](const Shape& l, const Shape& r) { - return CompareShapes(l, r, compare_layouts); - }); +namespace { + +// Recursive helper for comparing the equality of two shapes. Returns true if +// the shapes are the same. If compare_layouts is true, then layouts must also +// match. 
+bool CompareShapes(const Shape& lhs, const Shape& rhs, bool compare_layouts) { + if (ShapeUtil::IsTuple(lhs)) { + if (!ShapeUtil::IsTuple(rhs)) { + VLOG(3) << "CompareShapes: lhs is a tuple, rhs not a tuple"; + return false; + } + + if (!ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), + [=](const Shape& l, const Shape& r) { + return CompareShapes(l, r, compare_layouts); + })) { + VLOG(3) << "CompareShapes: tuples on lhs and rhs not equal"; + return false; + } } // Explicitly compare the fields rather than using MessageDifferencer because // we want empty layouts to be treated identically to missing layouts. - if (compare_layouts && - (!ContainersEqual(lhs.layout().minor_to_major(), - rhs.layout().minor_to_major()) || - !ContainersEqual(lhs.layout().padded_dimensions(), - rhs.layout().padded_dimensions()) || - lhs.layout().padding_value() != rhs.layout().padding_value())) { + if (compare_layouts) { + if (!ContainersEqual(lhs.layout().minor_to_major(), + rhs.layout().minor_to_major())) { + VLOG(3) << "CompareShapes: lhs layout != rhs layout"; + return false; + } + if (!ContainersEqual(lhs.layout().padded_dimensions(), + rhs.layout().padded_dimensions())) { + VLOG(3) + << "CompareShapes: lhs padded_dimensions != rhs padded_dimensions"; + return false; + } + if (lhs.layout().padding_value() != rhs.layout().padding_value()) { + VLOG(3) << "CompareShapes: lhs padding value != rhs padding_value"; + return false; + } + } + + if (!ShapeUtil::SameDimensions(lhs, rhs)) { + VLOG(3) << "CompareShapes: lhs dimensions != rhs dimensions"; return false; } - return SameDimensions(lhs, rhs) && SameElementType(lhs, rhs); + if (!ShapeUtil::SameElementType(lhs, rhs)) { + VLOG(3) << "CompareShapes: lhs element type != rhs element type"; + return false; + } + return true; } +} // namespace + /* static */ bool ShapeUtil::Equal(const Shape& lhs, const Shape& rhs) { bool equal = CompareShapes(lhs, rhs, /*compare_layouts=*/true); if (!equal && VLOG_IS_ON(3)) { - // TODO(jeff): Maybe 
print more info about where lhs and rhs differ VLOG(3) << "ShapeUtil::Equal differ: lhs = " << lhs.ShortDebugString() << ", rhs = " << rhs.ShortDebugString(); } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index fa5fcc0224..963a3e4805 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -388,12 +388,6 @@ class ShapeUtil { Shape shape); private: - // Recursive helper for comparing the equality of two shapes. Returns true if - // the shapes are the same. If compare_layouts is true, then layouts must also - // match. - static bool CompareShapes(const Shape& lhs, const Shape& rhs, - bool compare_layouts); - // Validates all of the non-layout properties of the shape -- this is a helper // used by both the layout-optional and layout-required public method. static Status ValidateShapeWithOptionalLayoutInternal(const Shape& shape); diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 4e8a496e7e..fb2f8fb284 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -150,6 +150,26 @@ TEST(ShapeUtilTest, EmptyLayoutEqualsMissingLayout) { EXPECT_TRUE(ShapeUtil::Equal(scalar1, scalar2)); } +TEST(ShapeUtilTest, CompareShapesWithPaddedDimensionsMismatch) { + Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30}); + shape1.mutable_layout()->add_padded_dimensions(10); + + Shape shape2 = ShapeUtil::MakeShape(F32, {20, 30}); + shape2.mutable_layout()->add_padded_dimensions(11); + + EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2)); +} + +TEST(ShapeUtilTest, CompareShapesWithPaddingValueMismatch) { + Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30}); + shape1.mutable_layout()->set_padding_value(ZERO_PAD); + + Shape shape2 = ShapeUtil::MakeShape(F32, {20, 30}); + shape2.mutable_layout()->set_padding_value(LOWEST_PAD); + + EXPECT_FALSE(ShapeUtil::Equal(shape1, shape2)); +} + TEST(ShapeUtilTest, 
ScalarUnpopulatedLayoutEqualsScalarLayout) { Shape scalar_unpopulated = ShapeUtil::MakeShape(F32, {}); scalar_unpopulated.clear_layout(); diff --git a/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py b/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py index 81fbf2a6ef..f378966562 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/categorical_test.py @@ -26,7 +26,9 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test @@ -146,6 +148,32 @@ class CategoricalTest(test.TestCase): -(0.6 * np.log(0.6) + 0.4 * np.log(0.4)) ]) + def testEntropyGradient(self): + with self.test_session() as sess: + logits = constant_op.constant([[1., 2., 3.], [2., 5., 1.]]) + + probabilities = nn_ops.softmax(logits) + log_probabilities = nn_ops.log_softmax(logits) + true_entropy = - math_ops.reduce_sum( + probabilities * log_probabilities, axis=-1) + + categorical_distribution = categorical.Categorical(p=probabilities) + categorical_entropy = categorical_distribution.entropy() + + # works + true_entropy_g = gradients_impl.gradients(true_entropy, [logits]) + categorical_entropy_g = gradients_impl.gradients( + categorical_entropy, [logits]) + + res = sess.run({"true_entropy": true_entropy, + "categorical_entropy": categorical_entropy, + "true_entropy_g": true_entropy_g, + "categorical_entropy_g": categorical_entropy_g}) + self.assertAllClose(res["true_entropy"], + res["categorical_entropy"]) + self.assertAllClose(res["true_entropy_g"], + res["categorical_entropy_g"]) + def testSample(self): with 
self.test_session(): histograms = [[[0.2, 0.8], [0.4, 0.6]]] diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py index 57c873f59e..0181ded643 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_util_test.py @@ -569,10 +569,11 @@ class SoftplusTest(test.TestCase): def testInverseSoftplusGradientNeverNan(self): with self.test_session(): # Note that this range contains both zero and inf. - x = constant_op.constant((10.**np.arange(-8, 6)).astype(np.float16)) - y = distribution_util.softplus_inverse(x).eval() + x = constant_op.constant(np.logspace(-8, 6).astype(np.float16)) + y = distribution_util.softplus_inverse(x) + grads = gradients_impl.gradients(y, x)[0].eval() # Equivalent to `assertAllFalse` (if it existed). - self.assertAllEqual(np.zeros_like(y).astype(np.bool), np.isnan(y)) + self.assertAllEqual(np.zeros_like(grads).astype(np.bool), np.isnan(grads)) if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py index 7e92f49677..41a4f9d859 100644 --- a/tensorflow/contrib/distributions/python/ops/bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijector.py @@ -1977,7 +1977,7 @@ class AffineLinearOperator(Bijector): if scale.tensor_rank is not None: batch_ndims = scale.tensor_rank - 2 else: - batch_ndims = scale.tensor_rank_dynamic() - 2 + batch_ndims = scale.tensor_rank_tensor() - 2 graph_parents += [batch_ndims] else: batch_ndims = 0 # We won't need shape inference when scale is None. 
diff --git a/tensorflow/contrib/distributions/python/ops/categorical.py b/tensorflow/contrib/distributions/python/ops/categorical.py index feca611d00..9573e89237 100644 --- a/tensorflow/contrib/distributions/python/ops/categorical.py +++ b/tensorflow/contrib/distributions/python/ops/categorical.py @@ -209,17 +209,8 @@ class Categorical(distribution.Distribution): return math_ops.exp(self._log_prob(k)) def _entropy(self): - if self.logits.get_shape().ndims == 2: - logits_2d = self.logits - else: - logits_2d = array_ops.reshape(self.logits, [-1, self.num_classes]) - histogram_2d = nn_ops.softmax(logits_2d) - ret = array_ops.reshape( - nn_ops.softmax_cross_entropy_with_logits(labels=histogram_2d, - logits=logits_2d), - self.batch_shape()) - ret.set_shape(self.get_batch_shape()) - return ret + return -math_ops.reduce_sum( + nn_ops.log_softmax(self.logits) * self.p, axis=-1) def _mode(self): ret = math_ops.argmax(self.logits, dimension=self._batch_rank) @@ -245,5 +236,6 @@ def _kl_categorical_categorical(a, b, name=None): name, "kl_categorical_categorical", [a.logits, b.logits]): # sum(p*ln(p/q)) return math_ops.reduce_sum( - nn_ops.softmax(a.logits)*(nn_ops.log_softmax(a.logits) - - nn_ops.log_softmax(b.logits)), reduction_indices=[-1]) + nn_ops.softmax(a.logits) * ( + nn_ops.log_softmax(a.logits) - nn_ops.log_softmax(b.logits)), + axis=-1) diff --git a/tensorflow/contrib/graph_editor/transform.py b/tensorflow/contrib/graph_editor/transform.py index 6fb347c834..832698b8a0 100644 --- a/tensorflow/contrib/graph_editor/transform.py +++ b/tensorflow/contrib/graph_editor/transform.py @@ -26,13 +26,13 @@ from six import iteritems from six import iterkeys from six import string_types from six import StringIO - from tensorflow.contrib.graph_editor import edit from tensorflow.contrib.graph_editor import reroute from tensorflow.contrib.graph_editor import select from tensorflow.contrib.graph_editor import subgraph from tensorflow.contrib.graph_editor import util from 
tensorflow.python.framework import ops as tf_ops +from tensorflow.python.platform import tf_logging as logging __all__ = [ "replace_t_with_placeholder_handler", @@ -87,17 +87,24 @@ def keep_t_if_possible_handler(info, t): def assign_renamed_collections_handler(info, elem, elem_): """Add the transformed elem to the (renamed) collections of elem. + A collection is renamed only if is not a known key, as described in + `tf.GraphKeys`. + Args: info: Transform._Info instance. elem: the original element (`tf.Tensor` or `tf.Operation`) elem_: the transformed element """ - # TODO(fkp): handle known special cases + known_collection_names = util.get_predefined_collection_names() for name, collection in iteritems(info.collections): if elem not in collection: continue - collection_name_ = info.transformer.new_name(name) - info.graph_.add_to_collection(collection_name_, elem_) + + if name in known_collection_names: + transformed_name = name + else: + transformed_name = info.transformer.new_name(name) + info.graph_.add_to_collection(transformed_name, elem_) def transform_op_if_inside_handler(info, op, keep_if_possible=True): @@ -150,6 +157,11 @@ def copy_op_handler(info, op, copy_shape=True): # Transform inputs: inputs_ = [info.transformer._transform_t(t) for t in op.inputs] + # Leave inputs empty if a graph cycle was found. 
+ if None in inputs_: + info.cyclic_ops.append(op) + inputs_ = [] + # Clone the node def: node_def_ = deepcopy(op._node_def) @@ -239,7 +251,7 @@ class Transformer(object): self.transformed_ts = {} self.collections = dict((key, self.graph.get_collection(key)) for key in self.graph.get_all_collection_keys()) - + self.cyclic_ops = [] class ResultInfo(object): """"Contains information about the result of a transform operation.""" @@ -452,6 +464,17 @@ class Transformer(object): for op in remaining_roots: self._transform_op(op) + # Finalize cyclic ops: + for op in self._info.cyclic_ops: + logging.debug("Finalizing cyclic op: %s", op.name) + op_ = self._info.transformed_ops[op] + inputs_ = [self._info.transformed_ts[t] for t in op.inputs] + if None in inputs_: + raise ValueError("Could not find all the inputs of cyclic op: {}" + .format(op_.name)) + for input_id, t_ in enumerate(inputs_): + op_._update_input(input_id, t_) # pylint: disable=protected-access + sgv_ = self._transform_sgv(sgv) res_info = Transformer.ResultInfo(self._info) @@ -506,9 +529,13 @@ class Transformer(object): Returns: The transformed tensor. """ + logging.debug("Transforming tensor: %s", t.name) if t in self._info.transformed_ts: return self._info.transformed_ts[t] + # Mark as None to detect cycle. 
+ self._info.transformed_ts[t] = None + op, op_index = t.op, t.value_index # If op is not in the subgraph: diff --git a/tensorflow/contrib/graph_editor/util.py b/tensorflow/contrib/graph_editor/util.py index 11ee2435c9..d8824f6792 100644 --- a/tensorflow/contrib/graph_editor/util.py +++ b/tensorflow/contrib/graph_editor/util.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import collections +import re from six import iteritems from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops as tf_array_ops @@ -465,3 +466,75 @@ def make_placeholder_from_dtype_and_shape(dtype, shape=None, scope=None): """ return tf_array_ops.placeholder( dtype=dtype, shape=shape, name=placeholder_name(scope=scope)) + + +_INTERNAL_VARIABLE_RE = re.compile(r"^__\w+__$") + + +def get_predefined_collection_names(): + """Return all the predefined collection names.""" + return [getattr(tf_ops.GraphKeys, key) for key in dir(tf_ops.GraphKeys) + if not _INTERNAL_VARIABLE_RE.match(key)] + + +def find_corresponding_elem(target, dst_graph, dst_scope="", src_scope=""): + """Find corresponding op/tensor in a different graph. + + Args: + target: A `tf.Tensor` or a `tf.Operation` belonging to the original graph. + dst_graph: The graph in which the corresponding graph element must be found. + dst_scope: A scope which is prepended to the name to look for. + src_scope: A scope which is removed from the original of `target` name. + + Returns: + The corresponding tf.Tensor` or a `tf.Operation`. + + Raises: + ValueError: if `src_name` does not start with `src_scope`. + TypeError: if `target` is not a `tf.Tensor` or a `tf.Operation` + KeyError: If the corresponding graph element cannot be found. 
+ """ + src_name = target.name + if src_scope: + src_scope = scope_finalize(src_scope) + if not src_name.startswith(src_scope): + raise ValueError("{} does not start with {}".format(src_name, src_scope)) + src_name = src_name[len(src_scope):] + + dst_name = src_name + if dst_scope: + dst_scope = scope_finalize(dst_scope) + dst_name = dst_scope + dst_name + + if isinstance(target, tf_ops.Tensor): + return dst_graph.get_tensor_by_name(dst_name) + if isinstance(target, tf_ops.Operation): + return dst_graph.get_operation_by_name(dst_name) + raise TypeError("Expected tf.Tensor or tf.Operation, got: {}".format(type(target))) + + +def find_corresponding(targets, dst_graph, dst_scope="", src_scope=""): + """Find corresponding ops/tensors in a different graph. + + `targets` is a Python tree, that is, a nested structure of iterable + (list, tuple, dictionary) whose leaves are instances of + `tf.Tensor` or `tf.Operation` + + Args: + targets: A Python tree containing `tf.Tensor` or `tf.Operation` + belonging to the original graph. + dst_graph: The graph in which the corresponding graph element must be found. + dst_scope: A scope which is prepended to the name to look for. + src_scope: A scope which is removed from the original of `top` name. + + Returns: + A Python tree containing the corresponding `tf.Tensor` or a `tf.Operation`. + + Raises: + ValueError: if `src_name` does not start with `src_scope`. + TypeError: if `top` is not a `tf.Tensor` or a `tf.Operation` + KeyError: If the corresponding graph element cannot be found. + """ + def func(top): + return find_corresponding_elem(top, dst_graph, dst_scope, src_scope) + return transform_tree(targets, func) diff --git a/tensorflow/contrib/hvx/hexagon_controller/Makefile b/tensorflow/contrib/hvx/hexagon_controller/Makefile new file mode 100644 index 0000000000..9fe2ed596a --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/Makefile @@ -0,0 +1,19 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include glue/defines.min + +include target/make/android.min + +include $(RULES_MIN) diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c new file mode 100644 index 0000000000..3ca5532c38 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v1_graph_init.c @@ -0,0 +1,16 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +void init_graph_v1(int nn_id) {} diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c new file mode 100644 index 0000000000..dc61ae754a --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_float_data.c @@ -0,0 +1,16 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +float inception_dummy_float_data_299x299[299*299*3] = {}; diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c new file mode 100644 index 0000000000..27e1ca40b9 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_dummy_int_data.c @@ -0,0 +1,17 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include <stdint.h> +uint8_t inception_dummy_int_data_299x299[299*299*3] = {}; diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c new file mode 100644 index 0000000000..9def665827 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_dummy_data/inception_v3_graph_init.c @@ -0,0 +1,16 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +void init_graph(int nn_id) {} diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c new file mode 100644 index 0000000000..567485b035 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c @@ -0,0 +1,355 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// to demonstrate the performance difference between ION and HLOS memory +// for sharing with ADSP. +#define USE_ION_MEMORY + +#include <limits.h> +#include <stdio.h> + +#include "hexagon_controller.h" +#include "hexagon_nn.h" +#include "tfm_log.h" + +static const uint32_t MAX_NODES = 2048; +static const uint32_t MAX_EVENT_COUNT = 256; + +static const bool DUMP_OUTPUT = false; +static const bool DBG_EXECUTION = true; + +static const int OUT_RANKING_SIZE = 5; + +// static only for this file. 
+// TODO(satok): allocate dynamically +static float s_output_values[300 * 300 * 3 * 4]; + +extern void init_graph(uint32_t id); +extern void init_graph_v1(uint32_t id); +extern uint8_t inception_dummy_int_data_299x299[]; +extern uint8_t inception_sample_int_data_224x224[]; +extern float inception_dummy_float_data_299x299_299x299[]; + +enum InceptionVersion { + INCEPTION_V1, + INCEPTION_V3, +}; + +static enum InceptionVersion s_inception_version = INCEPTION_V3; + +///////////////////////////////////////////////// +// file local functions + +static const char *ConvertGraphInfoIdToName(unsigned int id) { + // TODO(satok): implement + return "?"; +} + +static const char *ConvertGraphInfoIdToOpName(unsigned int id) { + // TODO(satok): implement + return "?"; +} + +///////////////////////////////////////////////// +// file local utilities +static uint32_t FindMaxIdxWithExcludeList( + const float *data, uint32_t entries, const int exclude_size, + const int* exclude_idx) { + int i; + float maxval = 0.0f; // overwritten by the first non-excluded entry + int maxidx = -1; // -1 until a non-excluded entry has been seen + for (i = 0; i < entries; i++) { + bool exclude = false; + for (int j = 0; j < exclude_size; ++j) { + if (exclude_idx[j] == i) { + exclude = true; + break; + } + } + if (exclude) { + continue; + } + if (maxidx < 0 || maxval < data[i]) { // never seed the search from an excluded index + maxval = data[i]; + maxidx = i; + } + } + return maxidx < 0 ? 0 : maxidx; // fall back to 0 when nothing is eligible +} + +static uint32_t FindMaxIdx(const float* data, uint32_t entries) { + return FindMaxIdxWithExcludeList(data, entries, 0, NULL); +} + +void hexagon_controller_PrintMaxNIdx(const float *data, const uint32_t entries, + const int n, int* out_ranking) { + if (DUMP_OUTPUT) { + for (int i = 0; i < entries; ++i) { + TFMLOGD("%d: val = %f", i, data[i]); + } + } + for (int i = 0; i < n; ++i) { + out_ranking[i] = INT_MAX; + } + for (int i = 0; i < n; ++i) { + out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking); + } + TFMLOGD("=== RANKING ==="); + for (int i = 0; i < n; ++i) { + TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]); + } +} + 
+static inline unsigned long long int GetCounter(hexagon_nn_perfinfo s) { + unsigned long long int ret; + ret = s.counter_hi; + ret <<= 32; + ret |= s.counter_lo; + return ret; +} + +static int CompareCycle(const void *va, const void *vb) { + const hexagon_nn_perfinfo *a = va; + const hexagon_nn_perfinfo *b = vb; + unsigned long long int acount = GetCounter(*a); + unsigned long long int bcount = GetCounter(*b); + if (acount < bcount) { + return -1; + } else if (acount > bcount) { + return 1; + } else { + return 0; + } +} + +///////////////////////////////////////////////// +// Graph functions + +uint32_t hexagon_controller_InstantiateGraph() { + const uint32_t nn_id = hexagon_nn_init(); + // set debug level to 99 for now + //hexagon_nn_set_debug_level(nn_id, 99); + // TODO(satok): make this as argument + hexagon_nn_set_debug_level(nn_id, 0); + return nn_id; +} + +void hexagon_controller_InitGraph(int version, uint32_t nn_id) { + if (version == 1) { + s_inception_version = INCEPTION_V1; + } else if (version == 3) { + s_inception_version = INCEPTION_V3; + } else { + TFMLOGE("Unsupported inception version %d", version); + return; + } + if (s_inception_version == INCEPTION_V3) { + init_graph(nn_id); + } else if (s_inception_version == INCEPTION_V1) { + init_graph_v1(nn_id); + } + TFMLOGD("Init graph (inception version = %d) done.", version); +} + +bool hexagon_controller_ConstructGraph(uint32_t nn_id) { + int err; + if ((err = hexagon_nn_prepare(nn_id)) != 0) { + TFMLOGE("Prepare failed! 
returned 0x%x\n", err); + return false; + } else { + TFMLOGD("Prepare success!\n"); + return true; + } +} + +uint32_t hexagon_controller_SetupGraph(int version) { + const uint32_t nn_id = hexagon_controller_InstantiateGraph(); + hexagon_controller_InitGraph(version, nn_id); + hexagon_controller_ConstructGraph(nn_id); + return nn_id; +} + +bool hexagon_controller_ExecuteGraph( + const uint32_t nn_id, + const uint32_t batches, + const uint32_t height, + const uint32_t width, + const uint32_t depth, + uint8_t* int_data, + const uint32_t int_data_size, + uint32_t* out_batches, + uint32_t* out_height, + uint32_t* out_width, + uint32_t* out_depth, + uint8_t* out_vals, + const uint32_t output_val_byte_size, + uint32_t* out_data_byte_size) { + int err; + if (DBG_EXECUTION) { + TFMLOGD("Preparing to execute..."); + TFMLOGD("Input: %d, %d, %d, %d, %d, %d", + batches, height, width, depth, int_data[0], int_data_size); + TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals); + LogDHexagon("Execute graph!"); + } + + if ((err = hexagon_nn_execute(nn_id, + batches, + height, + width, + depth, + int_data, + int_data_size, + out_batches, + out_height, + out_width, + out_depth, + out_vals, + output_val_byte_size, + out_data_byte_size)) != 0) { + if (DBG_EXECUTION) { + LogDHexagon("Execution failed!"); + TFMLOGE("execute got err: %d\n",err); + } + return false; + } else { + if (DBG_EXECUTION) { + LogDHexagon("Execution succeeded!"); + TFMLOGD("%d x %d x %d x %d, byte size = %d\n", + *out_batches, + *out_height, + *out_width, + *out_depth, + *out_data_byte_size); + } + return true; + } +} + +bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) { + uint32_t out_batches, out_height, out_width, out_depth; + uint32_t out_data_size; + // s_output_values = 300 * 300 * 3 * 4 * 4 + const bool success = hexagon_controller_ExecuteGraph( + nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + (uint8_t 
*)inception_dummy_int_data_299x299, + INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 * + INCEPTION_PARAM_DEPTH, + &out_batches, &out_height, &out_width, &out_depth, + (uint8_t *)s_output_values, sizeof(s_output_values), + &out_data_size); + if (success) { + int out_ranking[OUT_RANKING_SIZE]; + hexagon_controller_PrintMaxNIdx( + s_output_values, + out_batches * out_height * out_width * out_depth, + OUT_RANKING_SIZE, out_ranking); + TFMLOGD("%d x %d x %d x %d, size = %d\n", + out_batches, + out_height, + out_width, + out_depth, + out_data_size); + TFMLOGD("max idx: %d\n", FindMaxIdx( + s_output_values, + out_batches * out_height * out_width * out_depth)); + if (out_ranking[0] == 169 && out_ranking[1] == 7) { + return true; + } else { + TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); + return false; + } + } else { + return false; + } +} + +void hexagon_controller_DumpPerf(uint32_t nn_id) { + hexagon_nn_perfinfo info[MAX_NODES]; + unsigned long long int total_cycles = 0; + unsigned long long int cum_cycles = 0; + unsigned long long int counter = 0; + int n_nodes; + int i; + TFMLOGD("Perf dump follows:"); + if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES,&n_nodes) != 0) { + TFMLOGE("perf info failure"); + return; + } + TFMLOGD("Total %d nodes.",n_nodes); + qsort(info,n_nodes,sizeof(info[0]), CompareCycle); + for (i = 0; i < n_nodes; i++) { + total_cycles += GetCounter(info[i]); + } + TFMLOGD("Total %lld cycles.",total_cycles); + for (i = 0; i < n_nodes; i++) { + counter = GetCounter(info[i]); + cum_cycles += counter; + TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," + "cum_cycles,%lld,%f %%\n", + info[i].node_id, + ConvertGraphInfoIdToName(info[i].node_id), + ConvertGraphInfoIdToOpName(info[i].node_id), + info[i].executions, + counter, + 100*((double)counter)/total_cycles, + cum_cycles, + 100*((double)cum_cycles)/total_cycles); + } +#ifdef ENABLE_HVX_FULL_DEBUG + DumpAllPerf(nn_id); +#endif +} + +void 
hexagon_controller_DumpNodeName(uint32_t nn_id) { + TFMLOGD("Show node name"); + const uint32_t id = nn_id; + hexagon_nn_perfinfo info[MAX_NODES]; + unsigned long long int total_cycles = 0; + unsigned long long int cum_cycles = 0; + unsigned long long int counter = 0; + int node_count; + int i; + TFMLOGD("Perf dump follows:"); + if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) { + TFMLOGD("perf info failure"); + return; + } + TFMLOGD("Total %d nodes.",node_count); + qsort(info, node_count, sizeof(info[0]), CompareCycle); + for (i = 0; i < node_count; i++) { + total_cycles += GetCounter(info[i]); + } + TFMLOGD("Total %lld cycles.", total_cycles); + for (i = 0; i < node_count; i++) { + counter = GetCounter(info[i]); + cum_cycles += counter; + TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," + "cum_cycles,%lld,%f %%", + info[i].node_id, + ConvertGraphInfoIdToName(info[i].node_id), + ConvertGraphInfoIdToOpName(info[i].node_id), + info[i].executions, + counter, + 100*((double)counter)/total_cycles, + cum_cycles, + 100*((double)cum_cycles)/total_cycles); + } +} + +void hexagon_controller_Teardown(uint32_t nn_id) { + hexagon_nn_teardown(nn_id); +} diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c new file mode 100644 index 0000000000..fe329e2f59 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -0,0 +1,374 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// to demonstrate the performance difference between ION and HLOS memory +// for sharing with ADSP. +#define USE_ION_MEMORY + +#include "hexagon_controller.h" + +#include <malloc.h> +#include <stdio.h> + +#include "adspmsgd.h" +#include "dspCV.h" +#include "rpcmem.h" // helper API's for shared buffer allocation +#include "soc_interface.h" +#include "tfm_log.h" + +// if false, use int data as input. This is only for acceleration purpose +static const bool USE_FLOAT_DATA = true; + +// if true, show id for each node +static const bool DBG_SHOW_ID = false; + +static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000; + +// extern pre-generated inception dummy data +extern uint8_t inception_dummy_int_data_224x224[]; +extern uint8_t inception_dummy_int_data_299x299[]; +extern float inception_dummy_float_data_299x299_299x299[]; + +#define GEMM_WRAPPER_VERSION 1 + +// allocate print bufsize in advance @MB +#define PRINT_BUFSIZE (2 * 1024 * 1024) + +static unsigned char s_print_buf[PRINT_BUFSIZE]; + +// input node data buffer size +// x2 1024 * 1024 * 2 > 299 * 299 * 3 * 4 > 1024 * 1024 +static const int INPUT_NODE_DATA_BUFFER_SIZE = 1024 * 1024 * 2; +// output node data buffer size +// (1008 is enough for inception) +static const int OUTPUT_NODE_DATA_BUFFER_SIZE = 300 * 300 * 3 * 4; + +static struct NodeDataFloat s_input_node_data_float_buffer; +static float* s_output_node_data_float_buffer; +static int s_output_node_data_float_buffer_byte_size; +static int s_output_node_data_float_array_size; 
+static uint32_t s_target_graph_id; + +static bool s_dbg_use_inception_dummy_data = false; + +void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) { + if (version == 1) { + if (USE_FLOAT_DATA) { + TFMLOGE("ERROR!!!! Do not use float data for v1"); + return; + } + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1, + INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, + 1, inception_dummy_int_data_224x224); + } else if (version == 3) { + if (USE_FLOAT_DATA) { + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + sizeof(float), (uint8_t*)inception_dummy_float_data_299x299_299x299); + } else { + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + 1, inception_dummy_int_data_299x299); + } + } +} + +bool hexagon_controller_ExecuteGraphWithBuffer( + uint32_t nn_id, bool show_ranking) { + uint32_t out_batches, out_height, out_width, out_depth; + uint32_t out_data_size; + int x = s_input_node_data_float_buffer.x; + int y = s_input_node_data_float_buffer.y; + int z = s_input_node_data_float_buffer.z; + int d = s_input_node_data_float_buffer.d; + uint8_t *byte_data = s_input_node_data_float_buffer.byte_array_data; + int array_size = s_input_node_data_float_buffer.array_size; + const bool success = hexagon_controller_ExecuteGraph( + nn_id, x, y, z, d, byte_data, array_size, + &out_batches, &out_height, &out_width, &out_depth, + (uint8_t *)s_output_node_data_float_buffer, + s_output_node_data_float_buffer_byte_size, + &out_data_size); + s_output_node_data_float_array_size = + out_batches * out_height * out_width * out_depth; + if (!success) { + TFMLOGE("Execution failed"); + return false; + } else if (!show_ranking) { + return true; + } + + static const int OUT_RANKING_SIZE = 5; + int out_ranking[OUT_RANKING_SIZE]; + 
hexagon_controller_PrintMaxNIdx( + s_output_node_data_float_buffer, + out_batches * out_height * out_width * out_depth, + OUT_RANKING_SIZE, out_ranking); + TFMLOGD("%d x %d x %d x %d, byte size = %d\n", + out_batches, + out_height, + out_width, + out_depth, + out_data_size); + if (s_dbg_use_inception_dummy_data) { + // Check the result of inception with a dummy data. This step shouldn't + // be passed when show_ranking != true to avoid adding unnecessary + // additional computation cost. + if (out_ranking[0] == 169 && out_ranking[1] == 7) { + TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]); + return true; + } else { + TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); + return false; + } + } + return true; +} + +uint32_t hexagon_controller_GetTargetGraphId() { + return s_target_graph_id; +} + +void hexagon_controller_SetTargetGraphId(uint32_t graph_id) { + s_target_graph_id = graph_id; +} + +void hexagon_controller_PrintGraph(uint32_t id) { + int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE); + TFMLOGD("PrintGraph %s\n", s_print_buf); + if (retval) { + TFMLOGE("Error on print graph\n"); + } +} + +int hexagon_controller_GetWrapperVersion() { + return GEMM_WRAPPER_VERSION; +} + +int hexagon_controller_GetHexagonBinaryVersion() { + int retval = 0; + hexagon_nn_GetHexagonBinaryVersion(&retval); + return retval; +} + +bool hexagon_controller_AllocateNodeDataBuffers( + int input_size, int output_size) { + TFMLOGD("Allocate memory for input / output node data float"); + if (s_input_node_data_float_buffer.buf_size != 0) { + TFMLOGE("ERROR! input buffer is already allocated!!"); + return false; + } else { + int byte_array_data_size = USE_FLOAT_DATA ? + input_size * sizeof(float) : input_size; /* sizeof(uint8_t) ? */ + s_input_node_data_float_buffer.buf_size = input_size; + // unused? remove? 
+ s_input_node_data_float_buffer.array_data = + malloc(input_size * sizeof(float)); + s_input_node_data_float_buffer.byte_array_data = + malloc(byte_array_data_size); + + s_output_node_data_float_buffer = malloc(output_size * sizeof(float)); + s_output_node_data_float_buffer_byte_size = output_size * sizeof(float); + s_output_node_data_float_array_size = 0; + TFMLOGD("allocate node data buffers"); + } + return true; +} + +bool hexagon_controller_ReleaseNodeDataBuffers() { + if (s_input_node_data_float_buffer.buf_size == 0) { + TFMLOGE("ERROR! input buffer has not been allocated yet!!"); + return false; + } + s_input_node_data_float_buffer.buf_size = 0; + free(s_input_node_data_float_buffer.array_data); + free(s_input_node_data_float_buffer.byte_array_data); // both input allocations are released together + if (s_output_node_data_float_buffer_byte_size == 0) { + TFMLOGE("ERROR! output buffer has not been allocated yet!!"); + return false; + } + s_output_node_data_float_buffer_byte_size = 0; + free(s_output_node_data_float_buffer); // release the output buffer itself + s_output_node_data_float_buffer = NULL; // the getter must not hand out a dangling pointer + return true; +} + +bool hexagon_controller_CopyByteNodeData( + int x, int y, int z, int d, int type_byte_size, uint8_t* array_data) { + int array_byte_size = x * y * z * d * type_byte_size; + TFMLOGD("--- %d, %d, %d, %d, %d, %d",x,y,z,d,type_byte_size,array_byte_size); + if (s_input_node_data_float_buffer.buf_size < array_byte_size) { + TFMLOGE("ERROR! input buffer size is too small! 
%d < %d", + s_input_node_data_float_buffer.buf_size, array_byte_size); + return false; + } + memcpy(s_input_node_data_float_buffer.byte_array_data, + array_data, array_byte_size); + s_input_node_data_float_buffer.array_size = array_byte_size; + s_input_node_data_float_buffer.x = x; + s_input_node_data_float_buffer.y = y; + s_input_node_data_float_buffer.z = z; + s_input_node_data_float_buffer.d = d; + return true; +} + +int hexagon_controller_InitHexagonWithMaxAttributes( + int enable_dcvs, int bus_usage, int version) { + TFMLOGI("Init hexagon with max attributes"); + const int MCPS = 1000; + const int MBPS = 12000; + + adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096); + + dspCV_Attribute attrib[] = { + // The below values will result in the maximum aDSP performance, + // at Turbo voltage. + // Slightly more MCPS than are available on current targets + {DSP_TOTAL_MCPS, MCPS}, + // drive the clock to MAX on known targets + {DSP_MCPS_PER_THREAD, MCPS / 2}, + // 12 GB/sec is slightly higher than the max realistic + // max BW on existing targets. 
+ {PEAK_BUS_BANDWIDTH_MBPS, MBPS}, + // This app is non-real time, and constantly reading/writing memory + {BUS_USAGE_PERCENT, bus_usage}, + }; + int retval = 0; + if (!enable_dcvs) { + retval = hexagon_nn_disableDcvs(); + if (retval) { + TFMLOGE("Failed to disable DSP DCVS: %x\n", retval); + } + } + + retval = + dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0])); + TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval); + + hexagon_controller_AllocateNodeDataBuffers( + INPUT_NODE_DATA_BUFFER_SIZE, OUTPUT_NODE_DATA_BUFFER_SIZE); + + if (s_dbg_use_inception_dummy_data) { + hexagon_controller_InitInputNodeDataToInceptionDummyData(version); + } + s_target_graph_id = 0; + + return retval; +} + +int hexagon_controller_DeInitHexagon() { + adspmsgd_stop(); + TFMLOGI("Finalize hexagon"); + const int retval = dspCV_deinitQ6(); + TFMLOGD("return value from dspCV_deinitQ6(): %d \n", retval); + + hexagon_controller_ReleaseNodeDataBuffers(); + + return retval; +} + +void hexagon_controller_GrowMemorySize() { + hexagon_nn_config(); +} + +struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer() { + return &s_input_node_data_float_buffer; +} + +float* hexagon_controller_GetOutputNodeDataFloatBuffer( + const char *const node_name, int* out_array_size) { + *out_array_size = s_output_node_data_float_array_size; + return s_output_node_data_float_buffer; +} + +// Append const node to the graph +int hexagon_controller_AppendConstNode( + const char* const name, int graph_id, int node_id, + int batch, int height, int width, int depth, + const uint8_t* const data, int data_length) { + if (DBG_SHOW_ID) { + TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", + name, node_id, batch, height, width, depth, data_length); + } else { + TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", + name, batch, height, width, depth, data_length); + } + const int retval = hexagon_nn_append_const_node( + graph_id, node_id, batch, height, width, depth, data, data_length); + if 
(retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return retval; + } + return retval; +} + +// Append node to the graph +int hexagon_controller_AppendNode( + const char* const name, int graph_id, int node_id, int ops_id, + int padding_id, const hexagon_nn_input* const inputs, + int inputs_count, const hexagon_nn_output* const outputs, + int outputs_count) { + char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; + memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); + int pos = 0; + pos += snprintf(&input_param_buf[pos], 500, "in: "); + for (int i = 0; i < inputs_count; ++i) { + if (DBG_SHOW_ID) { + pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ", + inputs[i].src_id, inputs[i].output_idx); + } else { + pos += snprintf(&input_param_buf[pos], 500, "(%d), ", + inputs[i].output_idx); + } + } + + char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; + memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); + pos = 0; + pos += snprintf(&output_param_buf[pos], 500, "out: "); + for (int i = 0; i < outputs_count; ++i) { + pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size); + } + + if (DBG_SHOW_ID) { + TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, + ops_id, padding_id, inputs_count, outputs_count, input_param_buf, + output_param_buf); + } else { + TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, + ops_id, padding_id, inputs_count, outputs_count, input_param_buf, + output_param_buf); + } + const int retval = hexagon_nn_append_node( + graph_id, node_id, ops_id, padding_id, + inputs, inputs_count, + outputs, outputs_count); + if (retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return retval; + } + return retval; +} + +void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) { + s_dbg_use_inception_dummy_data = enable; +} + +bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() { + return s_dbg_use_inception_dummy_data; +} diff --git 
a/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h new file mode 100644 index 0000000000..eaf4a58751 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h @@ -0,0 +1,124 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef GEMM_WRAPPER_H +#define GEMM_WRAPPER_H + +#include <stdbool.h> +#include <stdlib.h> + +#include "hexagon_nn.h" +#include "node_data_float.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define INCEPTION_PARAM_BATCHES 1 +#define INCEPTION_PARAM_HEIGHT_V1 224 +#define INCEPTION_PARAM_WIDTH_V1 224 +#define INCEPTION_PARAM_HEIGHT_V3 299 +#define INCEPTION_PARAM_WIDTH_V3 299 +#define INCEPTION_PARAM_DEPTH 3 + +// General functions +void hexagon_controller_PrintGraph(uint32_t nn_id); + +int hexagon_controller_GetWrapperVersion(); + +int hexagon_controller_GetHexagonBinaryVersion(); + +// Hexagon perf functions +int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, + int bus_usage, int version); + +bool hexagon_controller_AllocateNodeDataBuffers(int input_size, + int output_size); + +bool hexagon_controller_ReleaseNodeDataBuffers(); + +bool hexagon_controller_CopyByteNodeData(int x, int y, int z, int d, + int type_byte_size, + uint8_t* 
array_data); + +int hexagon_controller_DeInitHexagon(); + +uint32_t hexagon_controller_GetTargetGraphId(); + +void hexagon_controller_SetTargetGraphId(uint32_t graph_id); + +// Hexagon config functions +void hexagon_controller_GrowMemorySize(); + +// Graph data transfer functions +struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer(); + +float* hexagon_controller_GetOutputNodeDataFloatBuffer( + const char* const node_name, int* out_array_size); + +// Graph functions +uint32_t hexagon_controller_InstantiateGraph(); + +void hexagon_controller_InitGraph(int version, uint32_t nn_id); + +bool hexagon_controller_ConstructGraph(uint32_t nn_id); + +uint32_t hexagon_controller_SetupGraph(int version); + +bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id); + +bool hexagon_controller_ExecuteGraph( + const uint32_t nn_id, const uint32_t batches, const uint32_t height, + const uint32_t width, const uint32_t depth, uint8_t* int_data, + const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height, + uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals, + const uint32_t output_val_byte_size, uint32_t* out_data_byte_size); + +bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id, + bool show_ranking); + +void hexagon_controller_DumpPerf(uint32_t nn_id); + +void hexagon_controller_DumpNodeName(uint32_t nn_id); + +void hexagon_controller_Teardown(uint32_t nn_id); + +void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries, + const int n, int* out_ranking); + +void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version); + +int hexagon_controller_AppendNode(const char* const name, int graph_id, + int node_id, int op_id, int padding_id, + const hexagon_nn_input* const inputs, + int inputs_count, + const hexagon_nn_output* const outputs, + int outputs_count); + +int hexagon_controller_AppendConstNode(const char* const name, int graph_id, + int node_id, int batch, int height, + int width, int 
depth, + const uint8_t* const data, + int data_length); + +void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable); + +bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled(); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // GEMM_WRAPPER_H diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h new file mode 100644 index 0000000000..e8615fd4ec --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h @@ -0,0 +1,74 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef GEMM_WRAPPER_LOG_H +#define GEMM_WRAPPER_LOG_H + +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> + +#define TFM_LOG_LEVEL_VERBOSE -2 +#define TFM_LOG_LEVEL_DEBUG -1 +#define TFM_LOG_LEVEL_INFO 0 +#define TFM_LOG_LEVEL_WARNING 1 +#define TFM_LOG_LEVEL_ERROR 2 +#define TFM_LOG_LEVEL_FATAL 3 + +static int s_log_level = TFM_LOG_LEVEL_INFO; + +static inline bool IsLogOn(int log_level) { return log_level >= s_log_level; } + +static inline void SetLogLevel(int log_level) { s_log_level = log_level; } + +#define TFMLOGV(fmt, ...) \ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_VERBOSE)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define TFMLOGD(fmt, ...) 
\ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_DEBUG)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define TFMLOGI(fmt, ...) \ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_INFO)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define TFMLOGE(fmt, ...) \ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_ERROR)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +static inline void PrintLogHexagon(const char* fmt, va_list ap) { + char buffer[200]; + const int count = snprintf(buffer, 200, fmt, ap); + buffer[count] = 0; + TFMLOGI("%s", buffer); +} + +static inline void LogDHexagon(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + PrintLogHexagon(fmt, ap); + va_end(ap); +} + +#endif diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h new file mode 100644 index 0000000000..a9c3296e9f --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h @@ -0,0 +1,41 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef NODE_DATA_FLOAT_H +#define NODE_DATA_FLOAT_H + +#ifdef __cplusplus +extern "C" { +#else +#include <inttypes.h> +#endif +#define NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE 100 + +struct NodeDataFloat { + int x; + int y; + int z; + int d; + int buf_size; + int array_size; + float* array_data; + uint8_t* byte_array_data; + char node_name[NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE]; +}; +#ifdef __cplusplus +} +#endif + +#endif // NODE_DATA_FLOAT_H diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c index ebcbb963e8..7db8d4870c 100755 --- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c @@ -15,110 +15,230 @@ limitations under the License. #include "soc_interface.h" +#include <inttypes.h> + +#include "hexagon_controller.h" +#include "hexagon_nn.h" +#include "node_data_float.h" +#include "tfm_log.h" + +const int64_t FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT = 0x01; + +static const int INCEPTION_VERSION = 3; + +static hexagon_nn_input* s_node_inputs_array; +static int s_node_inputs_array_index; +static int s_node_inputs_array_max_count; + +static hexagon_nn_output* s_node_outputs_array; +static int s_node_outputs_array_index; +static int s_node_outputs_array_max_count; + int soc_interface_GetWrapperVersion() { - // TODO(satok): implement - return -1; + TFMLOGD("GetWrapperVersion"); + return hexagon_controller_GetWrapperVersion(); } int soc_interface_GetSocControllerVersion() { - // TODO(satok): implement - return -1; + TFMLOGD("GetSocControllerVersion"); + return hexagon_controller_GetHexagonBinaryVersion(); } bool soc_interface_Init() { - // TODO(satok): implement - return false; + TFMLOGD("Init"); + hexagon_controller_InitHexagonWithMaxAttributes( + 0, 100, INCEPTION_VERSION 
/* version */); + hexagon_controller_GrowMemorySize(); + return true; } bool soc_interface_Finalize() { - // TODO(satok): implement - return false; + TFMLOGD("Finalize"); + hexagon_controller_DeInitHexagon(); + return true; } bool soc_interface_ExecuteGraph() { - // TODO(satok): implement - return false; + TFMLOGD("ExecuteGraph"); + if (hexagon_controller_IsDbgUseInceptionDummyDataEnabled()) { + hexagon_controller_InitInputNodeDataToInceptionDummyData( + INCEPTION_VERSION /* version */); + } + const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); + if (graph_id == 0) { + TFMLOGE("Graph id has not been set yet."); + return false; + } + hexagon_controller_ExecuteGraphWithBuffer(graph_id, true); + return true; } bool soc_interface_TeardownGraph() { - // TODO(satok): implement - return false; + TFMLOGD("TeardownGraph"); + const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); + if (graph_id == 0) { + TFMLOGE("Graph id has not been set yet."); + return false; + } + hexagon_controller_Teardown(graph_id); + return true; } bool soc_interface_FillInputNodeFloat( - int x, int y, int z, int d, const uint8_t* const buf, uint64_t buf_size) { - // TODO(satok): implement - return false; + int x, int y, int z, int d, const uint8_t* const buf, + uint64_t buf_size) { + TFMLOGD("FillInputNodeFloat"); + struct NodeDataFloat* node_data_float = + hexagon_controller_GetInputNodeDataFloatBuffer(); + const int array_size = x * y * z * d; + if (array_size > node_data_float->buf_size) { + TFMLOGE("Array size exceeds buf size %d > %d", + array_size, node_data_float->buf_size); + return false; + } + if (buf_size != array_size * sizeof(float)) { + TFMLOGE("Invalid buf size!"); + return false; + } + memcpy(node_data_float->byte_array_data, buf, buf_size); + node_data_float->x = x; + node_data_float->y = y; + node_data_float->z = z; + node_data_float->d = d; + node_data_float->array_size = buf_size; + return true; } // TODO(satok): Remove and use runtime version bool 
soc_interface_ReadOutputNodeFloat( const char* const node_name, uint8_t** buf, uint64_t *buf_size) { - // TODO(satok): implement - return false; + TFMLOGD("ReadOutputNodeFloat"); + int array_size = -1; + float* output_node_data_float = + hexagon_controller_GetOutputNodeDataFloatBuffer(node_name, &array_size); + if (array_size < 0) { + TFMLOGE("Failed to read data."); + return false; + } + *buf = (uint8_t*)output_node_data_float; + *buf_size = array_size * sizeof(float); + return true; } bool soc_interface_SetupGraphDummy(int version) { - // TODO(satok): implement - return false; + TFMLOGD("SetupGraphDummy"); + const uint32_t graph_id = hexagon_controller_SetupGraph(version); + if (graph_id == 0) { + TFMLOGE("Failed to setup graph"); + return false; + } + hexagon_controller_SetTargetGraphId(graph_id); + return true; } bool soc_interface_AllocateNodeInputAndNodeOutputArray( int total_input_count, int total_output_count) { - // TODO(satok): implement - return false; + TFMLOGD("Allocate node inputs and node outputs array %d, %d", + total_input_count, total_output_count); + s_node_inputs_array = malloc(total_input_count * sizeof(hexagon_nn_input)); + s_node_outputs_array = malloc(total_output_count * sizeof(hexagon_nn_output)); + s_node_inputs_array_index = 0; + s_node_outputs_array_index = 0; + s_node_inputs_array_max_count = total_input_count; + s_node_outputs_array_max_count = total_output_count; + return true; } bool soc_interface_ReleaseNodeInputAndNodeOutputArray() { - // TODO(satok): implement - return false; + TFMLOGD("Release node inputs and node outputs array"); + free(s_node_inputs_array); + free(s_node_outputs_array); + return true; } void* soc_interface_SetOneNodeInputs( int input_count, const int* const node_id, const int* const port) { - // TODO(satok): implement - return 0; + if (s_node_inputs_array_index + input_count > s_node_inputs_array_max_count) { + TFMLOGE("input count exceeds limit"); + return 0; + } + for (int i = 0; i < input_count; ++i) { + 
const int index = s_node_inputs_array_index + i; + s_node_inputs_array[index].src_id = node_id[i]; + s_node_inputs_array[index].output_idx = port[i]; + } + void* retval = (void*)(&s_node_inputs_array[s_node_inputs_array_index]); + s_node_inputs_array_index += input_count; + return retval; } void* soc_interface_SetOneNodeOutputs(int output_count, int* max_size) { - // TODO(satok): implement - return 0; + if (s_node_outputs_array_index + output_count > + s_node_outputs_array_max_count) { + TFMLOGE("output count exceeds limit"); + return 0; + } + for (int i = 0; i < output_count; ++i) { + const int index = s_node_outputs_array_index + i; + s_node_outputs_array[index].max_size = max_size[i]; + } + void* retval = (void*)(&s_node_outputs_array[s_node_outputs_array_index]); + s_node_outputs_array_index += output_count; + return retval; } // Append const node to the graph bool soc_interface_AppendConstNode( - const char* const name, int node_id, int batch, int height, int width, - int depth, const uint8_t* const data, int data_length) { - // TODO(satok): implement - return false; + const char* const name, int node_id, int batch, int height, int width, int depth, + const uint8_t* const data, int data_length) { + const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); + const int retval = hexagon_controller_AppendConstNode( + name, graph_id, node_id, batch, height, width, depth, data, data_length); + if (retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return false; + } + return true; } // Append node to the graph bool soc_interface_AppendNode( - const char* const name, int node_id, int ops_id, int padding_id, - const void* const inputs, int inputs_count, const void* const outputs, - int outputs_count) { - // TODO(satok): implement - return false; + const char* const name, int node_id, int ops_id, int padding_id, const void* const inputs, + int inputs_count, const void* const outputs, int outputs_count) { + const uint32_t graph_id = 
hexagon_controller_GetTargetGraphId(); + const int retval = hexagon_controller_AppendNode( + name, graph_id, node_id, ops_id, padding_id, + (hexagon_nn_input*) inputs, inputs_count, + (hexagon_nn_output*) outputs, outputs_count); + if (retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return false; + } + return true; } // Instantiate graph bool soc_interface_InstantiateGraph() { - // TODO(satok): implement - return false; + const uint32_t nn_id = hexagon_controller_InstantiateGraph(); + hexagon_controller_SetTargetGraphId(nn_id); + return true; } // Construct graph bool soc_interface_ConstructGraph() { - // TODO(satok): implement - return false; + const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); + return hexagon_controller_ConstructGraph(graph_id); } void soc_interface_SetLogLevel(int log_level) { - // TODO(satok): implement + SetLogLevel(log_level); } void soc_interface_SetDebugFlag(uint64_t flag) { - // TODO(satok): implement + TFMLOGI("Set debug flag 0x%" PRIx64, flag); + if ((flag & FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT) != 0) { + TFMLOGI("Enable always use panda data"); + hexagon_controller_EnableDbgUseInceptionDummyData(true); + } } diff --git a/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min b/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min new file mode 100644 index 0000000000..4770d31c56 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/target/make/android.min @@ -0,0 +1,70 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +$(info ------------------------------------------) +$(info --- V = $(V)) +$(info --- GLUE_DIR = $(GLUE_DIR)) +$(info --- HEXAGON_SDK_ROOT = $(HEXAGON_SDK_ROOT)) +$(info ------------------------------------------) + +INCDIRS += ../../../libs/common/adspmsgd/ship/android_Release + +INCDIRS += src_impl/include +INCDIRS += src_log/include +INCDIRS += src_soc_interface/include + +LIBDIRS += ../../../libs/common/adspmsgd/ship/android_Release + +BUILD_DLLS=libhexagon_controller + +hexagon_controller_lib_QAICIDLS += \ +interface/hexagon_nn \ +$(MAKE_D_DSPCV_INCDIR)/dspCV + +# hexagon controller library +hexagon_controller_lib_C_SRCS += \ +src_impl/hexagon_controller \ +src_impl/graph_functions_wrapper \ +src_soc_interface/soc_interface + +# dummy data +hexagon_controller_lib_C_SRCS += \ +src_dummy_data/inception_v1_graph_init \ +src_dummy_data/inception_v3_dummy_float_data \ +src_dummy_data/inception_v3_dummy_int_data \ +src_dummy_data/inception_v3_graph_init + +# hexagon interface +hexagon_controller_lib_C_SRCS += \ +$V/hexagon_nn_stub \ +$V/dspCV_stub + +hexagon_controller_lib_DLLS += libadsprpc +hexagon_controller_lib_LIBS += rpcmem adspmsgd +hexagon_controller_lib_LD_FLAGS += -llog +hexagon_controller_lib_DEFINES += VERIFY_PRINT_ERROR + +libhexagon_controller_QAICIDLS += $(hexagon_controller_lib_QAICIDLS) +libhexagon_controller_C_SRCS += $(hexagon_controller_lib_C_SRCS) +libhexagon_controller_DLLS += $(hexagon_controller_lib_DLLS) +libhexagon_controller_LIBS += 
$(hexagon_controller_lib_LIBS) +libhexagon_controller_LD_FLAGS += $(hexagon_controller_lib_LD_FLAGS) +libhexagon_controller_DEFINES += $(hexagon_controller_lib_DEFINES) + +BUILD_COPIES = \ + $(DLLS) \ + $(EXES) \ + $(LIBS) \ + $(SHIP_DIR)/ ; diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 2673495b90..e47342f966 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -1385,7 +1385,8 @@ def fully_connected(inputs, if not isinstance(num_outputs, six.integer_types): raise ValueError('num_outputs should be int or long, got %s.', num_outputs) - layer_variable_getter = _build_variable_getter({'bias': 'biases'}) + layer_variable_getter = _build_variable_getter({'bias': 'biases', + 'kernel': 'weights'}) with variable_scope.variable_scope( scope, 'fully_connected', [inputs], @@ -1395,9 +1396,9 @@ def fully_connected(inputs, units=num_outputs, activation=None, use_bias=not normalizer_fn and biases_initializer, - weights_initializer=weights_initializer, + kernel_initializer=weights_initializer, bias_initializer=biases_initializer, - weights_regularizer=weights_regularizer, + kernel_regularizer=weights_regularizer, bias_regularizer=biases_regularizer, activity_regularizer=None, trainable=trainable, @@ -1408,7 +1409,7 @@ def fully_connected(inputs, outputs = layer.apply(inputs) # Add variables to collections. 
- _add_variable_to_collections(layer.w, variables_collections, 'weights') + _add_variable_to_collections(layer.kernel, variables_collections, 'weights') if layer.bias is not None: _add_variable_to_collections(layer.bias, variables_collections, 'biases') diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index d1b35e33c2..6043d4dc0e 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1563,7 +1563,7 @@ class FCTest(test.TestCase): _layers.fully_connected(inputs, 32, weights_regularizer=weight_decay) wd = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)[0] self.assertEqual(wd.op.name, - 'fully_connected/weights/Regularizer/l2_regularizer') + 'fully_connected/kernel/Regularizer/l2_regularizer') sess.run(variables_lib.global_variables_initializer()) self.assertLess(sess.run(wd), 0.4) diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index 0b50d93b72..bab59d0048 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -176,6 +176,11 @@ def optimize_loss(loss, str(type(learning_rate)))) if summaries is None: summaries = ["loss", "learning_rate"] + else: + for summ in summaries: + if summ not in OPTIMIZER_SUMMARIES: + raise ValueError("Summaries should be one of [%s], you provided %s." 
% + (", ".join(OPTIMIZER_SUMMARIES), summ)) if learning_rate is not None and learning_rate_decay_fn is not None: if global_step is None: raise ValueError("global_step is required for learning_rate_decay_fn.") diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py index b7b984b1e8..9dc612e58e 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers_test.py +++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py @@ -108,6 +108,14 @@ class OptimizersTest(test.TestCase): optimizers_lib.optimize_loss( loss, global_step, learning_rate=0.1, optimizer=optimizer) + def testBadSummaries(self): + with ops.Graph().as_default() as g, self.test_session(graph=g): + _, _, loss, global_step = _setup_model() + with self.assertRaises(ValueError): + optimizers_lib.optimize_loss( + loss, global_step, learning_rate=0.1, optimizer="SGD", + summaries=["loss", "bad_summary"]) + def testInvalidLoss(self): with ops.Graph().as_default() as g, self.test_session(graph=g): _, _, _, global_step = _setup_model() diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index d7b9aaffd4..6a6ff10d44 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -46,4 +46,5 @@ from tensorflow.contrib.learn.python.learn.learn_io import * from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec from tensorflow.contrib.learn.python.learn.monitors import NanLossDuringTrainingError from tensorflow.contrib.learn.python.learn.trainable import Trainable +from tensorflow.contrib.learn.python.learn.utils import * # pylint: enable=wildcard-import diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 1d36389722..becdf61709 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ 
b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -36,7 +36,6 @@ from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework import list_variables from tensorflow.contrib.framework import load_variable -from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.framework.python.ops import variables as contrib_variables from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import metric_spec @@ -68,7 +67,6 @@ from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import device_setter from tensorflow.python.training import monitored_session from tensorflow.python.training import saver -from tensorflow.python.training import session_run_hook from tensorflow.python.training import summary_io from tensorflow.python.util import compat @@ -815,9 +813,10 @@ class BaseEstimator( update_op, eval_dict = self._extract_metric_update_ops(eval_dict) - hooks = hooks or [] + # We need to copy the hook array as we modify it, thus [:]. + hooks = hooks[:] if hooks else [] if feed_fn: - hooks.append(_FeedFnHook(feed_fn)) + hooks.append(basic_session_run_hooks.FeedFnHook(feed_fn)) if steps: hooks.append( evaluation.StopAfterNEvalsHook( @@ -1216,22 +1215,20 @@ class Estimator(BaseEstimator): self._labels_info) return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.INFER) - @experimental def export_savedmodel( - self, export_dir_base, input_fn, + self, export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, - as_text=False, - exports_to_keep=None): + as_text=False): """Exports inference graph as a SavedModel into given dir. Args: export_dir_base: A string containing a directory to write the exported graph and checkpoints. 
- input_fn: A function that takes no argument and + serving_input_fn: A function that takes no argument and returns an `InputFnOps`. default_output_alternative_key: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -1240,7 +1237,6 @@ class Estimator(BaseEstimator): renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. as_text: whether to write the SavedModel proto in text format. - exports_to_keep: Number of exports to keep. Returns: The string path to the exported directory. @@ -1248,14 +1244,14 @@ class Estimator(BaseEstimator): Raises: ValueError: if an unrecognized export_type is requested. """ - if input_fn is None: - raise ValueError('input_fn must be defined.') + if serving_input_fn is None: + raise ValueError('serving_input_fn must be defined.') with ops.Graph().as_default() as g: contrib_variables.create_global_step(g) - # Call the input_fn and collect the input alternatives. - input_ops = input_fn() + # Call the serving_input_fn and collect the input alternatives. 
+ input_ops = serving_input_fn() input_alternatives, features = ( saved_model_export_utils.get_input_alternatives(input_ops)) @@ -1266,7 +1262,7 @@ class Estimator(BaseEstimator): saved_model_export_utils.get_output_alternatives( model_fn_ops, default_output_alternative_key)) - # Build the SignatureDefs from all pairs of input and output signatures + # Build the SignatureDefs from all pairs of input and output alternatives signature_def_map = saved_model_export_utils.build_all_signature_defs( input_alternatives, output_alternatives, actual_default_output_alternative_key) @@ -1317,17 +1313,6 @@ class Estimator(BaseEstimator): return export_dir -class _FeedFnHook(session_run_hook.SessionRunHook): - """Runs feed_fn and sets the feed_dict accordingly.""" - - def __init__(self, feed_fn): - self.feed_fn = feed_fn - - def before_run(self, run_context): # pylint: disable=unused-argument - return session_run_hook.SessionRunArgs( - fetches=None, feed_dict=self.feed_fn()) - - # For time of deprecation x,y from Estimator allow direct access. 
# pylint: disable=protected-access class SKCompat(sklearn.BaseEstimator): @@ -1343,7 +1328,7 @@ class SKCompat(sklearn.BaseEstimator): epochs=None) all_monitors = [] if feed_fn: - all_monitors = [_FeedFnHook(feed_fn)] + all_monitors = [basic_session_run_hooks.FeedFnHook(feed_fn)] if monitors: all_monitors.extend(monitors) diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index 0b4897d4b2..ffa2e17aec 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -211,12 +211,12 @@ def _build_estimator_for_export_tests(tmpdir): feature_spec = feature_column_lib.create_feature_spec_for_parsing( feature_columns) - export_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec) + serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(feature_spec) # hack in an op that uses an asset, in order to test asset export. # this is not actually valid, of course. - def export_input_fn_with_asset(): - features, labels, inputs = export_input_fn() + def serving_input_fn_with_asset(): + features, labels, inputs = serving_input_fn() vocab_file_name = os.path.join(tmpdir, 'my_vocab_file') vocab_file = gfile.GFile(vocab_file_name, mode='w') @@ -229,7 +229,7 @@ def _build_estimator_for_export_tests(tmpdir): return input_fn_utils.InputFnOps(features, labels, inputs) - return est, export_input_fn_with_asset + return est, serving_input_fn_with_asset class CheckCallsMonitor(monitors_lib.BaseMonitor): @@ -620,6 +620,16 @@ class EstimatorTest(test.TestCase): predictions = list(est.predict(x=iris.data)) self.assertEqual(len(predictions), iris.target.shape[0]) + def testHooksNotChanged(self): + est = estimator.Estimator(model_fn=logistic_model_no_mode_fn) + # We pass empty array and expect it to remain empty after calling + # fit and evaluate. 
Requires inside to copy this array if any hooks were + # added. + my_array = [] + est.fit(input_fn=iris_input_fn, steps=100, monitors=my_array) + _ = est.evaluate(input_fn=iris_input_fn, steps=1, hooks=my_array) + self.assertEqual(my_array, []) + def testIrisInputFnLabelsDict(self): iris = base.load_iris() est = estimator.Estimator(model_fn=logistic_model_no_mode_fn) @@ -811,7 +821,7 @@ class EstimatorTest(test.TestCase): def test_export_savedmodel(self): tmpdir = tempfile.mkdtemp() - est, export_input_fn = _build_estimator_for_export_tests(tmpdir) + est, serving_input_fn = _build_estimator_for_export_tests(tmpdir) extra_file_name = os.path.join( compat.as_bytes(tmpdir), compat.as_bytes('my_extra_file')) @@ -823,7 +833,7 @@ class EstimatorTest(test.TestCase): export_dir_base = os.path.join( compat.as_bytes(tmpdir), compat.as_bytes('export')) export_dir = est.export_savedmodel( - export_dir_base, export_input_fn, assets_extra=assets_extra) + export_dir_base, serving_input_fn, assets_extra=assets_extra) self.assertTrue(gfile.Exists(export_dir_base)) self.assertTrue(gfile.Exists(export_dir)) diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index e7805d9a90..c898a4865b 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -18,14 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import inspect -import re - from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated_arg_values -from tensorflow.contrib.framework.python.framework import experimental -from tensorflow.contrib.learn.python.learn import evaluable -from tensorflow.contrib.learn.python.learn import trainable from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from 
tensorflow.contrib.learn.python.learn.estimators import linear @@ -38,15 +32,7 @@ def _as_iterable(preds, output): yield pred[output] -def _get_metric_args(metric): - if hasattr(metric, "__code__"): - return inspect.getargspec(metric).args - elif hasattr(metric, "func") and hasattr(metric, "keywords"): - return [arg for arg in inspect.getargspec(metric.func).args - if arg not in metric.keywords.keys()] - - -class SVM(trainable.Trainable, evaluable.Evaluable): +class SVM(estimator.Estimator): """Support Vector Machine (SVM) model for binary classification. Currently, only linear SVMs are supported. For the underlying optimization @@ -106,7 +92,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): kernels=None, config=None, feature_engineering_fn=None): - """Constructs a `SVM~ estimator object. + """Constructs an `SVM` estimator object. Args: example_id_column: A string defining the feature column name representing @@ -139,15 +125,15 @@ class SVM(trainable.Trainable, evaluable.Evaluable): """ if kernels is not None: raise ValueError("Kernel SVMs are not currently supported.") - self._optimizer = sdca_optimizer.SDCAOptimizer( + optimizer = sdca_optimizer.SDCAOptimizer( example_id_column=example_id_column, num_loss_partitions=num_loss_partitions, symmetric_l1_regularization=l1_regularization, symmetric_l2_regularization=l2_regularization) self._feature_columns = feature_columns - self._chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access - self._estimator = estimator.Estimator( + chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access + super(SVM, self).__init__( model_fn=linear.sdca_model_fn, model_dir=model_dir, config=config, @@ -156,62 +142,20 @@ class SVM(trainable.Trainable, evaluable.Evaluable): weight_column_name=weight_column_name, enable_centered_bias=False), "feature_columns": feature_columns, - "optimizer": self._optimizer, + "optimizer": optimizer, "weight_column_name": weight_column_name, - 
"update_weights_hook": self._chief_hook, + "update_weights_hook": chief_hook, }, feature_engineering_fn=feature_engineering_fn) - if not self._estimator.config.is_chief: - self._chief_hook = None - - @property - def model_dir(self): - """See trainable.Evaluable.""" - return self._estimator.model_dir - - def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, - monitors=None, max_steps=None): - """See trainable.Trainable.""" - if monitors is None: - monitors = [] - if self._chief_hook: - monitors.append(self._chief_hook) - return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, - batch_size=batch_size, monitors=monitors, - max_steps=max_steps) - - # pylint: disable=protected-access - def evaluate(self, - x=None, - y=None, - input_fn=None, - feed_fn=None, - batch_size=None, - steps=None, - metrics=None, - name=None, - checkpoint_path=None, - hooks=None): - """See evaluable.Evaluable.""" - return self._estimator.evaluate( - x=x, - y=y, - input_fn=input_fn, - feed_fn=feed_fn, - batch_size=batch_size, - steps=steps, - metrics=metrics, - name=name, - checkpoint_path=checkpoint_path, - hooks=hooks) @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True): + def predict_classes(self, x=None, input_fn=None, batch_size=None, + as_iterable=True): """Runs inference to determine the predicted class.""" key = prediction_key.PredictionKey.CLASSES - preds = self._estimator.predict( + preds = super(SVM, self).predict( x=x, input_fn=input_fn, batch_size=batch_size, @@ -228,7 +172,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): as_iterable=True): """Runs inference to determine the class probability predictions.""" key = prediction_key.PredictionKey.PROBABILITIES - preds = self._estimator.predict( + preds = super(SVM, self).predict( x=x, input_fn=input_fn, batch_size=batch_size, @@ -239,51 +183,30 @@ class 
SVM(trainable.Trainable, evaluable.Evaluable): return preds[key] # pylint: enable=protected-access - def get_variable_names(self): - return self._estimator.get_variable_names() - def export(self, export_dir, signature_fn=None, input_fn=None, default_batch_size=1, exports_to_keep=None): """See BaseEstimator.export.""" + return self.export_with_defaults( + export_dir=export_dir, + signature_fn=signature_fn, + input_fn=input_fn, + default_batch_size=default_batch_size, + exports_to_keep=exports_to_keep) + + def export_with_defaults( + self, + export_dir, + signature_fn=None, + input_fn=None, + default_batch_size=1, + exports_to_keep=None): + """Same as BaseEstimator.export, but uses some defaults.""" def default_input_fn(unused_estimator, examples): return layers.parse_feature_columns_from_examples( examples, self._feature_columns) - return self._estimator.export(export_dir=export_dir, - signature_fn=signature_fn, - input_fn=input_fn or default_input_fn, - default_batch_size=default_batch_size, - exports_to_keep=exports_to_keep) - - @experimental - def export_savedmodel(self, - export_dir_base, - input_fn, - default_output_alternative_key=None, - assets_extra=None, - as_text=False, - exports_to_keep=None): - return self._estimator.export_savedmodel( - export_dir_base, - input_fn, - default_output_alternative_key=default_output_alternative_key, - assets_extra=assets_extra, - as_text=as_text, - exports_to_keep=exports_to_keep) - - @property - def weights_(self): - values = {} - optimizer_regex = r".*/"+self._optimizer.get_name() + r"(_\d)?$" - for name in self.get_variable_names(): - if (name.startswith("linear/") and - name != "linear/bias_weight" and - not re.match(optimizer_regex, name)): - values[name] = self.get_variable_value(name) - if len(values) == 1: - return values[list(values.keys())[0]] - return values - - @property - def bias_(self): - return self.get_variable_value("linear/bias_weight") + return super(SVM, self).export(export_dir=export_dir, + 
signature_fn=signature_fn, + input_fn=input_fn or default_input_fn, + default_batch_size=default_batch_size, + exports_to_keep=exports_to_keep) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 3bc5013540..ed0e546442 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -139,8 +139,8 @@ class Experiment(object): self._continuous_eval_throttle_secs = continuous_eval_throttle_secs self._min_eval_frequency = min_eval_frequency self._delay_workers_by_global_step = delay_workers_by_global_step + self._train_monitors = train_monitors or [] # Mutable fields, using the setters. - self.train_monitors = train_monitors self.eval_hooks = eval_hooks self.export_strategies = export_strategies self.continuous_eval_predicate_fn = continuous_eval_predicate_fn @@ -170,12 +170,9 @@ class Experiment(object): return self._eval_steps @property - def train_monitors(self): - return self._train_monitors - - @train_monitors.setter - def train_monitors(self, value): - self._train_monitors = value or [] + def train_hooks(self): + """Returns a shallow copy of train hooks for inspecting.""" + return [m for m in self._train_monitors] @property def eval_hooks(self): @@ -232,6 +229,10 @@ class Experiment(object): raise ValueError("`export_strategies` must be an ExportStrategy, " "a list of ExportStrategies, or None.") + def extend_train_hooks(self, additional_hooks): + """Extends the hooks for training.""" + self._train_monitors.extend(additional_hooks) + def train(self, delay_secs=None): """Fit the estimator using the training data. @@ -378,7 +379,8 @@ class Experiment(object): steps=self._eval_steps, metrics=self._eval_metrics, name=name, - checkpoint_path=latest_path) + checkpoint_path=latest_path, + hooks=self._eval_hooks) # Ensure eval result is not None for next round of evaluation. 
if not eval_result: eval_result = {} @@ -454,14 +456,15 @@ class Experiment(object): self._train_monitors += [monitors.ValidationMonitor( input_fn=self._eval_input_fn, eval_steps=self._eval_steps, metrics=self._eval_metrics, every_n_steps=self._min_eval_frequency, - name=eval_dir_suffix, + name=eval_dir_suffix, hooks=self._eval_hooks )] self.train(delay_secs=0) eval_result = self._estimator.evaluate(input_fn=self._eval_input_fn, steps=self._eval_steps, metrics=self._eval_metrics, - name=eval_dir_suffix) + name=eval_dir_suffix, + hooks=self._eval_hooks) export_results = self._maybe_export(eval_result) return eval_result, export_results diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index 8b43973bb8..096d334e8c 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -42,6 +42,7 @@ from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.training import saver from tensorflow.python.training import server_lib +from tensorflow.python.training import session_run_hook from tensorflow.python.util import compat from tensorflow.python.util.all_util import reveal_undocumented @@ -74,6 +75,7 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable): self._max_evals = max_evals self.export_count = 0 self.monitors = [] + self.eval_hooks = [] self._config = config or run_config.RunConfig() self._model_dir = tempfile.mkdtemp() @@ -87,6 +89,8 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable): def evaluate(self, **kwargs): tf_logging.info('evaluate called with args: %s' % kwargs) + if 'hooks' in kwargs: + self.eval_hooks = kwargs['hooks'] self.eval_count += 1 if self.eval_count > self._max_evals: tf_logging.info('Ran %d evals. Done.' 
% self.eval_count) @@ -109,14 +113,18 @@ class TestEstimator(evaluable.Evaluable, trainable.Trainable): self.monitors = kwargs['monitors'] return [(key, kwargs[key]) for key in sorted(kwargs.keys())] - def export_savedmodel(self, export_dir_base, export_input_fn, **kwargs): + def export_savedmodel(self, export_dir_base, serving_input_fn, **kwargs): tf_logging.info('export_savedmodel called with args: %s, %s, %s' % - (export_dir_base, export_input_fn, kwargs)) + (export_dir_base, serving_input_fn, kwargs)) self.export_count += 1 return os.path.join( compat.as_bytes(export_dir_base), compat.as_bytes('bogus_timestamp')) +class _NoopHook(session_run_hook.SessionRunHook): + pass + + class ExperimentTest(test.TestCase): def _cluster_spec(self): @@ -253,52 +261,63 @@ class ExperimentTest(test.TestCase): def test_evaluate(self): est = TestEstimator() est.fake_checkpoint() + noop_hook = _NoopHook() ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + eval_hooks=[noop_hook], eval_steps='steps', eval_delay_secs=0) ex.evaluate() - self.assertEquals(1, est.eval_count) self.assertEquals(0, est.fit_count) + self.assertEquals(1, est.eval_count) + self.assertEquals([noop_hook], est.eval_hooks) def test_evaluate_delay(self): est = TestEstimator() est.fake_checkpoint() + noop_hook = _NoopHook() ex = experiment.Experiment( - est, train_input_fn='train_input', eval_input_fn='eval_input') + est, train_input_fn='train_input', eval_input_fn='eval_input', + eval_hooks=[noop_hook]) for delay in [0, 1, 3]: with test.mock.patch('time.sleep', SheepCounter()) as sheep: ex.evaluate(delay_secs=delay) self.assertAlmostEqual(delay, sheep.total_time, delta=0.1) + self.assertEquals([noop_hook], est.eval_hooks) def test_continuous_eval(self): est = TestEstimator() est.fake_checkpoint() + noop_hook = _NoopHook() ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + 
eval_hooks=[noop_hook], eval_delay_secs=0, continuous_eval_throttle_secs=0) self.assertRaises( StopIteration, ex.continuous_eval, evaluate_checkpoint_only_once=False) - self.assertEquals(6, est.eval_count) self.assertEquals(0, est.fit_count) + self.assertEquals(6, est.eval_count) + self.assertEquals([noop_hook], est.eval_hooks) def test_continuous_eval_throttle_delay(self): for delay in [0, 1, 2]: est = TestEstimator() est.fake_checkpoint() + noop_hook = _NoopHook() ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + eval_hooks=[noop_hook], continuous_eval_throttle_secs=delay, eval_delay_secs=0) with test.mock.patch('time.sleep', SheepCounter()) as sheep: @@ -311,6 +330,7 @@ class ExperimentTest(test.TestCase): def test_continuous_eval_predicate_fn(self): est = TestEstimator() est.fake_checkpoint() + noop_hook = _NoopHook() def _predicate_fn(unused_eval_result): return est.eval_count < 3 @@ -320,20 +340,24 @@ class ExperimentTest(test.TestCase): train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + eval_hooks=[noop_hook], eval_delay_secs=0, continuous_eval_throttle_secs=0, continuous_eval_predicate_fn=_predicate_fn) ex.continuous_eval(evaluate_checkpoint_only_once=False) - self.assertEquals(3, est.eval_count) self.assertEquals(0, est.fit_count) + self.assertEquals(3, est.eval_count) + self.assertEquals([noop_hook], est.eval_hooks) def test_run_local(self): est = TestEstimator() + noop_hook = _NoopHook() ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + eval_hooks=[noop_hook], train_steps=100, eval_steps=100, local_eval_frequency=10) @@ -341,17 +365,42 @@ class ExperimentTest(test.TestCase): self.assertEquals(1, est.fit_count) self.assertEquals(1, est.eval_count) self.assertEquals(1, len(est.monitors)) + self.assertEquals([noop_hook], est.eval_hooks) 
self.assertTrue(isinstance(est.monitors[0], monitors.ValidationMonitor)) + def test_train_monitors_returns_shallow_copy(self): + noop_hook = _NoopHook() + ex = experiment.Experiment( + TestEstimator(), + train_input_fn='train_input', + eval_input_fn='eval_input', + eval_metrics='eval_metrics', + train_monitors=[noop_hook], + train_steps=100, + eval_steps=100, + local_eval_frequency=10) + self.assertAllEqual([noop_hook], ex.train_hooks) + + another_noop_hook = _NoopHook() + # Assert that the property getter returns a shallow copy. + ex.train_hooks.extend([another_noop_hook]) + self.assertAllEqual([noop_hook], ex.train_hooks) + + # Assert that the extend API mutates the monitors. + ex.extend_train_hooks([another_noop_hook]) + self.assertAllEqual([noop_hook, another_noop_hook], ex.train_hooks) + def test_train_and_evaluate(self): est = TestEstimator() + noop_hook = _NoopHook() export_strategy = saved_model_export_utils.make_export_strategy( - est, 'export_input') + est, 'export_input', exports_to_keep=None) ex = experiment.Experiment( est, train_input_fn='train_input', eval_input_fn='eval_input', eval_metrics='eval_metrics', + eval_hooks=[noop_hook], train_steps=100, eval_steps=100, export_strategies=export_strategy) @@ -360,6 +409,7 @@ class ExperimentTest(test.TestCase): self.assertEquals(1, est.eval_count) self.assertEquals(1, est.export_count) self.assertEquals(1, len(est.monitors)) + self.assertEquals([noop_hook], est.eval_hooks) self.assertTrue(isinstance(est.monitors[0], monitors.ValidationMonitor)) @test.mock.patch.object(server_lib, 'Server') diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index d8fe2315da..ab6ea0fb02 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -618,7 +618,8 @@ class ValidationMonitor(EveryN): def __init__(self, x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, - every_n_steps=100, 
metrics=None, early_stopping_rounds=None, + every_n_steps=100, metrics=None, hooks=None, + early_stopping_rounds=None, early_stopping_metric="loss", early_stopping_metric_minimize=True, name=None): """Initializes a ValidationMonitor. @@ -632,6 +633,8 @@ class ValidationMonitor(EveryN): every_n_steps: Check for new checkpoints to evaluate every N steps. If a new checkpoint is found, it is evaluated. See `EveryN`. metrics: See `BaseEstimator.evaluate`. + hooks: A list of `SessionRunHook` hooks to pass to the + `Estimator`'s `evaluate` function. early_stopping_rounds: `int`. If the metric indicated by `early_stopping_metric` does not change according to `early_stopping_metric_minimize` for this many steps, then training @@ -660,6 +663,7 @@ class ValidationMonitor(EveryN): self.batch_size = batch_size self.eval_steps = eval_steps self.metrics = metrics + self.hooks = hooks self.early_stopping_rounds = early_stopping_rounds self.early_stopping_metric = early_stopping_metric self.early_stopping_metric_minimize = early_stopping_metric_minimize @@ -709,7 +713,8 @@ class ValidationMonitor(EveryN): # Run evaluation and log it. 
validation_outputs = self._estimator.evaluate( x=self.x, y=self.y, input_fn=self.input_fn, batch_size=self.batch_size, - steps=self.eval_steps, metrics=self.metrics, name=self.name) + steps=self.eval_steps, metrics=self.metrics, hooks=self.hooks, + name=self.name) stats = [] for name in validation_outputs: stats.append("%s = %s" % (name, str(validation_outputs[name]))) diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py index f313699c14..74236da979 100644 --- a/tensorflow/contrib/learn/python/learn/utils/__init__.py +++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py @@ -20,3 +20,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.learn.python.learn.utils.export import export_estimator +from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import build_default_serving_input_fn +from tensorflow.contrib.learn.python.learn.utils.input_fn_utils import build_parsing_serving_input_fn +from tensorflow.contrib.learn.python.learn.utils.saved_model_export_utils import make_export_strategy + diff --git a/tensorflow/contrib/learn/python/learn/utils/export_test.py b/tensorflow/contrib/learn/python/learn/utils/export_test.py index caae60029a..ce1d73256a 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/export_test.py @@ -112,7 +112,7 @@ class ExportTest(test.TestCase): def testExportMonitorInputFeatureKeyMissing(self): random.seed(42) - def _export_input_fn(): + def _serving_input_fn(): return { _X_KEY: random_ops.random_uniform( @@ -123,7 +123,7 @@ class ExportTest(test.TestCase): monitor = learn.monitors.ExportMonitor( every_n_steps=1, export_dir=tempfile.mkdtemp() + 'export/', - input_fn=_export_input_fn, + input_fn=_serving_input_fn, input_feature_key=input_feature_key, exports_to_keep=2, signature_fn=export.generic_signature_fn) @@ -135,13 +135,13 @@ class 
ExportTest(test.TestCase): random.seed(42) input_feature_key = 'my_example_key' - def _export_input_fn(): + def _serving_input_fn(): return {input_feature_key: None}, None monitor = learn.monitors.ExportMonitor( every_n_steps=1, export_dir=tempfile.mkdtemp() + 'export/', - input_fn=_export_input_fn, + input_fn=_serving_input_fn, input_feature_key=input_feature_key, exports_to_keep=2, signature_fn=export.generic_signature_fn) @@ -154,7 +154,7 @@ class ExportTest(test.TestCase): random.seed(42) input_feature_key = 'my_example_key' - def _export_input_fn(): + def _serving_input_fn(): return { input_feature_key: None, @@ -166,7 +166,7 @@ class ExportTest(test.TestCase): monitor = learn.monitors.ExportMonitor( every_n_steps=1, export_dir=tempfile.mkdtemp() + 'export/', - input_fn=_export_input_fn, + input_fn=_serving_input_fn, input_feature_key=input_feature_key, exports_to_keep=2, signature_fn=export.generic_signature_fn) @@ -178,7 +178,7 @@ class ExportTest(test.TestCase): random.seed(42) input_feature_key = 'my_example_key' - def _export_input_fn(): + def _serving_input_fn(): return { input_feature_key: array_ops.placeholder( @@ -188,7 +188,7 @@ class ExportTest(test.TestCase): monitor = learn.monitors.ExportMonitor( every_n_steps=1, export_dir=tempfile.mkdtemp() + 'export/', - input_fn=_export_input_fn, + input_fn=_serving_input_fn, input_feature_key=input_feature_key, exports_to_keep=2, signature_fn=export.generic_signature_fn) @@ -200,7 +200,7 @@ class ExportTest(test.TestCase): random.seed(42) input_feature_key = 'my_example_key' - def _export_input_fn(): + def _serving_input_fn(): return { input_feature_key: array_ops.placeholder( @@ -214,7 +214,7 @@ class ExportTest(test.TestCase): monitor = learn.monitors.ExportMonitor( every_n_steps=1, export_dir=export_dir, - input_fn=_export_input_fn, + input_fn=_serving_input_fn, input_feature_key=input_feature_key, exports_to_keep=2, signature_fn=export.generic_signature_fn) diff --git 
a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py index 18bfdc61c6..1a51971619 100644 --- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py @@ -41,7 +41,7 @@ InputFnOps = collections.namedtuple('InputFnOps', 'default_inputs']) -def build_parsing_serving_input_fn(feature_spec, default_batch_size=1): +def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): """Build an input_fn appropriate for serving, expecting fed tf.Examples. Creates an input_fn that expects a serialized tf.Example fed into a string @@ -52,6 +52,7 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=1): Args: feature_spec: a dict of string to `VarLenFeature`/`FixedLenFeature`. default_batch_size: the number of query examples expected per batch. + Leave unset for variable batch size (recommended). Returns: An input_fn suitable for use in serving. @@ -68,7 +69,7 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=1): return input_fn -def build_default_serving_input_fn(features, default_batch_size=1): +def build_default_serving_input_fn(features, default_batch_size=None): """Build an input_fn appropriate for serving, expecting feature Tensors. Creates an input_fn that expects all features to be fed directly. @@ -78,6 +79,7 @@ def build_default_serving_input_fn(features, default_batch_size=1): Args: features: a dict of string to `Tensor`. default_batch_size: the number of query examples expected per batch. + Leave unset for variable batch size (recommended). Returns: An input_fn suitable for use in serving. 
diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 9e452d0905..8d53b01511 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import os -import re import time from tensorflow.contrib.learn.python.learn import export_strategy @@ -208,7 +207,7 @@ def get_timestamped_export_dir(export_dir_base): Each export is written into a new subdirectory named using the current time. This guarantees monotonically increasing version numbers even across multiple runs of the pipeline. - The timestamp used is the number of milliseconds since epoch UTC. + The timestamp used is the number of seconds since epoch UTC. Args: export_dir_base: A string containing a directory to write the exported @@ -216,7 +215,7 @@ def get_timestamped_export_dir(export_dir_base): Returns: The full path of the new subdirectory (which is not actually created yet). """ - export_timestamp = int(time.time() * 1e3) + export_timestamp = int(time.time()) export_dir = os.path.join( compat.as_bytes(export_dir_base), @@ -241,37 +240,63 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): keep_filter = gc.largest_export_versions(exports_to_keep) delete_filter = gc.negation(keep_filter) - # Export dir must not end with / or it will break the re match below. - if export_dir_base.endswith('/'): - export_dir_base = export_dir_base[:-1] - # create a simple parser that pulls the export_version from the directory. 
def parser(path): - match = re.match('^' + export_dir_base + '/(\\d{13})$', path.path) - if not match: + filename = os.path.basename(path.path) + if not (len(filename) == 10 and filename.isdigit()): return None - return path._replace(export_version=int(match.group(1))) + return path._replace(export_version=int(filename)) for p in delete_filter(gc.get_paths(export_dir_base, parser=parser)): gfile.DeleteRecursively(p.path) -def make_export_strategy(export_input_fn, +def make_export_strategy(serving_input_fn, default_output_alternative_key='default', assets_extra=None, as_text=False, - exports_to_keep=None): - """Create an ExportStrategy for use with Experiment.""" + exports_to_keep=5): + """Create an ExportStrategy for use with Experiment. + + Args: + serving_input_fn: A function that takes no arguments and returns an + `InputFnOps`. + default_output_alternative_key: the name of the head to serve when an + incoming serving request does not explicitly request a specific head. + Not needed for single-headed models. + assets_extra: A dict specifying how to populate the assets.extra directory + within the exported SavedModel. Each key should give the destination + path (including the filename) relative to the assets.extra directory. + The corresponding value gives the full path of the source file to be + copied. For example, the simple case of copying a single file without + renaming it is specified as + `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. + as_text: whether to write the SavedModel proto in text format. + exports_to_keep: Number of exports to keep. Older exports will be + garbage-collected. Defaults to 5. Set to None to disable garbage + collection. + + Returns: + an ExportStrategy that can be passed to the Experiment constructor. + """ def export_fn(estimator, export_dir_base): - """Exports the given Estimator as a SavedModel.""" + """Exports the given Estimator as a SavedModel. + + Args: + estimator: the Estimator to export. 
+ export_dir_base: A string containing a directory to write the exported + graph and checkpoints. + + Returns: + The string path to the exported directory. + """ export_result = estimator.export_savedmodel( export_dir_base, - export_input_fn, + serving_input_fn, default_output_alternative_key=default_output_alternative_key, assets_extra=assets_extra, - as_text=as_text, - exports_to_keep=exports_to_keep) + as_text=as_text) garbage_collect_exports(export_dir_base, exports_to_keep) return export_result diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py index 955e14ae44..e22f11943b 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py @@ -240,21 +240,21 @@ class SavedModelExportUtilsTest(test.TestCase): export_dir_base = tempfile.mkdtemp() + "export/" export_dir_1 = saved_model_export_utils.get_timestamped_export_dir( export_dir_base) - time.sleep(0.001) + time.sleep(1) export_dir_2 = saved_model_export_utils.get_timestamped_export_dir( export_dir_base) - time.sleep(0.001) + time.sleep(1) export_dir_3 = saved_model_export_utils.get_timestamped_export_dir( export_dir_base) - # Export directories should be named using a timestamp that is milliseconds - # since epoch. Such a timestamp is 13 digits long. + # Export directories should be named using a timestamp that is seconds + # since epoch. Such a timestamp is 10 digits long. 
time_1 = os.path.basename(export_dir_1) - self.assertEqual(13, len(time_1)) + self.assertEqual(10, len(time_1)) time_2 = os.path.basename(export_dir_2) - self.assertEqual(13, len(time_2)) + self.assertEqual(10, len(time_2)) time_3 = os.path.basename(export_dir_3) - self.assertEqual(13, len(time_3)) + self.assertEqual(10, len(time_3)) self.assertTrue(int(time_1) < int(time_2)) self.assertTrue(int(time_2) < int(time_3)) @@ -283,10 +283,10 @@ class SavedModelExportUtilsTest(test.TestCase): def test_make_export_strategy(self): """Only tests that an ExportStrategy instance is created.""" - def _export_input_fn(): + def _serving_input_fn(): return array_ops.constant([1]), None export_strategy = saved_model_export_utils.make_export_strategy( - export_input_fn=_export_input_fn, + serving_input_fn=_serving_input_fn, default_output_alternative_key="default", assets_extra={"from/path": "to/path"}, as_text=False, @@ -299,7 +299,7 @@ def _create_test_export_dir(export_dir_base): export_dir = saved_model_export_utils.get_timestamped_export_dir( export_dir_base) gfile.MkDir(export_dir) - time.sleep(0.001) + time.sleep(1) return export_dir diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py index 2f60554104..6309d36258 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_composition_test.py @@ -200,16 +200,16 @@ class NonSquareLinearOperatorCompositionTest( operator = linalg.LinearOperatorComposition(operators) self.assertAllEqual((2, 3, 5), operator.shape) - def test_dynamic_shapes_when_statically_available(self): + def test_shape_tensors_when_statically_available(self): operators = [ linalg.LinearOperatorMatrix(rng.rand(2, 3, 4)), linalg.LinearOperatorMatrix(rng.rand(2, 4, 5)) ] operator = linalg.LinearOperatorComposition(operators) with 
self.test_session(): - self.assertAllEqual((2, 3, 5), operator.shape_dynamic().eval()) + self.assertAllEqual((2, 3, 5), operator.shape_tensor().eval()) - def test_dynamic_shapes_when_only_dynamically_available(self): + def test_shape_tensors_when_only_dynamically_available(self): mat_1 = rng.rand(1, 2, 3, 4) mat_2 = rng.rand(1, 2, 4, 5) mat_ph_1 = array_ops.placeholder(dtypes.float64) @@ -223,7 +223,7 @@ class NonSquareLinearOperatorCompositionTest( operator = linalg.LinearOperatorComposition(operators) with self.test_session(): self.assertAllEqual( - (1, 2, 3, 5), operator.shape_dynamic().eval(feed_dict=feed_dict)) + (1, 2, 3, 5), operator.shape_tensor().eval(feed_dict=feed_dict)) if __name__ == "__main__": diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py index 8f77c5e6e3..c099194eed 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_test.py @@ -31,7 +31,7 @@ rng = np.random.RandomState(123) class LinearOperatorShape(linalg.LinearOperator): - """LinearOperator that implements the methods ._shape and _shape_dynamic.""" + """LinearOperator that implements the methods ._shape and _shape_tensor.""" def __init__(self, shape, @@ -49,7 +49,7 @@ class LinearOperatorShape(linalg.LinearOperator): def _shape(self): return tensor_shape.TensorShape(self._stored_shape) - def _shape_dynamic(self): + def _shape_tensor(self): return constant_op.constant(self._stored_shape, dtype=dtypes.int32) @@ -71,7 +71,7 @@ class LinearOperatorApplyOnly(linalg.LinearOperator): def _shape(self): return self._matrix.get_shape() - def _shape_dynamic(self): + def _shape_tensor(self): return array_ops.shape(self._matrix) def _apply(self, x, adjoint=False): @@ -96,11 +96,11 @@ class LinearOperatorTest(test.TestCase): shape = (1, 2, 3, 4) operator = LinearOperatorShape(shape) - 
self.assertAllEqual(shape, operator.shape_dynamic().eval()) - self.assertAllEqual(4, operator.tensor_rank_dynamic().eval()) - self.assertAllEqual((1, 2), operator.batch_shape_dynamic().eval()) - self.assertAllEqual(4, operator.domain_dimension_dynamic().eval()) - self.assertAllEqual(3, operator.range_dimension_dynamic().eval()) + self.assertAllEqual(shape, operator.shape_tensor().eval()) + self.assertAllEqual(4, operator.tensor_rank_tensor().eval()) + self.assertAllEqual((1, 2), operator.batch_shape_tensor().eval()) + self.assertAllEqual(4, operator.domain_dimension_tensor().eval()) + self.assertAllEqual(3, operator.range_dimension_tensor().eval()) def test_is_x_properties(self): operator = LinearOperatorShape( @@ -120,7 +120,7 @@ class LinearOperatorTest(test.TestCase): self.assertAllEqual((2, 3, 4), operator_dense.get_shape()) self.assertAllClose(matrix, operator_dense.eval()) - def test_generic_to_dense_method_non_square_matrix_dynamic(self): + def test_generic_to_dense_method_non_square_matrix_tensor(self): matrix = rng.randn(2, 3, 4) matrix_ph = array_ops.placeholder(dtypes.float64) operator = LinearOperatorApplyOnly(matrix_ph) diff --git a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py index 4eac01092f..bf6f8f8302 100644 --- a/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py +++ b/tensorflow/contrib/linalg/python/kernel_tests/linear_operator_util_test.py @@ -96,7 +96,7 @@ class DomainDimensionStubOperator(object): def __init__(self, domain_dimension): self._domain_dimension = ops.convert_to_tensor(domain_dimension) - def domain_dimension_dynamic(self): + def domain_dimension_tensor(self): return self._domain_dimension diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator.py b/tensorflow/contrib/linalg/python/ops/linear_operator.py index e229820edc..2467603605 100644 --- 
a/tensorflow/contrib/linalg/python/ops/linear_operator.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator.py @@ -180,13 +180,15 @@ class LinearOperator(object): self._is_positive_definite = is_positive_definite self._name = name or type(self).__name__ - # We will cache some values to avoid repeatedly adding shape - # manipulation ops to the graph. Cleaner. - self._cached_shape_dynamic = None - self._cached_batch_shape_dynamic = None - self._cached_domain_dimension_dynamic = None - self._cached_range_dimension_dynamic = None - self._cached_tensor_rank_dynamic = None + # We will cache some tensors to avoid repeatedly adding shape + # manipulation ops to the graph. + # Naming convention: + # self._cached_X_tensor is the cached version of self._X_tensor. + self._cached_shape_tensor = None + self._cached_batch_shape_tensor = None + self._cached_domain_dimension_tensor = None + self._cached_range_dimension_tensor = None + self._cached_tensor_rank_tensor = None @contextlib.contextmanager def _name_scope(self, name=None, values=None): @@ -240,10 +242,10 @@ class LinearOperator(object): """ return self._shape() - def _shape_dynamic(self): - raise NotImplementedError("_shape_dynamic is not implemented.") + def _shape_tensor(self): + raise NotImplementedError("_shape_tensor is not implemented.") - def shape_dynamic(self, name="shape_dynamic"): + def shape_tensor(self, name="shape_tensor"): """Shape of this `LinearOperator`, determined at runtime. If this operator acts like the batch matrix `A` with @@ -258,14 +260,14 @@ class LinearOperator(object): """ with self._name_scope(name): # Be clean by avoiding adding shape Ops to the graph too many times. - if self._cached_shape_dynamic is None: + if self._cached_shape_tensor is None: # Prefer to use statically defined shape if available. 
if self.shape.is_fully_defined(): - self._cached_shape_dynamic = linear_operator_util.shape_tensor( + self._cached_shape_tensor = linear_operator_util.shape_tensor( self.shape.as_list()) else: - self._cached_shape_dynamic = self._shape_dynamic() - return self._cached_shape_dynamic + self._cached_shape_tensor = self._shape_tensor() + return self._cached_shape_tensor @property def batch_shape(self): @@ -281,7 +283,7 @@ class LinearOperator(object): # Derived classes get this "for free" once .shape is implemented. return self.shape[:-2] - def batch_shape_dynamic(self, name="batch_shape_dynamic"): + def batch_shape_tensor(self, name="batch_shape_tensor"): """Shape of batch dimensions of this operator, determined at runtime. If this operator acts like the batch matrix `A` with @@ -296,14 +298,14 @@ class LinearOperator(object): """ # Derived classes get this "for free" once .shape() is implemented. with self._name_scope(name): - if self._cached_batch_shape_dynamic is None: + if self._cached_batch_shape_tensor is None: # Prefer to use statically defined shape if available. if self.batch_shape.is_fully_defined(): - self._cached_batch_shape_dynamic = linear_operator_util.shape_tensor( + self._cached_batch_shape_tensor = linear_operator_util.shape_tensor( self.batch_shape.as_list(), name="batch_shape") else: - self._cached_batch_shape_dynamic = self.shape_dynamic()[:-2] - return self._cached_batch_shape_dynamic + self._cached_batch_shape_tensor = self.shape_tensor()[:-2] + return self._cached_batch_shape_tensor @property def tensor_rank(self, name="tensor_rank"): @@ -322,7 +324,7 @@ class LinearOperator(object): with self._name_scope(name): return self.shape.ndims - def tensor_rank_dynamic(self, name="tensor_rank_dynamic"): + def tensor_rank_tensor(self, name="tensor_rank_tensor"): """Rank (in the sense of tensors) of matrix corresponding to this operator. 
If this operator acts like the batch matrix `A` with @@ -336,15 +338,15 @@ class LinearOperator(object): """ # Derived classes get this "for free" once .shape() is implemented. with self._name_scope(name): - if self._cached_tensor_rank_dynamic is None: + if self._cached_tensor_rank_tensor is None: # Prefer to use statically defined shape if available. if self.tensor_rank is not None: - self._cached_tensor_rank_dynamic = ops.convert_to_tensor( + self._cached_tensor_rank_tensor = ops.convert_to_tensor( self.tensor_rank) else: - self._cached_tensor_rank_dynamic = array_ops.size( - self.shape_dynamic()) - return self._cached_tensor_rank_dynamic + self._cached_tensor_rank_tensor = array_ops.size( + self.shape_tensor()) + return self._cached_tensor_rank_tensor @property def domain_dimension(self): @@ -359,7 +361,7 @@ class LinearOperator(object): # Derived classes get this "for free" once .shape is implemented. return self.shape[-1] - def domain_dimension_dynamic(self, name="domain_dimension_dynamic"): + def domain_dimension_tensor(self, name="domain_dimension_tensor"): """Dimension (in the sense of vector spaces) of the domain of this operator. Determined at runtime. @@ -375,14 +377,14 @@ class LinearOperator(object): """ # Derived classes get this "for free" once .shape() is implemented. with self._name_scope(name): - if self._cached_domain_dimension_dynamic is None: + if self._cached_domain_dimension_tensor is None: # Prefer to use statically defined shape if available. 
if self.domain_dimension.value is not None: - self._cached_domain_dimension_dynamic = ops.convert_to_tensor( + self._cached_domain_dimension_tensor = ops.convert_to_tensor( self.domain_dimension.value) else: - self._cached_domain_dimension_dynamic = self.shape_dynamic()[-1] - return self._cached_domain_dimension_dynamic + self._cached_domain_dimension_tensor = self.shape_tensor()[-1] + return self._cached_domain_dimension_tensor @property def range_dimension(self): @@ -397,7 +399,7 @@ class LinearOperator(object): # Derived classes get this "for free" once .shape is implemented. return self.shape[-2] - def range_dimension_dynamic(self, name="range_dimension_dynamic"): + def range_dimension_tensor(self, name="range_dimension_tensor"): """Dimension (in the sense of vector spaces) of the range of this operator. Determined at runtime. @@ -413,14 +415,14 @@ class LinearOperator(object): """ # Derived classes get this "for free" once .shape() is implemented. with self._name_scope(name): - if self._cached_range_dimension_dynamic is None: + if self._cached_range_dimension_tensor is None: # Prefer to use statically defined shape if available. 
if self.range_dimension.value is not None: - self._cached_range_dimension_dynamic = ops.convert_to_tensor( + self._cached_range_dimension_tensor = ops.convert_to_tensor( self.range_dimension.value) else: - self._cached_range_dimension_dynamic = self.shape_dynamic()[-2] - return self._cached_range_dimension_dynamic + self._cached_range_dimension_tensor = self.shape_tensor()[-2] + return self._cached_range_dimension_tensor def _assert_non_singular(self): raise NotImplementedError("assert_non_singular is not implemented.") @@ -574,12 +576,12 @@ class LinearOperator(object): if self.batch_shape.is_fully_defined(): batch_shape = self.batch_shape else: - batch_shape = self.batch_shape_dynamic() + batch_shape = self.batch_shape_tensor() if self.domain_dimension.value is not None: n = self.domain_dimension.value else: - n = self.domain_dimension_dynamic() + n = self.domain_dimension_tensor() eye = linalg_ops.eye(num_rows=n, batch_shape=batch_shape, dtype=self.dtype) return self.apply(eye) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py b/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py index 3e118ebbd4..81e7735841 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_composition.py @@ -202,7 +202,7 @@ class LinearOperatorComposition(linear_operator.LinearOperator): return batch_shape.concatenate(matrix_shape) - def _shape_dynamic(self): + def _shape_tensor(self): # Avoid messy broadcasting if possible. if self.shape.is_fully_defined(): return ops.convert_to_tensor( @@ -212,14 +212,14 @@ class LinearOperatorComposition(linear_operator.LinearOperator): # the graph. Things will fail at runtime naturally if shapes are # incompatible. 
matrix_shape = array_ops.stack([ - self.operators[0].range_dimension_dynamic(), - self.operators[-1].domain_dimension_dynamic() + self.operators[0].range_dimension_tensor(), + self.operators[-1].domain_dimension_tensor() ]) # Dummy Tensor of zeros. Will never be materialized. - zeros = array_ops.zeros(shape=self.operators[0].batch_shape_dynamic()) + zeros = array_ops.zeros(shape=self.operators[0].batch_shape_tensor()) for operator in self.operators[1:]: - zeros += array_ops.zeros(shape=operator.batch_shape_dynamic()) + zeros += array_ops.zeros(shape=operator.batch_shape_tensor()) batch_shape = array_ops.shape(zeros) return array_ops.concat((batch_shape, matrix_shape), 0) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py b/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py index d59e8be767..4700e65518 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_diag.py @@ -166,7 +166,7 @@ class LinearOperatorDiag(linear_operator.LinearOperator): d_shape = self._diag.get_shape() return d_shape.concatenate(d_shape[-1:]) - def _shape_dynamic(self): + def _shape_tensor(self): d_shape = array_ops.shape(self._diag) k = d_shape[-1] return array_ops.concat((d_shape, [k]), 0) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py b/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py index 3304698ec6..6559f8b116 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_identity.py @@ -261,7 +261,7 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity): batch_shape = tensor_shape.TensorShape(self._batch_shape_static) return batch_shape.concatenate(matrix_shape) - def _shape_dynamic(self): + def _shape_tensor(self): matrix_shape = array_ops.stack( (self._num_rows, self._num_rows), axis=0) if self._batch_shape_arg is None: @@ -307,7 +307,7 @@ class 
LinearOperatorIdentity(BaseLinearOperatorIdentity): # Dynamic broadcast: # Always add to an array of zeros, rather than using a "cond", since a # cond would require copying data from GPU --> CPU. - special_shape = array_ops.concat((self.batch_shape_dynamic(), [1, 1]), 0) + special_shape = array_ops.concat((self.batch_shape_tensor(), [1, 1]), 0) zeros = array_ops.zeros(shape=special_shape, dtype=self.dtype) return x + zeros @@ -320,10 +320,10 @@ class LinearOperatorIdentity(BaseLinearOperatorIdentity): return self._possibly_broadcast_batch_shape(x) def _determinant(self): - return array_ops.ones(shape=self.batch_shape_dynamic(), dtype=self.dtype) + return array_ops.ones(shape=self.batch_shape_tensor(), dtype=self.dtype) def _log_abs_determinant(self): - return array_ops.zeros(shape=self.batch_shape_dynamic(), dtype=self.dtype) + return array_ops.zeros(shape=self.batch_shape_tensor(), dtype=self.dtype) def _solve(self, rhs, adjoint=False): return self._apply(rhs) @@ -566,7 +566,7 @@ class LinearOperatorScaledIdentity(BaseLinearOperatorIdentity): batch_shape = self.multiplier.get_shape() return batch_shape.concatenate(matrix_shape) - def _shape_dynamic(self): + def _shape_tensor(self): matrix_shape = array_ops.stack( (self._num_rows, self._num_rows), axis=0) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py b/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py index 7ca18450d1..3b5dc7c481 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_matrix.py @@ -157,7 +157,7 @@ class LinearOperatorMatrix(linear_operator.LinearOperator): def _shape(self): return self._matrix.get_shape() - def _shape_dynamic(self): + def _shape_tensor(self): return array_ops.shape(self._matrix) def _apply(self, x, adjoint=False): diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py b/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py 
index 5de9bb5d77..85cd7fcd9a 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_test_util.py @@ -174,6 +174,29 @@ class LinearOperatorDerivedClassTest(test.TestCase): feed_dict=feed_dict) self.assertAC(op_det_v, mat_det_v) + def test_log_abs_det(self): + self._maybe_skip("log_abs_det") + for use_placeholder in False, True: + for shape in self._shapes_to_test: + for dtype in self._dtypes_to_test: + if dtype.is_complex: + self.skipTest( + "tf.matrix_determinant does not work with complex, so this " + "test is being skipped.") + with self.test_session(graph=ops.Graph()) as sess: + sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED + operator, mat, feed_dict = self._operator_and_mat_and_feed_dict( + shape, dtype, use_placeholder=use_placeholder) + op_log_abs_det = operator.log_abs_determinant() + mat_log_abs_det = math_ops.log( + math_ops.abs(linalg_ops.matrix_determinant(mat))) + if not use_placeholder: + self.assertAllEqual(shape[:-2], op_log_abs_det.get_shape()) + op_log_abs_det_v, mat_log_abs_det_v = sess.run( + [op_log_abs_det, mat_log_abs_det], + feed_dict=feed_dict) + self.assertAC(op_log_abs_det_v, mat_log_abs_det_v) + def test_apply(self): self._maybe_skip("apply") for use_placeholder in False, True: @@ -262,8 +285,8 @@ class SquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): n = operator.domain_dimension.value x_shape = batch_shape + [n, r] else: - batch_shape = operator.batch_shape_dynamic() - n = operator.domain_dimension_dynamic() + batch_shape = operator.batch_shape_tensor() + n = operator.domain_dimension_tensor() x_shape = array_ops.concat((batch_shape, [n, r]), 0) return random_normal(x_shape, dtype=operator.dtype) @@ -291,7 +314,7 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): @property def _tests_to_skip(self): """List of test names to skip.""" - return ["solve", "det"] + return ["solve", "det", "log_abs_det"] 
@property def _shapes_to_test(self): @@ -316,11 +339,11 @@ class NonSquareLinearOperatorDerivedClassTest(LinearOperatorDerivedClassTest): n = operator.domain_dimension.value x_shape = batch_shape + [n, r] else: - batch_shape = operator.batch_shape_dynamic() + batch_shape = operator.batch_shape_tensor() if adjoint: - n = operator.range_dimension_dynamic() + n = operator.range_dimension_tensor() else: - n = operator.domain_dimension_dynamic() + n = operator.domain_dimension_tensor() x_shape = array_ops.concat((batch_shape, [n, r]), 0) return random_normal(x_shape, dtype=operator.dtype) diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py b/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py index 7c5b9b6b54..2b1fb4c04c 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_tril.py @@ -157,7 +157,7 @@ class LinearOperatorTriL(linear_operator.LinearOperator): def _shape(self): return self._tril.get_shape() - def _shape_dynamic(self): + def _shape_tensor(self): return array_ops.shape(self._tril) def _assert_non_singular(self): diff --git a/tensorflow/contrib/linalg/python/ops/linear_operator_util.py b/tensorflow/contrib/linalg/python/ops/linear_operator_util.py index 44092f0c06..6e56fac2e3 100644 --- a/tensorflow/contrib/linalg/python/ops/linear_operator_util.py +++ b/tensorflow/contrib/linalg/python/ops/linear_operator_util.py @@ -83,10 +83,10 @@ def assert_compatible_matrix_dimensions(operator, x): Returns: `Assert` `Op`. """ - # Static checks are done in the base class. Only dynamic asserts here. + # Static checks are done in the base class. Only tensor asserts here. assert_same_dd = check_ops.assert_equal( array_ops.shape(x)[-2], - operator.domain_dimension_dynamic(), + operator.domain_dimension_tensor(), message=( "Incompatible matrix dimensions. 
" "shape[-2] of argument to be the same as this operator")) diff --git a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in index 3bad4c42a9..986150cb3f 100644 --- a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in +++ b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in @@ -44,6 +44,7 @@ CXXFLAGS += -DTENSORFLOW_DISABLE_META CXXFLAGS += -D__ANDROID_TYPES_FULL__ GRAPH_EXECUTION_SRCS := \ +tensorflow/core/kernels/hexagon/graph_transfer_utils.cc \ tensorflow/core/kernels/hexagon/graph_transferer.cc \ tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc \ tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc \ diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index c3f59dd84c..96acead47f 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -28,6 +28,7 @@ tensorflow/core/kernels/split_op.cc tensorflow/core/kernels/split_v_op.cc tensorflow/core/kernels/split_lib_cpu.cc tensorflow/core/kernels/sparse_to_dense_op.cc +tensorflow/core/kernels/sparse_matmul_op.cc tensorflow/core/kernels/softsign_op.cc tensorflow/core/kernels/softplus_op.cc tensorflow/core/kernels/softmax_op.cc diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 6ceeacbc72..7ac337732a 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -167,10 +167,10 @@ def streaming_true_positives(predictions, labels, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. 
+ predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -206,10 +206,10 @@ def streaming_true_negatives(predictions, labels, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -233,11 +233,11 @@ def streaming_true_negatives(predictions, labels, weights=None, with variable_scope.variable_scope( name, 'true_negatives', (predictions, labels, weights)): - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) + labels = math_ops.cast(labels, dtype=dtypes.bool) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_true_negative = math_ops.logical_and(math_ops.equal(labels, 0), - math_ops.equal(predictions, 0)) + is_true_negative = math_ops.logical_and(math_ops.equal(labels, False), + math_ops.equal(predictions, False)) return _count_condition(is_true_negative, weights, metrics_collections, updates_collections) @@ -251,10 +251,10 @@ def 
streaming_false_positives(predictions, labels, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -290,10 +290,10 @@ def streaming_false_negatives(predictions, labels, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. 
weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 3e2e408e6f..4fb244e3d4 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -663,35 +663,41 @@ class StreamingTruePositivesTest(test.TestCase): _assert_local_variables(self, ('true_positives/count:0',)) def testUnweighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - tp, tp_update_op = metrics.streaming_true_positives(predictions, labels) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + tp, tp_update_op = metrics.streaming_true_positives(predictions, labels) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, tp.eval()) - self.assertEqual(1, tp_update_op.eval()) - self.assertEqual(1, tp.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, tp.eval()) + self.assertEqual(1, tp_update_op.eval()) + self.assertEqual(1, tp.eval()) def testWeighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - tp, tp_update_op = metrics.streaming_true_positives( - predictions, labels, weights=37.0) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, 
dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + tp, tp_update_op = metrics.streaming_true_positives( + predictions, labels, weights=37.0) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, tp.eval()) - self.assertEqual(37.0, tp_update_op.eval()) - self.assertEqual(37.0, tp.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, tp.eval()) + self.assertEqual(37.0, tp_update_op.eval()) + self.assertEqual(37.0, tp.eval()) class StreamingFalseNegativesTest(test.TestCase): @@ -706,35 +712,41 @@ class StreamingFalseNegativesTest(test.TestCase): _assert_local_variables(self, ('false_negatives/count:0',)) def testUnweighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - fn, fn_update_op = metrics.streaming_false_negatives(predictions, labels) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + fn, fn_update_op = metrics.streaming_false_negatives(predictions, labels) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, fn.eval()) - self.assertEqual(2, fn_update_op.eval()) - self.assertEqual(2, fn.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, fn.eval()) + self.assertEqual(2, fn_update_op.eval()) + self.assertEqual(2, fn.eval()) def testWeighted(self): - 
predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - fn, fn_update_op = metrics.streaming_false_negatives( - predictions, labels, weights=((3.0,), (5.0,), (7.0,))) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + fn, fn_update_op = metrics.streaming_false_negatives( + predictions, labels, weights=((3.0,), (5.0,), (7.0,))) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, fn.eval()) - self.assertEqual(8.0, fn_update_op.eval()) - self.assertEqual(8.0, fn.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, fn.eval()) + self.assertEqual(8.0, fn_update_op.eval()) + self.assertEqual(8.0, fn.eval()) class StreamingFalsePositivesTest(test.TestCase): @@ -749,39 +761,45 @@ class StreamingFalsePositivesTest(test.TestCase): _assert_local_variables(self, ('false_positives/count:0',)) def testUnweighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - fp, fp_update_op = metrics.streaming_false_positives(predictions, labels) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + fp, fp_update_op = metrics.streaming_false_positives(predictions, labels) - with self.test_session() as sess: - 
sess.run(variables.local_variables_initializer()) - self.assertEqual(0, fp.eval()) - self.assertEqual(4, fp_update_op.eval()) - self.assertEqual(4, fp.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, fp.eval()) + self.assertEqual(4, fp_update_op.eval()) + self.assertEqual(4, fp.eval()) def testWeighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - fp, fp_update_op = metrics.streaming_false_positives( - predictions, - labels, - weights=((1.0, 2.0, 3.0, 5.0), - (7.0, 11.0, 13.0, 17.0), - (19.0, 23.0, 29.0, 31.0))) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + fp, fp_update_op = metrics.streaming_false_positives( + predictions, + labels, + weights=((1.0, 2.0, 3.0, 5.0), + (7.0, 11.0, 13.0, 17.0), + (19.0, 23.0, 29.0, 31.0))) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, fp.eval()) - self.assertEqual(42.0, fp_update_op.eval()) - self.assertEqual(42.0, fp.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, fp.eval()) + self.assertEqual(42.0, fp_update_op.eval()) + self.assertEqual(42.0, fp.eval()) class StreamingTrueNegativesTest(test.TestCase): @@ -796,35 +814,41 @@ class StreamingTrueNegativesTest(test.TestCase): _assert_local_variables(self, ('true_negatives/count:0',)) def testUnweighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - tn, tn_update_op = 
metrics.streaming_true_negatives(predictions, labels) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + tn, tn_update_op = metrics.streaming_true_negatives(predictions, labels) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, tn.eval()) - self.assertEqual(5, tn_update_op.eval()) - self.assertEqual(5, tn.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, tn.eval()) + self.assertEqual(5, tn_update_op.eval()) + self.assertEqual(5, tn.eval()) def testWeighted(self): - predictions = constant_op.constant(((1, 0, 1, 0), - (0, 1, 1, 1), - (0, 0, 0, 0))) - labels = constant_op.constant(((0, 1, 1, 0), - (1, 0, 0, 0), - (0, 0, 0, 0))) - tn, tn_update_op = metrics.streaming_true_negatives( - predictions, labels, weights=((0.0, 2.0, 3.0, 5.0),)) + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast(constant_op.constant( + ((1, 0, 1, 0), + (0, 1, 1, 1), + (0, 0, 0, 0))), dtype=dtype) + labels = math_ops.cast(constant_op.constant( + ((0, 1, 1, 0), + (1, 0, 0, 0), + (0, 0, 0, 0))), dtype=dtype) + tn, tn_update_op = metrics.streaming_true_negatives( + predictions, labels, weights=((0.0, 2.0, 3.0, 5.0),)) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertEqual(0, tn.eval()) - self.assertEqual(15.0, tn_update_op.eval()) - self.assertEqual(15.0, tn.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertEqual(0, tn.eval()) + self.assertEqual(15.0, tn_update_op.eval()) + self.assertEqual(15.0, tn.eval()) class StreamingTruePositivesAtThresholdsTest(test.TestCase): 
diff --git a/tensorflow/contrib/seq2seq/BUILD b/tensorflow/contrib/seq2seq/BUILD index 3c314e2f28..a739487ae3 100644 --- a/tensorflow/contrib/seq2seq/BUILD +++ b/tensorflow/contrib/seq2seq/BUILD @@ -72,6 +72,46 @@ cuda_py_test( ], ) +cuda_py_test( + name = "sampling_decoder_test", + size = "medium", + srcs = ["python/kernel_tests/sampling_decoder_test.py"], + additional_deps = [ + ":seq2seq_py", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/contrib/rnn:rnn_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:init_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:rnn", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], +) + +cuda_py_test( + name = "decoder_test", + size = "medium", + srcs = ["python/kernel_tests/decoder_test.py"], + additional_deps = [ + ":seq2seq_py", + "//tensorflow/contrib/layers:layers_py", + "//tensorflow/contrib/rnn:rnn_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:init_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:rnn", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py new file mode 100644 index 0000000000..b3c6c593c5 --- /dev/null +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py @@ -0,0 +1,156 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for contrib.seq2seq.python.seq2seq.decoder.""" +# pylint: disable=unused-import,g-bad-import-order +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# pylint: enable=unused-import + +import sys + +# TODO(jart): #6568 Remove this hack that makes dlopen() not crash. +if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"): + import ctypes # pylint: disable=g-import-not-at-top + sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL) + +# pylint: disable=g-import-not-at-top +import numpy as np + +from tensorflow.contrib.rnn import core_rnn_cell +from tensorflow.contrib.seq2seq.python.ops import decoder +from tensorflow.contrib.seq2seq.python.ops import sampling_decoder +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import rnn +from tensorflow.python.ops import variables +from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.platform import test +# pylint: enable=g-import-not-at-top + + +class DynamicDecodeRNNTest(test.TestCase): + + def _testDynamicDecodeRNN(self, time_major): + + sequence_length = [3, 4, 3, 1, 0] + batch_size = 5 + max_time = 8 + input_depth = 7 + cell_depth = 10 + max_out = max(sequence_length) + + with self.test_session() as sess: + if time_major: + inputs = np.random.randn(max_time, batch_size, + input_depth).astype(np.float32) + else: + inputs = np.random.randn(batch_size, max_time, + 
input_depth).astype(np.float32) + cell = core_rnn_cell.LSTMCell(cell_depth) + sampler = sampling_decoder.BasicTrainingSampler( + inputs, sequence_length, time_major=time_major) + my_decoder = sampling_decoder.BasicSamplingDecoder( + cell=cell, + sampler=sampler, + initial_state=cell.zero_state( + dtype=dtypes.float32, batch_size=batch_size)) + + final_outputs, final_state = decoder.dynamic_decode_rnn( + my_decoder, output_time_major=time_major) + + def _t(shape): + if time_major: + return (shape[1], shape[0]) + shape[2:] + return shape + + self.assertTrue( + isinstance(final_outputs, sampling_decoder.SamplingDecoderOutput)) + self.assertTrue(isinstance(final_state, core_rnn_cell.LSTMStateTuple)) + + self.assertEqual( + _t((batch_size, None, cell_depth)), + tuple(final_outputs.rnn_output.get_shape().as_list())) + self.assertEqual( + _t((batch_size, None)), + tuple(final_outputs.sample_id.get_shape().as_list())) + + sess.run(variables.global_variables_initializer()) + sess_results = sess.run({ + "final_outputs": final_outputs, + "final_state": final_state + }) + + self.assertEqual( + _t((batch_size, max_out, cell_depth)), + sess_results["final_outputs"].rnn_output.shape) + self.assertEqual( + _t((batch_size, max_out)), + sess_results["final_outputs"].sample_id.shape) + + def testDynamicDecodeRNNBatchMajor(self): + self._testDynamicDecodeRNN(time_major=False) + + def testDynamicDecodeRNNTimeMajor(self): + self._testDynamicDecodeRNN(time_major=True) + + def testDynamicDecodeRNNWithBasicTrainingSamplerMatchesDynamicRNN(self): + sequence_length = [3, 4, 3, 1, 0] + batch_size = 5 + max_time = 8 + input_depth = 7 + cell_depth = 10 + max_out = max(sequence_length) + + with self.test_session() as sess: + inputs = np.random.randn(batch_size, max_time, + input_depth).astype(np.float32) + + cell = core_rnn_cell.LSTMCell(cell_depth) + zero_state = cell.zero_state(dtype=dtypes.float32, batch_size=batch_size) + sampler = sampling_decoder.BasicTrainingSampler(inputs, 
sequence_length) + my_decoder = sampling_decoder.BasicSamplingDecoder( + cell=cell, sampler=sampler, initial_state=zero_state) + + # Match the variable scope of dynamic_rnn below so we end up + # using the same variables + with vs.variable_scope("rnn"): + final_decoder_outputs, final_decoder_state = decoder.dynamic_decode_rnn( + my_decoder) + + with vs.variable_scope(vs.get_variable_scope(), reuse=True): + final_rnn_outputs, final_rnn_state = rnn.dynamic_rnn( + cell, + inputs, + sequence_length=sequence_length, + initial_state=zero_state) + + sess.run(variables.global_variables_initializer()) + sess_results = sess.run({ + "final_decoder_outputs": final_decoder_outputs, + "final_decoder_state": final_decoder_state, + "final_rnn_outputs": final_rnn_outputs, + "final_rnn_state": final_rnn_state + }) + + # Decoder only runs out to max_out; ensure values are identical + # to dynamic_rnn, which also zeros out outputs and passes along state. + self.assertAllClose(sess_results["final_decoder_outputs"].rnn_output, + sess_results["final_rnn_outputs"][:, 0:max_out, :]) + self.assertAllClose(sess_results["final_decoder_state"], + sess_results["final_rnn_state"]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py new file mode 100644 index 0000000000..ba945a0ecb --- /dev/null +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/sampling_decoder_test.py @@ -0,0 +1,109 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for contrib.seq2seq.python.seq2seq.sampling_decoder.""" +# pylint: disable=unused-import,g-bad-import-order +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +# pylint: enable=unused-import + +import sys + +# TODO(jart): #6568 Remove this hack that makes dlopen() not crash. +if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"): + import ctypes # pylint: disable=g-import-not-at-top + sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL) + +# pylint: disable=g-import-not-at-top +import numpy as np + +from tensorflow.contrib.rnn import core_rnn_cell +from tensorflow.contrib.seq2seq.python.ops import sampling_decoder +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +# pylint: enable=g-import-not-at-top + + +class BasicSamplingDecoderTest(test.TestCase): + + def testStepWithBasicTrainingSampler(self): + sequence_length = [3, 4, 3, 1, 0] + batch_size = 5 + max_time = 8 + input_depth = 7 + cell_depth = 10 + + with self.test_session() as sess: + inputs = np.random.randn(batch_size, max_time, + input_depth).astype(np.float32) + cell = core_rnn_cell.LSTMCell(cell_depth) + sampler = sampling_decoder.BasicTrainingSampler( + inputs, sequence_length, time_major=False) + my_decoder = 
sampling_decoder.BasicSamplingDecoder( + cell=cell, + sampler=sampler, + initial_state=cell.zero_state( + dtype=dtypes.float32, batch_size=batch_size)) + output_size = my_decoder.output_size + output_dtype = my_decoder.output_dtype + batch_size_t = my_decoder.batch_size + self.assertEqual( + sampling_decoder.SamplingDecoderOutput(cell_depth, + tensor_shape.TensorShape([])), + output_size) + self.assertEqual( + sampling_decoder.SamplingDecoderOutput(dtypes.float32, dtypes.int32), + output_dtype) + + (first_finished, first_inputs, first_state) = my_decoder.initialize() + (step_outputs, step_state, step_next_inputs, + step_finished) = my_decoder.step( + constant_op.constant(0), first_inputs, first_state) + + self.assertTrue(isinstance(first_state, core_rnn_cell.LSTMStateTuple)) + self.assertTrue(isinstance(step_state, core_rnn_cell.LSTMStateTuple)) + self.assertTrue( + isinstance(step_outputs, sampling_decoder.SamplingDecoderOutput)) + self.assertEqual((batch_size, cell_depth), step_outputs[0].get_shape()) + self.assertEqual((batch_size,), step_outputs[1].get_shape()) + self.assertEqual((batch_size, cell_depth), first_state[0].get_shape()) + self.assertEqual((batch_size, cell_depth), first_state[1].get_shape()) + self.assertEqual((batch_size, cell_depth), step_state[0].get_shape()) + self.assertEqual((batch_size, cell_depth), step_state[1].get_shape()) + + sess.run(variables.global_variables_initializer()) + sess_results = sess.run({ + "batch_size": batch_size_t, + "first_finished": first_finished, + "first_inputs": first_inputs, + "first_state": first_state, + "step_outputs": step_outputs, + "step_state": step_state, + "step_next_inputs": step_next_inputs, + "step_finished": step_finished + }) + + self.assertAllEqual([False, False, False, False, True], + sess_results["first_finished"]) + self.assertAllEqual([False, False, False, True, True], + sess_results["step_finished"]) + self.assertAllEqual([-1] * 5, sess_results["step_outputs"].sample_id) + + +if __name__ == 
"__main__": + test.main() diff --git a/tensorflow/contrib/seq2seq/python/ops/decoder.py b/tensorflow/contrib/seq2seq/python/ops/decoder.py new file mode 100644 index 0000000000..3ab6cb0e8c --- /dev/null +++ b/tensorflow/contrib/seq2seq/python/ops/decoder.py @@ -0,0 +1,237 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Seq2seq layer operations for use in neural networks. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +import six + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.util import nest + +__all__ = ["Decoder", "dynamic_decode_rnn"] + + +def _transpose_batch_time(x): + """Transpose the batch and time dimensions of a Tensor. + + Retains as much of the static shape information as possible. + + Args: + x: A tensor of rank 2 or higher. + + Returns: + x transposed along the first two dimensions. 
+ + Raises: + ValueError: if `x` is rank 1 or lower. + """ + x_static_shape = x.get_shape() + if x_static_shape.ndims is not None and x_static_shape.ndims < 2: + raise ValueError( + "Expected input tensor %s to have rank at least 2, but saw shape: %s" % + (x, x_static_shape)) + x_rank = array_ops.rank(x) + x_t = array_ops.transpose( + x, array_ops.concat_v2( + ([1, 0], math_ops.range(2, x_rank)), axis=0)) + x_t.set_shape( + tensor_shape.TensorShape([ + x_static_shape[1].value, x_static_shape[0].value + ]).concatenate(x_static_shape[2:])) + return x_t + + +@six.add_metaclass(abc.ABCMeta) +class Decoder(object): + """An RNN Decoder abstract interface object.""" + + @property + def batch_size(self): + """The batch size of the inputs returned by `sample`.""" + raise NotImplementedError + + @property + def output_size(self): + """A (possibly nested tuple of...) integer[s] or `TensorShape` object[s].""" + raise NotImplementedError + + @property + def output_dtype(self): + """A (possibly nested tuple of...) dtype[s].""" + raise NotImplementedError + + @abc.abstractmethod + def initialize(self, name=None): + """Called before any decoding iterations. + + Args: + name: Name scope for any created operations. + + Returns: + `(finished, first_inputs, initial_state)`. + """ + raise NotImplementedError + + @abc.abstractmethod + def step(self, time, inputs, state): + """Called per step of decoding (but only once for dynamic decoding). + + Args: + time: Scalar `int32` tensor. + inputs: Input (possibly nested tuple of) tensor[s] for this time step. + state: State (possibly nested tuple of) tensor[s] from previous time step. + + Returns: + `(outputs, next_state, next_inputs, finished)`. 
+ """ + raise NotImplementedError + + +def _create_zero_outputs(size, dtype, batch_size): + """Create a zero outputs Tensor structure.""" + def _t(s): + return (s if isinstance(s, ops.Tensor) else constant_op.constant( + tensor_shape.TensorShape(s).as_list(), + dtype=dtypes.int32, + name="zero_suffix_shape")) + + def _create(s, d): + return array_ops.zeros( + array_ops.concat( + ([batch_size], _t(s)), axis=0), dtype=d) + + return nest.map_structure(_create, size, dtype) + + +def dynamic_decode_rnn(decoder, + output_time_major=False, + parallel_iterations=32, + swap_memory=False): + """Perform dynamic decoding with `decoder`. + + Args: + decoder: A `Decoder` instance. + output_time_major: Python boolean. Default: `False` (batch major). If + `True`, outputs are returned as time major tensors (this mode is faster). + Otherwise, outputs are returned as batch major tensors (this adds extra + time to the computation). + parallel_iterations: Argument passed to `tf.while_loop`. + swap_memory: Argument passed to `tf.while_loop`. + + Returns: + `(final_outputs, final_state)`. + + Raises: + TypeError: if `decoder` is not an instance of `Decoder`. 
+ """ + if not isinstance(decoder, Decoder): + raise TypeError("Expected decoder to be type Decoder, but saw: %s" % + type(decoder)) + + zero_outputs = _create_zero_outputs(decoder.output_size, decoder.output_dtype, + decoder.batch_size) + + initial_finished, initial_inputs, initial_state = decoder.initialize() + initial_time = constant_op.constant(0, dtype=dtypes.int32) + + def _shape(batch_size, from_shape): + if not isinstance(from_shape, tensor_shape.TensorShape): + return tensor_shape.TensorShape(None) + else: + batch_size = tensor_util.constant_value( + ops.convert_to_tensor( + batch_size, name="batch_size")) + return tensor_shape.TensorShape([batch_size]).concatenate(from_shape) + + def _create_ta(s, d): + return tensor_array_ops.TensorArray( + dtype=d, size=0, dynamic_size=True, + element_shape=_shape(decoder.batch_size, s)) + + initial_outputs_ta = nest.map_structure( + _create_ta, decoder.output_size, decoder.output_dtype) + + def condition(unused_time, unused_outputs_ta, unused_state, unused_inputs, + finished): + return math_ops.logical_not(math_ops.reduce_all(finished)) + + def body(time, outputs_ta, state, inputs, finished): + """Internal while_loop body. + + Args: + time: scalar int32 tensor. + outputs_ta: structure of TensorArray. + state: (structure of) state tensors and TensorArrays. + inputs: (structure of) input tensors. + finished: 1-D bool tensor. + + Returns: + `(time + 1, outputs_ta, next_state, next_inputs, next_finished)`. 
+ """ + (next_outputs, decoder_state, next_inputs, decoder_finished) = decoder.step( + time, inputs, state) + next_finished = math_ops.logical_or(decoder_finished, finished) + + nest.assert_same_structure(state, decoder_state) + nest.assert_same_structure(outputs_ta, next_outputs) + nest.assert_same_structure(inputs, next_inputs) + + # Zero out output values past finish + emit = nest.map_structure( + lambda out, zero: array_ops.where(finished, zero, out), next_outputs, + zero_outputs) + + # Copy through states past finish + def _maybe_copy_state(new, cur): + return (new if isinstance(cur, tensor_array_ops.TensorArray) else + array_ops.where(finished, cur, new)) + + next_state = nest.map_structure(_maybe_copy_state, decoder_state, state) + outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out), + outputs_ta, emit) + return (time + 1, outputs_ta, next_state, next_inputs, next_finished) + + res = control_flow_ops.while_loop( + condition, + body, + loop_vars=[ + initial_time, initial_outputs_ta, initial_state, initial_inputs, + initial_finished + ], + parallel_iterations=parallel_iterations, + swap_memory=swap_memory) + + final_outputs_ta = res[1] + final_state = res[2] + + final_outputs = nest.map_structure(lambda ta: ta.stack(), final_outputs_ta) + if not output_time_major: + final_outputs = nest.map_structure(_transpose_batch_time, final_outputs) + + return final_outputs, final_state diff --git a/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py b/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py new file mode 100644 index 0000000000..c4654e535d --- /dev/null +++ b/tensorflow/contrib/seq2seq/python/ops/sampling_decoder.py @@ -0,0 +1,190 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A class of Decoders that may sample to generate the next input. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import collections + +import six + +from tensorflow.contrib.rnn import core_rnn_cell +from tensorflow.contrib.seq2seq.python.ops import decoder +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.util import nest + +__all__ = [ + "Sampler", "SamplingDecoderOutput", "BasicSamplingDecoder", + "BasicTrainingSampler" +] + +_transpose_batch_time = decoder._transpose_batch_time # pylint: disable=protected-access + + +@six.add_metaclass(abc.ABCMeta) +class Sampler(object): + + @property + def batch_size(self): + pass + + @abc.abstractmethod + def initialize(self): + pass + + @abc.abstractmethod + def sample(self, time, outputs, state): + pass + + +class SamplingDecoderOutput( + collections.namedtuple("SamplingDecoderOutput", + ("rnn_output", "sample_id"))): + pass + + +class BasicSamplingDecoder(decoder.Decoder): + """Basic sampling decoder.""" + + def __init__(self, cell, sampler, initial_state): + """Initialize 
BasicSamplingDecoder. + + Args: + cell: An `RNNCell` instance. + sampler: A `Sampler` instance. + initial_state: A (possibly nested tuple of...) tensors and TensorArrays. + + Raises: + TypeError: if `cell` is not an instance of `RNNCell` or `sampler` + is not an instance of `Sampler`. + """ + if not isinstance(cell, core_rnn_cell.RNNCell): + raise TypeError("cell must be an RNNCell, received: %s" % type(cell)) + if not isinstance(sampler, Sampler): + raise TypeError("sampler must be a Sampler, received: %s" % + type(sampler)) + self._cell = cell + self._sampler = sampler + self._initial_state = initial_state + + @property + def batch_size(self): + return self._sampler.batch_size + + @property + def output_size(self): + # Return the cell output and the id + return SamplingDecoderOutput( + rnn_output=self._cell.output_size, + sample_id=tensor_shape.TensorShape([])) + + @property + def output_dtype(self): + # Assume the dtype of the cell is the output_size structure + # containing the input_state's first component's dtype. + # Return that structure and int32 (the id) + dtype = nest.flatten(self._initial_state)[0].dtype + return SamplingDecoderOutput( + nest.map_structure(lambda _: dtype, self._cell.output_size), + dtypes.int32) + + def initialize(self, name=None): + return self._sampler.initialize() + (self._initial_state,) + + def step(self, time, inputs, state): + """Perform a decoding step. + + Args: + time: scalar `int32` tensor. + inputs: A (structure of) input tensors. + state: A (structure of) state tensors and TensorArrays. + + Returns: + `(outputs, next_state, next_inputs, finished)`. + """ + cell_outputs, next_state = self._cell(inputs, state) + (sample_id, finished, next_inputs) = self._sampler.sample( + time=time, outputs=cell_outputs, state=next_state) + outputs = SamplingDecoderOutput(cell_outputs, sample_id) + return (outputs, next_state, next_inputs, finished) + + +class BasicTrainingSampler(Sampler): + """A (non-)sampler for use during training. 
Only reads inputs.""" + + def __init__(self, inputs, sequence_length, time_major=False): + """Initializer. + + Args: + inputs: A (structure of) input tensors. + sequence_length: An int32 vector tensor. + time_major: Python bool. + + Raises: + ValueError: if `sequence_length` is not a 1D tensor. + """ + inputs = ops.convert_to_tensor(inputs, name="inputs") + if not time_major: + inputs = nest.map_structure(_transpose_batch_time, inputs) + + def _unstack_ta(inp): + return tensor_array_ops.TensorArray( + dtype=inp.dtype, size=array_ops.shape(inp)[0], + element_shape=inp.get_shape()[1:]).unstack(inp) + + self._input_tas = nest.map_structure(_unstack_ta, inputs) + sequence_length = ops.convert_to_tensor( + sequence_length, name="sequence_length") + if sequence_length.get_shape().ndims != 1: + raise ValueError( + "Expected sequence_length to be a vector, but received shape: %s" % + sequence_length.get_shape()) + self._sequence_length = sequence_length + self._zero_inputs = nest.map_structure( + lambda inp: array_ops.zeros_like(inp[0, :]), inputs) + self._batch_size = array_ops.size(sequence_length) + + @property + def batch_size(self): + return self._batch_size + + def initialize(self): + finished = math_ops.equal(0, self._sequence_length) + all_finished = math_ops.reduce_all(finished) + next_inputs = control_flow_ops.cond( + all_finished, lambda: self._zero_inputs, + lambda: nest.map_structure(lambda inp: inp.read(0), self._input_tas)) + return (finished, next_inputs) + + def sample(self, time, **unused_kwargs): + next_time = time + 1 + finished = (next_time >= self._sequence_length) + all_finished = math_ops.reduce_all(finished) + sample_id = array_ops.tile([constant_op.constant(-1)], [self._batch_size]) + next_inputs = control_flow_ops.cond( + all_finished, lambda: self._zero_inputs, + lambda: nest.map_structure(lambda inp: inp.read(next_time), self._input_tas)) + return (sample_id, finished, next_inputs) diff --git 
a/tensorflow/contrib/tensor_forest/client/random_forest.py b/tensorflow/contrib/tensor_forest/client/random_forest.py index 28d9b43bbd..420a6d3138 100644 --- a/tensorflow/contrib/tensor_forest/client/random_forest.py +++ b/tensorflow/contrib/tensor_forest/client/random_forest.py @@ -18,7 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import trainable @@ -355,18 +354,15 @@ class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable): # pylint: enable=protected-access return result - @experimental def export_savedmodel(self, export_dir_base, - input_fn, + serving_input_fn, default_output_alternative_key=None, assets_extra=None, - as_text=False, - exports_to_keep=None): + as_text=False): return self._estimator.export_savedmodel( export_dir_base, - input_fn, + serving_input_fn, default_output_alternative_key=default_output_alternative_key, assets_extra=assets_extra, - as_text=as_text, - exports_to_keep=exports_to_keep) + as_text=as_text) diff --git a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py index 9fd102d0f6..0f52c2128d 100644 --- a/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py +++ b/tensorflow/contrib/training/python/training/batch_sequences_with_states_test.py @@ -26,6 +26,8 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import 
random_ops from tensorflow.python.ops import string_ops @@ -41,6 +43,31 @@ class BatchSequencesWithStatesTest(test.TestCase): def setUp(self): super(BatchSequencesWithStatesTest, self).setUp() self.value_length = 4 + ind1 = np.array([ + [0, 0], + [1, 0], [1, 3], [1, 4], + [3, 2], [3, 3]]) + val1 = np.array([0, 10, 13, 14, 32, 33]) + shape1 = np.array([self.value_length, 6]) + sp_tensor1 = sparse_tensor.SparseTensor( + array_ops.constant(ind1, dtypes.int64), + array_ops.constant(val1, dtypes.int64), + array_ops.constant(shape1, dtypes.int64)) + ind2 = np.array([ + [0, 0, 1], + [0, 1, 0], + [0, 1, 2], + [1, 0, 3], + [1, 1, 0], + [1, 1, 1], + [1, 1, 2], + [1, 2, 2]]) + val2 = np.array([1, 10, 12, 103, 150, 149, 150, 122]) + shape2 = np.array([self.value_length, 3, 4]) + sp_tensor2 = sparse_tensor.SparseTensor( + array_ops.constant(ind2, dtypes.int64), + array_ops.constant(val2, dtypes.int64), + array_ops.constant(shape2, dtypes.int64)) self.batch_size = 2 self.key = string_ops.string_join([ "key_", string_ops.as_string( @@ -48,8 +75,9 @@ class BatchSequencesWithStatesTest(test.TestCase): ]) self.sequences = { "seq1": np.random.rand(self.value_length, 5), - "seq2": np.random.rand(self.value_length, 4, 2) - } + "seq2": np.random.rand(self.value_length, 4, 2), + "seq3": sp_tensor1, + "seq4": sp_tensor2} self.context = {"context1": [3, 4]} self.initial_states = { "state1": np.random.rand(6, 7), @@ -60,9 +88,12 @@ class BatchSequencesWithStatesTest(test.TestCase): return set( [s.decode("ascii").split(":")[0].encode("ascii") for s in key_value]) - def _testBasics(self, num_unroll, length, pad, expected_seq1_batch1, - expected_seq2_batch1, expected_seq1_batch2, - expected_seq2_batch2): + def _testBasics(self, num_unroll, length, pad, + expected_seq1_batch1, expected_seq2_batch1, + expected_seq1_batch2, expected_seq2_batch2, + expected_seq3_batch1, expected_seq3_batch2, + expected_seq4_batch1, expected_seq4_batch2): + with self.test_session() as sess: next_batch = 
sqss.batch_sequences_with_states( input_key=self.key, @@ -99,12 +130,13 @@ class BatchSequencesWithStatesTest(test.TestCase): threads = queue_runner_impl.start_queue_runners(coord=coord) # Step 1 - (key_value, next_key_value, seq1_value, seq2_value, context1_value, - state1_value, state2_value, length_value, _, _) = sess.run( + (key_value, next_key_value, seq1_value, seq2_value, seq3_value, + seq4_value, context1_value, state1_value, state2_value, length_value, + _, _) = sess.run( (next_batch.key, next_batch.next_key, next_batch.sequences["seq1"], - next_batch.sequences["seq2"], next_batch.context["context1"], + next_batch.sequences["seq2"], next_batch.sequences["seq3"], + next_batch.sequences["seq4"], next_batch.context["context1"], state1, state2, next_batch.length, state1_update, state2_update)) - expected_first_keys = set([b"00000_of_00002"]) expected_second_keys = set([b"00001_of_00002"]) expected_final_keys = set([b"STOP"]) @@ -116,6 +148,14 @@ class BatchSequencesWithStatesTest(test.TestCase): context1_value) self.assertAllEqual(expected_seq1_batch1, seq1_value) self.assertAllEqual(expected_seq2_batch1, seq2_value) + self.assertAllEqual(expected_seq3_batch1.indices, seq3_value.indices) + self.assertAllEqual(expected_seq3_batch1.values, seq3_value.values) + self.assertAllEqual(expected_seq3_batch1.dense_shape, + seq3_value.dense_shape) + self.assertAllEqual(expected_seq4_batch1.indices, seq4_value.indices) + self.assertAllEqual(expected_seq4_batch1.values, seq4_value.values) + self.assertAllEqual(expected_seq4_batch1.dense_shape, + seq4_value.dense_shape) self.assertAllEqual( np.tile(self.initial_states["state1"], (self.batch_size, 1, 1)), state1_value) @@ -125,12 +165,13 @@ class BatchSequencesWithStatesTest(test.TestCase): self.assertAllEqual(length_value, [num_unroll, num_unroll]) # Step 2 - (key_value, next_key_value, seq1_value, seq2_value, context1_value, - state1_value, state2_value, length_value, _, _) = sess.run( + (key_value, next_key_value, 
seq1_value, seq2_value, seq3_value, + seq4_value, context1_value, state1_value, state2_value, length_value, + _, _) = sess.run( (next_batch.key, next_batch.next_key, next_batch.sequences["seq1"], - next_batch.sequences["seq2"], next_batch.context["context1"], - next_batch.state("state1"), next_batch.state("state2"), - next_batch.length, state1_update, state2_update)) + next_batch.sequences["seq2"], next_batch.sequences["seq3"], + next_batch.sequences["seq4"], next_batch.context["context1"], + state1, state2, next_batch.length, state1_update, state2_update)) self.assertEqual(expected_second_keys, self._prefix(key_value)) self.assertEqual(expected_final_keys, self._prefix(next_key_value)) @@ -139,6 +180,14 @@ class BatchSequencesWithStatesTest(test.TestCase): context1_value) self.assertAllEqual(expected_seq1_batch2, seq1_value) self.assertAllEqual(expected_seq2_batch2, seq2_value) + self.assertAllEqual(expected_seq3_batch2.indices, seq3_value.indices) + self.assertAllEqual(expected_seq3_batch2.values, seq3_value.values) + self.assertAllEqual(expected_seq3_batch2.dense_shape, + seq3_value.dense_shape) + self.assertAllEqual(expected_seq4_batch2.indices, seq4_value.indices) + self.assertAllEqual(expected_seq4_batch2.values, seq4_value.values) + self.assertAllEqual(expected_seq4_batch2.dense_shape, + seq4_value.dense_shape) self.assertAllEqual(1 + np.tile(self.initial_states["state1"], (self.batch_size, 1, 1)), state1_value) self.assertAllEqual(-1 + np.tile(self.initial_states["state2"], @@ -148,7 +197,7 @@ class BatchSequencesWithStatesTest(test.TestCase): coord.request_stop() coord.join(threads, stop_grace_period_secs=2) - def testBasicPadding(self): + def _testBasicPadding(self, pad): num_unroll = 2 # Divisor of value_length - so no padding necessary. 
expected_seq1_batch1 = np.tile( self.sequences["seq1"][np.newaxis, 0:num_unroll, :], @@ -162,37 +211,74 @@ class BatchSequencesWithStatesTest(test.TestCase): expected_seq2_batch2 = np.tile( self.sequences["seq2"][np.newaxis, num_unroll:self.value_length, :, :], (self.batch_size, 1, 1, 1)) + ind1_1 = np.array([ + # batch entry 1 + [0, 0, 0], + [0, 1, 0], [0, 1, 3], [0, 1, 4], + # batch entry 2 + [1, 0, 0], + [1, 1, 0], [1, 1, 3], [1, 1, 4]]) + ind1_2 = np.array([ + # batch entry 1 + [0, 1, 2], [0, 1, 3], + # batch entry 2 + [1, 1, 2], [1, 1, 3]]) + val1_1 = np.array([0, 10, 13, 14, + 0, 10, 13, 14]) + val1_2 = np.array([32, 33, + 32, 33]) + shape1 = np.array([self.batch_size, num_unroll, 6]) + + # For sp_tensor2 all values fall into the first segment. + ind2_1 = np.array([ + # batch entry 1 + [0, 0, 0, 1], + [0, 0, 1, 0], + [0, 0, 1, 2], + [0, 1, 0, 3], + [0, 1, 1, 0], + [0, 1, 1, 1], + [0, 1, 1, 2], + [0, 1, 2, 2], + # batch entry 2 + [1, 0, 0, 1], + [1, 0, 1, 0], + [1, 0, 1, 2], + [1, 1, 0, 3], + [1, 1, 1, 0], + [1, 1, 1, 1], + [1, 1, 1, 2], + [1, 1, 2, 2], + ]) + val2_1 = np.array([1, 10, 12, 103, 150, 149, 150, 122, + 1, 10, 12, 103, 150, 149, 150, 122]) + shape2 = np.array([self.batch_size, num_unroll, 3, 4]) + expected_seq3_batch1 = sparse_tensor.SparseTensorValue( + ind1_1, val1_1, shape1) + expected_seq3_batch2 = sparse_tensor.SparseTensorValue( + ind1_2, val1_2, shape1) + expected_seq4_batch1 = sparse_tensor.SparseTensorValue( + ind2_1, val2_1, shape2) + expected_seq4_batch2 = sparse_tensor.SparseTensorValue( + np.empty(shape=[0, 4], dtype=np.int64), np.array([]), shape2) self._testBasics( num_unroll=num_unroll, length=3, - pad=True, + pad=pad, expected_seq1_batch1=expected_seq1_batch1, - expected_seq2_batch1=expected_seq2_batch1, expected_seq1_batch2=expected_seq1_batch2, - expected_seq2_batch2=expected_seq2_batch2) - - def testBasics(self): - num_unroll = 2 # Divisor of value_length - so no padding necessary. 
- expected_seq1_batch1 = np.tile( - self.sequences["seq1"][np.newaxis, 0:num_unroll, :], - (self.batch_size, 1, 1)) - expected_seq2_batch1 = np.tile( - self.sequences["seq2"][np.newaxis, 0:num_unroll, :, :], - (self.batch_size, 1, 1, 1)) - expected_seq1_batch2 = np.tile( - self.sequences["seq1"][np.newaxis, num_unroll:self.value_length, :], - (self.batch_size, 1, 1)) - expected_seq2_batch2 = np.tile( - self.sequences["seq2"][np.newaxis, num_unroll:self.value_length, :, :], - (self.batch_size, 1, 1, 1)) - self._testBasics( - num_unroll=num_unroll, - length=3, - pad=False, - expected_seq1_batch1=expected_seq1_batch1, expected_seq2_batch1=expected_seq2_batch1, - expected_seq1_batch2=expected_seq1_batch2, - expected_seq2_batch2=expected_seq2_batch2) + expected_seq2_batch2=expected_seq2_batch2, + expected_seq3_batch1=expected_seq3_batch1, + expected_seq3_batch2=expected_seq3_batch2, + expected_seq4_batch1=expected_seq4_batch1, + expected_seq4_batch2=expected_seq4_batch2) + + def testBasicPadding(self): + self._testBasicPadding(pad=True) + + def testBasicNoPadding(self): + self._testBasicPadding(pad=False) def testNotAMultiple(self): num_unroll = 3 # Not a divisor of value_length - @@ -254,14 +340,69 @@ class BatchSequencesWithStatesTest(test.TestCase): expected_seq2_batch2 = np.concatenate( [padded_seq2] * self.batch_size, axis=0) + ind1_1 = np.array([ + # batch entry 1 + [0, 0, 0], + [0, 1, 0], [0, 1, 3], [0, 1, 4], + # batch entry 2 + [1, 0, 0], + [1, 1, 0], [1, 1, 3], [1, 1, 4]]) + ind1_2 = np.array([ + # batch entry 1 + [0, 0, 2], [0, 0, 3], + # batch entry 2 + [1, 0, 2], [1, 0, 3]]) + val1_1 = np.array([0, 10, 13, 14, + 0, 10, 13, 14]) + val1_2 = np.array([32, 33, + 32, 33]) + shape1 = np.array([self.batch_size, num_unroll, 6]) + + # For sp_tensor2 all values fall into the first segment. 
+ ind2_1 = np.array([ + # batch entry 1 + [0, 0, 0, 1], + [0, 0, 1, 0], + [0, 0, 1, 2], + [0, 1, 0, 3], + [0, 1, 1, 0], + [0, 1, 1, 1], + [0, 1, 1, 2], + [0, 1, 2, 2], + # batch entry 2 + [1, 0, 0, 1], + [1, 0, 1, 0], + [1, 0, 1, 2], + [1, 1, 0, 3], + [1, 1, 1, 0], + [1, 1, 1, 1], + [1, 1, 1, 2], + [1, 1, 2, 2], + ]) + val2_1 = np.array([1, 10, 12, 103, 150, 149, 150, 122, + 1, 10, 12, 103, 150, 149, 150, 122]) + shape2 = np.array([self.batch_size, num_unroll, 3, 4]) + expected_seq3_batch1 = sparse_tensor.SparseTensorValue( + ind1_1, val1_1, shape1) + expected_seq3_batch2 = sparse_tensor.SparseTensorValue( + ind1_2, val1_2, shape1) + expected_seq4_batch1 = sparse_tensor.SparseTensorValue( + ind2_1, val2_1, shape2) + expected_seq4_batch2 = sparse_tensor.SparseTensorValue( + np.empty(shape=[0, 4], dtype=np.int64), np.array([]), shape2) + self._testBasics( num_unroll=num_unroll, length=None, pad=True, expected_seq1_batch1=expected_seq1_batch1, - expected_seq2_batch1=expected_seq2_batch1, expected_seq1_batch2=expected_seq1_batch2, - expected_seq2_batch2=expected_seq2_batch2) + expected_seq2_batch1=expected_seq2_batch1, + expected_seq2_batch2=expected_seq2_batch2, + expected_seq3_batch1=expected_seq3_batch1, + expected_seq3_batch2=expected_seq3_batch2, + expected_seq4_batch1=expected_seq4_batch1, + expected_seq4_batch2=expected_seq4_batch2) class PaddingTest(test.TestCase): @@ -270,8 +411,8 @@ class PaddingTest(test.TestCase): with ops.Graph().as_default() as g, self.test_session(graph=g): sequences = { "key_1": constant_op.constant([1, 2, 3]), # length 3 - "key_2": constant_op.constant([1.5, 2.5]) - } # length 2 + "key_2": constant_op.constant([1.5, 2.5]) # length 2 + } _, padded_seq = sqss._padding(sequences, 2) with self.assertRaisesOpError( @@ -300,5 +441,63 @@ class PaddingTest(test.TestCase): math_ops.reduce_all(math_ops.equal(val, padded_seq[key])).eval()) +class SparseTensorReConstructionTest(test.TestCase): + + def testAddManyTakeManyRoundTripBatched(self): + 
with self.test_session(use_gpu=False) as sess: + # N == 4 because shape_value == [4, 5] + indices_value_1 = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64) + values_value_1 = np.array([b"a", b"b", b"c"]) + shape_value_1 = np.array([4, 5], dtype=np.int64) + sparse_tensor_1 = sparse_tensor.SparseTensor( + array_ops.placeholder(dtypes.int64), + array_ops.placeholder(dtypes.string), + array_ops.placeholder(dtypes.int64)) + dict1 = {"key": sparse_tensor_1} + indices_value_2 = np.array([[1, 4], [2, 3]], dtype=np.int64) + values_value_2 = np.array([b"d", b"e"]) + shape_value_2 = np.array([4, 5], dtype=np.int64) + sparse_tensor_2 = sparse_tensor.SparseTensor( + array_ops.placeholder(dtypes.int64), + array_ops.placeholder(dtypes.string), + array_ops.placeholder(dtypes.int64)) + dict2 = {"key": sparse_tensor_2} + + input_seq1, keys1, tensor_list1 = sqss._deconstruct_sparse_tensor_seq( + dict1, shared_name="a") + handles_1 = input_seq1["key"] + input_seq2, _, _ = sqss._deconstruct_sparse_tensor_seq( + dict2, shared_name="a") + handles_2 = input_seq2["key"] + + combined_handles = array_ops.stack( + [handles_1[1], handles_1[2], handles_1[3], + handles_2[1], handles_2[2], handles_2[3]]) + batched_dict = {"key": combined_handles} + sqss._reconstruct_sparse_tensor_seq( + batched_dict, + keys1, + tensor_list1, + batch_size=2, + num_unroll=3) + + roundtrip_value, = sess.run( + [batched_dict["key"]], + feed_dict={sparse_tensor_1.indices: indices_value_1, + sparse_tensor_1.values: values_value_1, + sparse_tensor_1.dense_shape: shape_value_1, + sparse_tensor_2.indices: indices_value_2, + sparse_tensor_2.values: values_value_2, + sparse_tensor_2.dense_shape: shape_value_2}) + + self.assertAllEqual(roundtrip_value.indices, + np.array([[0, 1, 0], [1, 0, 4], [1, 1, 3]], + dtype=np.int64)) + self.assertAllEqual(roundtrip_value.values, + np.array([b"c", b"d", b"e"])) + self.assertAllEqual(roundtrip_value.dense_shape, + np.array([2, 3, 5], dtype=np.int64)) + + if __name__ == "__main__": 
test.main() diff --git a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py index a4f753acca..19e0809be8 100644 --- a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py +++ b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py @@ -29,16 +29,23 @@ import six from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.summary import summary from tensorflow.python.training import queue_runner +# pylint: disable=protected-access +_restore_sparse = sparse_ops._take_many_sparse_from_tensors_map +_store_sparse = sparse_ops._add_many_sparse_to_tensors_map +# pylint: enable=protected-access + class _SequenceInputWrapper(object): """A wrapper object for storing sequence-related input. @@ -1418,23 +1425,60 @@ def batch_sequences_with_states(input_key, elif input_sequences: # Assert that value_length is a multiple of num_unroll. for key, value in input_sequences.items(): - value_length = array_ops.shape(value)[0] - with ops.control_dependencies([ - control_flow_ops.Assert( - math_ops.logical_and( - math_ops.equal(value_length % num_unroll, 0), - math_ops.not_equal(value_length, 0)), - [ - string_ops.string_join([ - "Tensor %s first dimension should be a multiple of: " % - key, string_ops.as_string(num_unroll), - ", but saw value: ", string_ops.as_string(value_length), - ". 
Consider setting pad=True." - ]) - ]) - ]): - input_sequences[key] = array_ops.identity( - value, name="multiple_of_checked") + if (isinstance(value, sparse_tensor.SparseTensor) or + isinstance(value, sparse_tensor.SparseTensorValue)): + value_length = value.dense_shape[0] + with ops.control_dependencies([ + control_flow_ops.Assert( + math_ops.logical_and( + math_ops.equal(value_length % num_unroll, 0), + math_ops.not_equal(value_length, 0)), + [ + string_ops.string_join([ + "SparseTensor %s first dimension should be a " + "multiple of: " % key, + string_ops.as_string(num_unroll), + ", but saw value: ", + string_ops.as_string(value_length), + ". Consider setting pad=True."])])]): + input_sequences[key] = sparse_tensor.SparseTensor( + indices=value.indices, + values=array_ops.identity( + value.values, name="multiple_of_checked"), + dense_shape=value.dense_shape) + else: + if not isinstance(value, ops.Tensor): + try: + value = ops.convert_to_tensor(value) + except TypeError: + raise TypeError( + "Unsupported input_sequences expected Tensor or SparseTensor " + "values, got: %s for key %s" % (str(type(value)), key)) + value_length = array_ops.shape(value)[0] + with ops.control_dependencies([ + control_flow_ops.Assert( + math_ops.logical_and( + math_ops.equal(value_length % num_unroll, 0), + math_ops.not_equal(value_length, 0)), + [ + string_ops.string_join([ + "Tensor %s first dimension should be a multiple " + "of: " % key, + string_ops.as_string(num_unroll), + ", but saw value: ", + string_ops.as_string(value_length), + ". Consider setting pad=True." + ]) + ]) + ]): + input_sequences[key] = array_ops.identity( + value, name="multiple_of_checked") + + # Deconstruct SparseTensors in sequence into a dense Tensor before inputting + # to SQSS. 
+ (transformed_input_seq, + sparse_tensor_keys, + tensor_list) = _deconstruct_sparse_tensor_seq(input_sequences) # setup stateful queue reader stateful_reader = SequenceQueueingStateSaver( @@ -1442,7 +1486,7 @@ def batch_sequences_with_states(input_key, num_unroll, input_length=input_length, input_key=input_key, - input_sequences=input_sequences, + input_sequences=transformed_input_seq, input_context=input_context, initial_states=initial_states, capacity=capacity, @@ -1457,7 +1501,16 @@ def batch_sequences_with_states(input_key, queue_closed_exception_types=(errors.OutOfRangeError, errors.CancelledError)) queue_runner.add_queue_runner(q_runner) - return stateful_reader.next_batch + batch = stateful_reader.next_batch + + # Reconstruct SparseTensors in sequence. + _reconstruct_sparse_tensor_seq( + batch.sequences, + sparse_tensor_keys, + tensor_list, + batch_size, + num_unroll) + return batch def _padding(sequences, num_unroll): @@ -1489,38 +1542,187 @@ def _padding(sequences, num_unroll): sequences_dict = {} for key, value in sequences.items(): - sequences_dict[key] = ops.convert_to_tensor(value) - - lengths = [array_ops.shape(value)[0] for value in sequences_dict.values()] - length = lengths[0] - all_lengths_equal = [ - control_flow_ops.Assert( - math_ops.equal(l, length), [ - string_ops.string_join([ - "All sequence lengths must match, but received lengths: ", - string_ops.as_string(lengths) - ]) - ]) for l in lengths - ] + if not (isinstance(value, sparse_tensor.SparseTensor) or + isinstance(value, sparse_tensor.SparseTensorValue)): + sequences_dict[key] = ops.convert_to_tensor(value) + else: + sequences_dict[key] = value + + lengths = [array_ops.shape(value)[0] for value in sequences_dict.values() + if isinstance(value, ops.Tensor)] + if lengths: + length = lengths[0] + all_lengths_equal = [ + control_flow_ops.Assert( + math_ops.equal(l, length), [string_ops.string_join( + ["All sequence lengths must match, but received lengths: ", + 
string_ops.as_string(lengths)])]) + for l in lengths] + length = control_flow_ops.with_dependencies(all_lengths_equal, length) + else: # Only have SparseTensors + sparse_lengths = [value.dense_shape[0] for value in sequences_dict.values() + if isinstance(value, sparse_tensor.SparseTensor)] + length = math_ops.maximum(sparse_lengths) - length = control_flow_ops.with_dependencies(all_lengths_equal, length) unroll = array_ops.constant(num_unroll) padded_length = length + ((unroll - (length % unroll)) % unroll) padded_sequences = {} for key, value in sequences_dict.items(): - # 1. create shape of paddings - # first dimension of value will be increased by num_paddings to - # padded_length - num_paddings = [padded_length - array_ops.shape(value)[0]] - # the shape of the paddings that we concat with the original value will be - # [num_paddings, tf.shape(value)[1], tf.shape(value)[2], ..., - # tf.shape(value)[tf.rank(value) - 1])] - padding_shape = array_ops.concat((num_paddings, array_ops.shape(value)[1:]), - 0) - # 2. fill padding shape with dummies - dummy = array_ops.constant( - "" if value.dtype == dtypes.string else 0, dtype=value.dtype) - paddings = array_ops.fill(dims=padding_shape, value=dummy) - # 3. concat values with paddings - padded_sequences[key] = array_ops.concat([value, paddings], 0) + if isinstance(value, ops.Tensor): + # 1. create shape of paddings + # first dimension of value will be increased by num_paddings to + # padded_length + num_paddings = [padded_length - array_ops.shape(value)[0]] + # the shape of the paddings that we concat with the original value will be + # [num_paddings, tf.shape(value)[1], tf.shape(value)[2], ..., + # tf.shape(value)[tf.rank(value) - 1])] + padding_shape = array_ops.concat( + (num_paddings, array_ops.shape(value)[1:]), 0) + # 2. fill padding shape with dummies + dummy = array_ops.constant( + "" if value.dtype == dtypes.string else 0, dtype=value.dtype) + paddings = array_ops.fill(dims=padding_shape, value=dummy) + # 3. 
concat values with paddings + padded_sequences[key] = array_ops.concat([value, paddings], 0) + else: + padded_shape = array_ops.concat([[math_ops.to_int64(padded_length)], + value.dense_shape[1:]], 0) + padded_sequences[key] = sparse_tensor.SparseTensor( + indices=value.indices, + values=value.values, + dense_shape=padded_shape) return length, padded_sequences + + +def _deconstruct_sparse_tensor_seq(input_sequence, shared_name=None): + """Converts `SparseTensor` values into `Tensors` of IDs and meta data. + + Given a dict of keys -> `Tensor` or `SparseTensor` transforms the + `SparseTensor` values into `Tensor` values of IDs by calling `_store_sparse`. + The IDs are pointers into and underlying `SparseTensorsMap` that is being + constructed. Additional meta data is returned in order to be able to + reconstruct `SparseTensor` values after batching and segmenting the IDs + `Tensor`. + + Args: + input_sequence: dictionary with `Tensor` or `SparseTensor` values. + shared_name: The shared name for the underlying `SparseTensorsMap` + (optional, defaults to the name of the newly created op). + Returns: + A tuple `(sequence, sparse_tensor_keys, tensor_list)` where `sequence` is + dictionary with the same keys as `input_sequence` but only `Tensor` values, + `sparse_tensor_keys` is a list of the keys of the `SparseTensor` values that + were converted, and `tensor_list` is a list of the same length with + `Tensor` objects. 
+ """ + sparse_tensor_keys = [ + k for k in sorted(input_sequence.keys()) + if isinstance(input_sequence[k], sparse_tensor.SparseTensor)] + if not sparse_tensor_keys: + return input_sequence, None, sparse_tensor_keys + sparse_tensor_list = [input_sequence[k] for k in sparse_tensor_keys] + tensor_list = [_store_sparse(sp_tensor, shared_name=shared_name) + for sp_tensor in sparse_tensor_list] + transformed_input_seq = dict(input_sequence) + tensor_op_list = [] + for i, k in enumerate(sparse_tensor_keys): + transformed_input_seq[k] = tensor_list[i] + tensor_op_list += [tensor_list[i].op] + return transformed_input_seq, sparse_tensor_keys, tensor_op_list + + +def _reconstruct_sparse_tensor_seq(sequence, + sparse_tensor_keys, + tensor_op_list, + batch_size, + num_unroll): + """Inverse of _deconstruct_sparse_tensor_seq. + + Given a dict of keys -> `Tensor` reconstructs `SparseTensor` values for keys + in `sparse_tensor_keys`. Their `Tensor` values are assumed to be IDs into the + underlying `SparseTensorsMap`. The `dense_shape` of the `SparseTensor`s is + `[batch_size, num_unroll, d_0, d_1, ..., d_n]` when the original + `SparseTensor` that got deconstructed with `_deconstruct_sparse_tensor_seq` + has a `dense_shape` of `[None, d_0, d_1, ..., d_n]`. + + Args: + sequence: dictionary with only `Tensor` values that is being updated. + sparse_tensor_keys: list of the keys present in `sequence` identifying + `SparseTensor` values that should be reconstructed. + tensor_op_list: list of the same length as `sparse_tensor_keys` with + `Tensor` objects. + batch_size: int or int32 scalar `Tensor`, how large minibatches should + be. + num_unroll: Python integer, how many time steps were unrolled at a time. + """ + def _flatten_tensor(tensor): + """Flattens `Tensor` of `shape [batch_size, num_unroll]` into 1D `Tensor`. + + The main use of this function is to work around the limitation of + `_restore_sparse` to only accept 1D handles. 
+ + Args: + tensor: 2D `Tensor` of `shape [batch_size, num_unroll]` + Returns: + 1D `Tensor`. + """ + return array_ops.reshape(tensor, [-1]) + + def _unflatten_sparse_tensor(sp_tensor): + """Recreates `[batch_size, num_unroll]` dimensions in the `SparseTensor`. + + Counter-part of `_flatten_tensor` which is called on the input of + `_restore_sparse` while this method is called on the output of it. + Together they work around the limitation of `_restore_sparse` to only + accept 1D handles. + + The `indices` in `sp_tensor` is a 2D `Tensor` of `shape [N, ndims]`, where + `N` is the number of `values` and `ndims` is the number of dimension in its + dense counterpart. Among `ndims` the first entry corresponds to the batch + dimension `[0, num_unroll * batch_size)` from which we need to recreate the + 2 dimensions `batch_size` and `num_unroll`. + + The reason this reconstruction works is because the output of + `_restore_sparse` despite being a `SparseTensor` is actually dense w.r.t. + that first entry. + + Args: + sp_tensor: A SparseTensor. + Returns: + A SparseTensor with a +1 higher rank than the input. + """ + idx_batch = math_ops.to_int64(math_ops.floor(s.indices[:, 0] / num_unroll)) + idx_time = math_ops.mod(s.indices[:, 0], num_unroll) + indices = array_ops.concat_v2([array_ops.expand_dims(idx_batch, 1), + array_ops.expand_dims(idx_time, 1), + s.indices[:, 1:]], axis=1) + dense_shape = array_ops.concat_v2( + [[batch_size], [num_unroll], s.dense_shape[1:]], axis=0) + return sparse_tensor.SparseTensor( + indices=indices, + values=sp_tensor.values, + dense_shape=dense_shape) + + if not sparse_tensor_keys: + return + tensor_list = [sequence[k] for k in sparse_tensor_keys] + sp_tensors = [ + _restore_sparse(sparse_map_op=i, + # Flatten the 2D Tensor [batch_size, num_unroll] of + # handles to a 1D Tensor. + # Reconstruct the dimensions later. + # TODO(b/34247140): Remove this workaround. 
+ sparse_handles=_flatten_tensor(s), rank=None) + for i, s in zip(tensor_op_list, tensor_list)] + num_unroll = ops.convert_to_tensor(num_unroll, dtype=dtypes.int64, + name="num_unroll_int64") + + # Recreate the [batch_size, num_unroll] dimensions in the SparseTensors. + # The dense_shape will have a +1 higher rank. + # TODO(b/34247140): Remove this workaround. + sp_tensors_higher_dim = [_unflatten_sparse_tensor(s) for s in sp_tensors] + + # Set values to SparseTensors for sparse_tensor_keys. + for i, key in enumerate(sparse_tensor_keys): + sequence[key] = sp_tensors_higher_dim[i] + return diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc index cb1e7577cf..096ca0f0cf 100644 --- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc +++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc @@ -52,7 +52,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) { test::FillFn<float>(&test_tensor2, [](int) -> float { return 3.0; }); auto root = Scope::NewRootScope().ExitOnError(); - ops::Output m = ops::MatMul(root, test_tensor1, test_tensor2); + Output m = ops::MatMul(root, test_tensor1, test_tensor2); const string result_name = m.node()->name(); GraphDef graph_def; @@ -103,7 +103,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, NotSupportedTypesConvert) { Tensor test_tensor2(DT_STRING, kTestTensorShape); test::FillFn<string>(&test_tensor2, [](int) -> string { return "XYZ"; }); auto root = Scope::NewRootScope().ExitOnError(); - ops::Output m = ops::Add(root, test_tensor1, test_tensor2); + Output m = ops::Add(root, test_tensor1, test_tensor2); const string result_name = m.node()->name(); GraphDef graph_def; diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c27cc48805..72268c8824 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -258,6 +258,7 @@ cc_library( "platform/net.h", "platform/notification.h", 
"platform/prefetch.h", + "platform/profile_utils/clock_cycle_profiler.h", "platform/profile_utils/cpu_utils.h", "platform/protobuf.h", "platform/stacktrace.h", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 85ce9d772a..38eb283b10 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -739,8 +739,7 @@ Status DirectSession::SendInputs(const NamedTensorList& inputs, for (const auto& input : inputs) { auto it = executors_and_keys->input_keys.find(input.first); if (it == executors_and_keys->input_keys.end()) { - return errors::InvalidArgument("'", input.first, - "' is not a pre-defined feed!"); + return errors::Internal("'", input.first, "' is not a pre-defined feed."); } const string& input_key = it->second; @@ -775,9 +774,8 @@ Status DirectSession::RecvOutputs(const std::vector<string>& output_names, const string& output_name = output_names[output_offset]; auto it = executors_and_keys->output_keys.find(output_name); if (it == executors_and_keys->output_keys.end()) { - return errors::InvalidArgument("'", output_name, - "' was not defined as a fetch" - " target in PRunSetup."); + return errors::Internal("'", output_name, + "' is not a pre-defined fetch."); } const string& output_key = it->second; Tensor output_tensor; diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc index 3aaaf87e79..b186c9d88c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc @@ -107,7 +107,7 @@ TEST_F(GpuStreamUtilTest, StreamOverrides) { auto root = Scope::NewRootScope().ExitOnError(); ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0, "/gpu:0"); - ops::Output n = ops::MatMul(root, {}, {}); + Output n = ops::MatMul(root, {}, {}); ops::_Send(root.WithOpName("output"), n, "output", 
"/gpu:0", 0, "/cpu:0"); Graph g(OpRegistry::Global()); TF_ASSERT_OK(root.ToGraph(&g)); diff --git a/tensorflow/core/common_runtime/graph_optimizer.cc b/tensorflow/core/common_runtime/graph_optimizer.cc index cd4bf579c9..e2be3a6086 100644 --- a/tensorflow/core/common_runtime/graph_optimizer.cc +++ b/tensorflow/core/common_runtime/graph_optimizer.cc @@ -18,131 +18,10 @@ limitations under the License. #include "tensorflow/core/common_runtime/constant_folding.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/node_builder.h" #include "tensorflow/core/graph/optimizer_cse.h" namespace tensorflow { -namespace { - -// Replaces occurrences of parallel_concat with the implementation based on -// unsafe ops. Sets removed_any to true if any parallel_concats were removed; -// leaves it untouched otherwise. -// TODO(apassos) Use NodeBuilder. -Status RemoveParallelConcat(bool* removed_any, Graph* g) { - gtl::InlinedVector<Node*, 2> matches; - for (Node* n : g->nodes()) { - if (n->type_string() == "ParallelConcat") { - matches.push_back(n); - } - } - for (Node* n : matches) { - AttrSlice n_attrs(n->def()); - auto make_node = [n, g, &n_attrs](string op) { - NodeDef node; - node.set_op(op); - node.set_name(g->NewName(n->name())); - node.set_device(n->def().device()); - string colo; - if (GetNodeAttr(n_attrs, "_class", &colo).ok()) { - AddNodeAttr("_class", colo, &node); - } - return node; - }; - DataType dtype; - TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype)); - TensorShapeProto shape; - TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "shape", &shape)); - // Add the constant shape input to the start node. 
- NodeDef shape_node_def = make_node("Const"); - AddNodeAttr("dtype", DT_INT32, &shape_node_def); - TensorProto shape_tensor; - shape_tensor.set_dtype(DT_INT32); - shape_tensor.mutable_tensor_shape()->add_dim()->set_size(shape.dim_size()); - for (int i = 0; i < shape.dim_size(); ++i) { - shape_tensor.add_int_val(shape.dim(i).size()); - } - AddNodeAttr("value", shape_tensor, &shape_node_def); - Status status = Status::OK(); - Node* shape_node = g->AddNode(shape_node_def, &status); - if (!status.ok()) return status; - - // Add the start node - NodeDef start_def = make_node("_ParallelConcatStart"); - AddNodeAttr("dtype", dtype, &start_def); - AddNodeAttr("Tshape", DT_INT32, &start_def); - AddNodeAttr("init", false, &start_def); - start_def.add_input(shape_node_def.name()); - Node* start = g->AddNode(start_def, &status); - if (!status.ok()) return status; - // TODO(apassos): make the shape an attr of _ParallelStackBegin. - g->AddEdge(shape_node, 0, start, 0); - - // Add all the inplace_updates. - std::vector<string> control_dependencies; - std::vector<Node*> control_nodes; - int i = 0; - for (const Edge* input_edge : n->in_edges()) { - if (input_edge->IsControlEdge()) { - g->AddControlEdge(input_edge->src(), start); - continue; - } - // Constant index for the update node. - // TODO(apassos): make _ParallelStackUpdate take this as an attr. 
- NodeDef update_idx_def = make_node("Const"); - AddNodeAttr("dtype", DT_INT64, &update_idx_def); - TensorProto index_tensor; - index_tensor.set_dtype(DT_INT64); - index_tensor.mutable_tensor_shape()->add_dim()->set_size(1); - index_tensor.add_int64_val(i); - AddNodeAttr("value", index_tensor, &update_idx_def); - Node* index = g->AddNode(update_idx_def, &status); - if (!status.ok()) return status; - - NodeDef update_def = make_node("_ParallelConcatUpdate"); - control_dependencies.push_back(update_def.name()); - AddNodeAttr("T", dtype, &update_def); - AddNodeAttr("Tshape", DT_INT64, &update_def); - update_def.add_input(start_def.name()); - update_def.add_input(update_idx_def.name()); - update_def.add_input(strings::StrCat(input_edge->src()->name(), ":", - input_edge->src_output())); - Node* update = g->AddNode(update_def, &status); - if (!status.ok()) return status; - g->AddEdge(start, 0, update, 0); - g->AddEdge(index, 0, update, 1); - g->AddEdge(input_edge->src(), input_edge->src_output(), update, 2); - control_nodes.push_back(update); - - ++i; - } - - // Add the final identity. - NodeDef identity_def = make_node("Identity"); - AddNodeAttr("T", dtype, &identity_def); - identity_def.add_input(start_def.name()); - for (const string& s : control_dependencies) { - identity_def.add_input(strings::StrCat("^", s)); - } - Node* identity_node = g->AddNode(identity_def, &status); - if (!status.ok()) return status; - g->AddEdge(start, 0, identity_node, 0); - for (Node* inp : control_nodes) { - g->AddControlEdge(inp, identity_node); - } - - // Remove the node and redirect edges. 
- for (auto* e : n->out_edges()) { - if (e->IsControlEdge()) { - g->AddControlEdge(identity_node, e->dst()); - } else { - g->AddEdge(identity_node, 0, e->dst(), e->dst_input()); - } - } - g->RemoveNode(n); - *removed_any = true; - } - return Status::OK(); -} -} GraphOptimizer::GraphOptimizer(const OptimizerOptions& opts) : opts_(opts) { if (opts_.opt_level() >= OptimizerOptions::L1) { @@ -166,11 +45,6 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env, DumpGraph("RemoveListArrayConverter", g); changed = true; } - auto s = RemoveParallelConcat(&changed, g); - if (!s.ok()) { - // TODO(apassos): figure out how to halt here. - LOG(WARNING) << s; - } if (opts_.do_function_inlining() && RemoveDeadNodes(g)) { DumpGraph("RemoveDeadNodes", g); changed = true; diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc new file mode 100644 index 0000000000..ffbfbc74f1 --- /dev/null +++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc @@ -0,0 +1,126 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/common_runtime/graph_optimizer.h" + +#include "tensorflow/core/common_runtime/constant_folding.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/core/graph/optimizer_cse.h" + +namespace tensorflow { +namespace { + +// Replaces occurrences of parallel_concat with the implementation based on +// unsafe ops. Sets removed_any to true if any parallel_concats were removed; +// leaves it untouched otherwise. +class ParallelConcatRemovePass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override { + if (options.graph == nullptr) { + // TODO(apassos) returning OK feels weird here as we can't do anything + // without a graph, but some tests require this. 
+ return Status::OK(); + } + Graph* g = options.graph->get(); + if (g == nullptr) { + return errors::Internal( + "Parallel concat removal should happen before partitioning and a " + "graph should be available."); + } + gtl::InlinedVector<Node*, 2> matches; + for (Node* n : g->nodes()) { + if (n->type_string() == "ParallelConcat") { + matches.push_back(n); + } + } + for (Node* n : matches) { + AttrSlice n_attrs(n->def()); + auto base_make_node = [n, g, &n_attrs](const string& op, + const string& name) { + NodeBuilder node_builder(name, op); + node_builder.Device(n->def().device()); + string colo; + if (GetNodeAttr(n_attrs, "_class", &colo).ok()) { + node_builder.Attr("_class", colo); + } + return node_builder; + }; + auto make_node = [n, g, &n_attrs, &base_make_node](string op) { + return base_make_node( + op, g->NewName(strings::StrCat(n->name(), "/Internal"))); + }; + DataType dtype; + TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "T", &dtype)); + TensorShapeProto shape; + TF_RETURN_IF_ERROR(GetNodeAttr(n_attrs, "shape", &shape)); + + // Add the start node + Node* start; + TF_RETURN_IF_ERROR(make_node("_ParallelConcatStart") + .Attr("shape", shape) + .Attr("dtype", dtype) + .Finalize(g, &start)); + + // Add all the inplace_updates. + std::vector<Node*> control_nodes; + int64 i = 0; + for (const Edge* input_edge : n->in_edges()) { + if (input_edge->IsControlEdge()) { + g->AddControlEdge(input_edge->src(), start); + continue; + } + + Node* update; + TF_RETURN_IF_ERROR( + make_node("_ParallelConcatUpdate") + .Attr("loc", i) + .Input(start) + .Input(input_edge->src(), input_edge->src_output()) + .Finalize(g, &update)); + control_nodes.push_back(update); + + ++i; + } + + // Add the final identity. 
+ NodeBuilder identity_def = base_make_node("Identity", n->name()); + identity_def.Input(start, 0); + for (Node* s : control_nodes) { + identity_def.ControlInput(s); + } + Node* identity_node; + TF_RETURN_IF_ERROR(identity_def.Finalize(g, &identity_node)); + + // Remove the node and redirect edges. + for (auto* e : n->out_edges()) { + if (e->IsControlEdge()) { + g->AddControlEdge(identity_node, e->dst()); + } else { + g->AddEdge(identity_node, 0, e->dst(), e->dst_input()); + } + } + g->RemoveNode(n); + } + return Status::OK(); + } +}; +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 0, + ParallelConcatRemovePass); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc index 420594d98a..f7d5a9cfc9 100644 --- a/tensorflow/core/common_runtime/shape_refiner_test.cc +++ b/tensorflow/core/common_runtime/shape_refiner_test.cc @@ -492,7 +492,7 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_Shape) { TF_ASSERT_OK( NodeBuilder("in", pass == 0 ? 
"WithPartialShape" : "WithUnknownShape") .Finalize(root.graph(), &input)); - auto shape = ops::Shape(root, ops::Output(input)); + auto shape = ops::Shape(root, Output(input)); Node* result; TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") .Input(shape.node()) @@ -518,12 +518,13 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt32) { TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32") .Finalize(root.graph(), &scalar_non_const)); - ops::InputList inputs{ - ops::Input(ops::Const<int32>(root, 10)), - ops::Input(ops::Const<int32>(root, 20)), - ops::Input(ops::Output(scalar_non_const)), - ops::Input(ops::Const<int32>(root, 40)), - }; + InputList inputs{ + // clang-format off + Input(ops::Const<int32>(root, 10)), + Input(ops::Const<int32>(root, 20)), + Input(Output(scalar_non_const)), + Input(ops::Const<int32>(root, 40)), + }; // clang-format on auto pack = ops::Pack(root, inputs); TF_ASSERT_OK(root.status()); @@ -549,12 +550,13 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) { TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt64") .Finalize(root.graph(), &scalar_non_const)); - ops::InputList inputs{ - ops::Input(ops::Const<int64>(root, 10LL)), - ops::Input(ops::Const<int64>(root, 20LL)), - ops::Input(ops::Output(scalar_non_const)), - ops::Input(ops::Const<int64>(root, 1LL << 40)), - }; + InputList inputs{ + // clang-format off + Input(ops::Const<int64>(root, 10LL)), + Input(ops::Const<int64>(root, 20LL)), + Input(Output(scalar_non_const)), + Input(ops::Const<int64>(root, 1LL << 40)), + }; // clang-format on auto pack = ops::Pack(root, inputs); TF_ASSERT_OK(root.status()); @@ -577,9 +579,9 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) { TEST(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) { Scope root = Scope::NewRootScope(); - ops::InputList inputs{ - ops::Input(ops::Const<int64>(root, 10LL)), - ops::Input(ops::Const<int64>(root, -1LL)), + InputList inputs{ + Input(ops::Const<int64>(root, 10LL)), + Input(ops::Const<int64>(root, 
-1LL)), }; auto pack = ops::Pack(root, inputs); TF_ASSERT_OK(root.status()); @@ -604,9 +606,9 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) { Scope root = Scope::NewRootScope(); // Inputs are length 2 vectors instead of scalars. - ops::InputList inputs{ - ops::Input(ops::Const<int64>(root, {10LL, 20LL})), - ops::Input(ops::Const<int64>(root, {10LL, 21LL})), + InputList inputs{ + Input(ops::Const<int64>(root, {10LL, 20LL})), + Input(ops::Const<int64>(root, {10LL, 21LL})), }; auto pack = ops::Pack(root, inputs); TF_ASSERT_OK(root.status()); @@ -633,10 +635,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_Concat) { TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); auto const_input = ops::Const(root, {9, 10, 11}); - ops::OutputList concat_inputs{ - ops::Shape(root, ops::Output(partial_1)), - ops::Shape(root, ops::Output(partial_2)), const_input, - }; + OutputList concat_inputs{ + // clang-format off + ops::Shape(root, Output(partial_1)), + ops::Shape(root, Output(partial_2)), + const_input, + }; // clang-format on auto concat_dim = ops::Const(root, 0); auto concat = ops::Concat(root, concat_dim, concat_inputs); TF_ASSERT_OK(root.status()); @@ -673,11 +677,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatWithUnknown) { TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); TF_ASSERT_OK(NodeBuilder("in", "WithUnknownShape").Finalize(g, &unknown)); - ops::OutputList concat_inputs{ - ops::Shape(root, ops::Output(partial_1)), - ops::Shape(root, ops::Output(partial_2)), - ops::Shape(root, ops::Output(unknown)), - }; + OutputList concat_inputs{ + // clang-format off + ops::Shape(root, Output(partial_1)), + ops::Shape(root, Output(partial_2)), + ops::Shape(root, Output(unknown)), + }; // clang-format on auto concat_dim = ops::Const(root, 0); auto 
concat = ops::Concat(root, concat_dim, concat_inputs); TF_ASSERT_OK(root.status()); @@ -714,11 +719,12 @@ TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatInvalidDimValue) { TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); auto const_input = ops::Const(root, {9, -2, 11}); - ops::OutputList concat_inputs{ - ops::Shape(root, ops::Output(partial_1)), - ops::Shape(root, ops::Output(partial_2)), // + OutputList concat_inputs{ + // clang-format off + ops::Shape(root, Output(partial_1)), + ops::Shape(root, Output(partial_2)), const_input, - }; + }; // clang-format on auto concat_dim = ops::Const(root, 0); auto concat = ops::Concat(root, concat_dim, concat_inputs); TF_ASSERT_OK(root.status()); diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 73018ec258..e267414654 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -392,7 +392,7 @@ tf_cuda_cc_test( name = "rpcbench_test", size = "small", srcs = ["rpcbench_test.cc"], - linkstatic = tf_kernel_tests_linkstatic(), + linkstatic = 1, tags = tf_cuda_tests_tags(), deps = [ "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index d155051273..44646e9241 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -67,6 +67,7 @@ class GraphConstructor { : in.prefix + "/"), input_map(in.input_map), control_dependencies(in.control_dependencies), + return_tensors(in.return_tensors), importing(true) {} bool allow_internal_ops; @@ -75,6 +76,7 @@ class GraphConstructor { string prefix; std::map<TensorId, TensorId> input_map; std::vector<string> control_dependencies; + std::vector<TensorId> return_tensors; // TODO(ashankar): This bool exists to separate out functionality required // to make ImportGraphDef a 
close equivalent of Python's import_graph_def @@ -88,11 +90,12 @@ class GraphConstructor { }; static Status Construct(const Options& opts, const GraphDef* gdef, Graph* g, - ShapeRefiner* refiner) { + ShapeRefiner* refiner, + std::vector<std::pair<Node*, int>>* return_tensors) { TF_RETURN_IF_ERROR(CheckVersions(gdef->versions(), TF_GRAPH_DEF_VERSION, TF_GRAPH_DEF_VERSION_MIN_PRODUCER, "GraphDef", "graph")); - GraphConstructor c(opts, gdef, g, refiner); + GraphConstructor c(opts, gdef, g, refiner, return_tensors); const Status s = c.TryImport(); if (!s.ok()) c.Undo(); return s; @@ -100,12 +103,14 @@ class GraphConstructor { private: GraphConstructor(const Options& opts, const GraphDef* gdef, Graph* g, - ShapeRefiner* refiner) + ShapeRefiner* refiner, + std::vector<std::pair<Node*, int>>* return_tensors) : opts_(opts), gdef_(gdef), g_(g), original_versions_(g->versions()), - refiner_(refiner) {} + refiner_(refiner), + return_tensors_(return_tensors) {} Status TryImport() { TF_RETURN_IF_ERROR(EnsureNoNameCollisions()); @@ -115,6 +120,7 @@ class GraphConstructor { TF_RETURN_IF_ERROR(Convert()); TF_RETURN_IF_ERROR(AddBackEdges()); TF_RETURN_IF_ERROR(UpdateVersionDef()); + TF_RETURN_IF_ERROR(PopulateReturnTensors()); FixupSourceAndSinkEdges(g_); return Status::OK(); } @@ -126,6 +132,7 @@ class GraphConstructor { Status Convert(); Status AddBackEdges(); Status UpdateVersionDef(); + Status PopulateReturnTensors(); void Undo(); @@ -156,6 +163,9 @@ class GraphConstructor { ShapeRefiner* refiner_; + // May be null. Not owned. 
+ std::vector<std::pair<Node*, int>>* return_tensors_; + // Mapping from node name to the index within gdef_ struct NodeInfo { explicit NodeInfo(int i) : gdef_index(i), node(nullptr) {} @@ -752,6 +762,36 @@ Status GraphConstructor::UpdateVersionDef() { return Status::OK(); } +Status GraphConstructor::PopulateReturnTensors() { + if (opts_.return_tensors.empty()) return Status::OK(); + for (const TensorId& id : opts_.return_tensors) { + auto iter = opts_.input_map.find(id); + if (iter == opts_.input_map.end()) { + // Locate id in imported nodes + auto iter = gdef_nodes_.find(id.first); + if (iter == gdef_nodes_.end()) { + return errors::InvalidArgument( + "Requested return node '", id.first, "' not found in graph def"); + } + int num_outputs = iter->second.node->num_outputs(); + if ((id.second < 0 || id.second >= num_outputs) && + id.second != Graph::kControlSlot) { + return errors::InvalidArgument( + "Invalid return output ", id.second, " of node '", id.first, + "', which has ", num_outputs, " outputs"); + } + return_tensors_->push_back({iter->second.node, id.second}); + } else { + // id was remapped to existing node + TensorId remapped_id = iter->second; + DCHECK_GT(existing_nodes_.count(remapped_id.first), 0); + Node* node = existing_nodes_[remapped_id.first]; + return_tensors_->push_back({node, remapped_id.second}); + } + } + return Status::OK(); +} + void GraphConstructor::Undo() { for (const auto& iter : gdef_nodes_) { if (iter.second.node != nullptr) { @@ -780,16 +820,30 @@ Status GraphConstructor::MakeEdge(Node* src, int output_index, Node* dst, Status ConvertGraphDefToGraph(const GraphConstructorOptions& opts, const GraphDef& gdef, Graph* g) { ShapeRefiner refiner(g->op_registry()); - return GraphConstructor::Construct(opts, &gdef, g, &refiner); + return GraphConstructor::Construct(opts, &gdef, g, &refiner, nullptr); } Status ImportGraphDef(const ImportGraphDefOptions& opts, const GraphDef& gdef, - Graph* g, ShapeRefiner* refiner) { + Graph* g, ShapeRefiner* 
refiner, + std::vector<std::pair<Node*, int>>* return_tensors) { ShapeRefiner default_refiner(g->op_registry()); if (refiner == nullptr) { refiner = &default_refiner; } - return GraphConstructor::Construct(opts, &gdef, g, refiner); + + if (!opts.return_tensors.empty()) { + if (return_tensors == nullptr) { + return errors::InvalidArgument( + "return_tensors argument to ImportNodeDef() must be non-null if " + "opts.return_tensors is non-empty"); + } + if (!return_tensors->empty()) { + return errors::InvalidArgument( + "return_tensors argument to ImportNodeDef() should be empty (has " + "size ", return_tensors->size(), ")"); + } + } + return GraphConstructor::Construct(opts, &gdef, g, refiner, return_tensors); } void CopyGraph(const Graph& src, Graph* dest) { diff --git a/tensorflow/core/graph/graph_constructor.h b/tensorflow/core/graph/graph_constructor.h index 61704913c3..186859d132 100644 --- a/tensorflow/core/graph/graph_constructor.h +++ b/tensorflow/core/graph/graph_constructor.h @@ -97,14 +97,31 @@ struct ImportGraphDefOptions { // other nodes in `gdef`. std::vector<string> control_dependencies; + // Tensors in `gdef` that will be returned via the `return_tensors` output + // parameter of `ImportGraphDef()`. If this list is non-empty, the caller must + // pass an empty vector to `ImportGraphDef()`. The vector will be populated + // with the imported nodes in `g`. + // + // Entries should not include `prefix`, i.e., each TensorId's name should be + // the name as it originally appears in `gdef`. + // + // If this contains a tensor that's also being remapped via `input_map`, the + // corresponding existing tensor in `g` will be returned. + std::vector<TensorId> return_tensors; + // TODO(ashankar): Enable handling of GraphDefs produced by newer binaries // with ops that are not defined in the binary calling ImportGraphDef. // Similar to the producer_op_list argument to import_graph_def in the // python API. 
}; -extern Status ImportGraphDef(const ImportGraphDefOptions& opts, - const GraphDef& gdef, Graph* g, - ShapeRefiner* refiner); + +// Each `return_tensors` entry is the requested node and output index. The index +// is included in case the returned tensor has been remapped according to +// `input_map`. +extern Status ImportGraphDef( + const ImportGraphDefOptions& opts, const GraphDef& gdef, Graph* g, + ShapeRefiner* refiner, + std::vector<std::pair<Node*, int>>* return_tensors = nullptr); // Make a copy of "src" into "*dest". // diff --git a/tensorflow/core/graph/graph_constructor_test.cc b/tensorflow/core/graph/graph_constructor_test.cc index a173d3a627..9ce7a0fdf8 100644 --- a/tensorflow/core/graph/graph_constructor_test.cc +++ b/tensorflow/core/graph/graph_constructor_test.cc @@ -65,14 +65,17 @@ class GraphConstructorTest : public ::testing::Test { EXPECT_EQ(original_graph_description, GraphDebugString()); } - void ExpectError(const string& gdef_ascii, const ImportGraphDefOptions& opts, - const std::vector<string>& expected_error_strs, - ShapeRefiner* refiner = nullptr) { + void ExpectError( + const string& gdef_ascii, const ImportGraphDefOptions& opts, + const std::vector<string>& expected_error_strs, + ShapeRefiner* refiner = nullptr, + std::vector<std::pair<Node*, int>>* return_tensors = nullptr) { // Used to verify that errors don't change graph const string original_graph_description = GraphDebugString(); Convert(gdef_ascii); - Status status = ImportGraphDef(opts, gdef_, &graph_, refiner); + Status status = + ImportGraphDef(opts, gdef_, &graph_, refiner, return_tensors); EXPECT_FALSE(status.ok()); for (const string& error : expected_error_strs) { @@ -90,9 +93,10 @@ class GraphConstructorTest : public ::testing::Test { } void ExpectOK(const string& gdef_ascii, const ImportGraphDefOptions& opts, - ShapeRefiner* refiner = nullptr) { + ShapeRefiner* refiner = nullptr, + std::vector<std::pair<Node*, int>>* return_tensors = nullptr) { Convert(gdef_ascii); - 
Status s = ImportGraphDef(opts, gdef_, &graph_, refiner); + Status s = ImportGraphDef(opts, gdef_, &graph_, refiner, return_tensors); EXPECT_EQ(Status::OK(), s) << s; } @@ -981,6 +985,104 @@ TEST_F(GraphConstructorTest, ImportGraphDef_InputMapDuplicateNodeNames) { &refiner); } +TEST_F(GraphConstructorTest, ImportGraphDef_ReturnTensors) { + ShapeRefiner refiner(graph_.op_registry()); + + ImportGraphDefOptions opts; + opts.return_tensors.push_back({"input", 1}); + opts.return_tensors.push_back({"t1", 0}); + opts.return_tensors.push_back({"input", 0}); + std::vector<std::pair<Node*, int>> return_tensors; + ExpectOK( + "node { name: 'input' op: 'TestInput' }" + "node { name: 't1' op: 'TestMul' input: ['input:0', 'input:1'] }", + opts, &refiner, &return_tensors); + + // Sanity checks + EXPECT_TRUE(HasNode("input")); + EXPECT_TRUE(HasNode("t1")); + EXPECT_TRUE(HasEdge("input", 0, "t1", 0)); + EXPECT_TRUE(HasEdge("input", 1, "t1", 1)); + + // Check return tensors + ASSERT_EQ(return_tensors.size(), 3); + EXPECT_EQ(return_tensors[0].first->name(), "input"); + EXPECT_EQ(return_tensors[0].second, 1); + EXPECT_EQ(return_tensors[1].first->name(), "t1"); + EXPECT_EQ(return_tensors[1].second, 0); + EXPECT_EQ(return_tensors[2].first->name(), "input"); + EXPECT_EQ(return_tensors[2].second, 0); + + // Test using prefix and returning element from input_map + opts.return_tensors.clear(); + return_tensors.clear(); + opts.prefix = "import"; + opts.input_map[{"new_input", 1}] = {"input", 0}; + opts.return_tensors.push_back({"new_input", 0}); + opts.return_tensors.push_back({"new_input", 1}); + ExpectOK("node { name: 'new_input' op: 'TestInput' }", opts, &refiner, + &return_tensors); + + EXPECT_TRUE(HasNode("import/new_input")); + + ASSERT_EQ(return_tensors.size(), 2); + EXPECT_EQ(return_tensors[0].first->name(), "import/new_input"); + EXPECT_EQ(return_tensors[0].second, 0); + EXPECT_EQ(return_tensors[1].first->name(), "input"); + EXPECT_EQ(return_tensors[1].second, 0); + + // Test 
returning node remapped to source node + opts.prefix.clear(); + opts.input_map.clear(); + opts.return_tensors.clear(); + return_tensors.clear(); + opts.input_map[{"new_input", 0}] = {"_SOURCE", 0}; + opts.return_tensors.push_back({"new_input", 0}); + ExpectOK("node { name: 'new_input' op: 'TestInput' }", opts, &refiner, + &return_tensors); + + EXPECT_TRUE(HasNode("new_input")); + + ASSERT_EQ(return_tensors.size(), 1); + EXPECT_EQ(return_tensors[0].first->name(), "_SOURCE"); + EXPECT_EQ(return_tensors[0].second, 0); +} + +TEST_F(GraphConstructorTest, ImportGraphDef_ReturnTensorsErrors) { + // Passing in return_tensors with empty opts.return_tensors is OK + ImportGraphDefOptions opts; + std::vector<std::pair<Node*, int>> return_tensors; + ExpectOK("node { name: 'input' op: 'TestInput' }", opts, nullptr, + &return_tensors); + + // Null return_tensors with non-empty opts.return_tensors + opts.return_tensors.push_back({"new_input", 0}); + ExpectError("node { name: 'new_input' op: 'TestInput' }", opts, + {"return_tensors argument to ImportNodeDef() must be non-null " + "if opts.return_tensors is non-empty"}); + + // Non-empty return_tensors + return_tensors.push_back({nullptr, 0}); + ExpectError("node { name: 'new_input' op: 'TestInput' }", opts, + {"return_tensors argument to ImportNodeDef() should be empty " + "(has size 1)"}, + nullptr, &return_tensors); + + // Requesting tensor that isn't in graph def + return_tensors.clear(); + ExpectError("node { name: 'W1' op: 'TestParams' }", opts, + {"Requested return node 'new_input' not found in graph def"}, + nullptr, &return_tensors); + + // Requesting invalid node index + opts.return_tensors.clear(); + opts.return_tensors.push_back({"new_input", 2}); + ExpectError("node { name: 'new_input' op: 'TestInput' }", opts, + {"Invalid return output 2 of node 'new_input', which has 2 " + "outputs"}, + nullptr, &return_tensors); +} + TEST_F(GraphConstructorTest, ImportGraphDef_WithCycle) { // Test graph produced in python using: /* 
diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc index d8322e6077..6d3dbc0abb 100644 --- a/tensorflow/core/graph/graph_partition_test.cc +++ b/tensorflow/core/graph/graph_partition_test.cc @@ -128,13 +128,13 @@ void CheckLoopConstruction(const GraphDef& graph_def) { } } -REGISTER_OP("Input").Output("o: float"); +REGISTER_OP("FloatInput").Output("o: float"); REGISTER_OP("BoolInput").Output("o: bool"); REGISTER_OP("Combine").Input("a: float").Input("b: float").Output("o: float"); -ops::Output ConstructOp(const Scope& scope, const string& op_type, - const gtl::ArraySlice<ops::Input>& inputs) { - if (!scope.ok()) return ops::Output(); +Output ConstructOp(const Scope& scope, const string& op_type, + const gtl::ArraySlice<Input>& inputs) { + if (!scope.ok()) return Output(); const string unique_name = scope.GetUniqueNameForOp(op_type); auto builder = NodeBuilder(unique_name, op_type); for (auto const& input : inputs) { @@ -143,19 +143,19 @@ ops::Output ConstructOp(const Scope& scope, const string& op_type, scope.UpdateBuilder(&builder); Node* ret; scope.UpdateStatus(builder.Finalize(scope.graph(), &ret)); - if (!scope.ok()) return ops::Output(); - return ops::Output(ret); + if (!scope.ok()) return Output(); + return Output(ret); } -ops::Output Input(const Scope& scope) { - return ConstructOp(scope, "Input", {}); +Output FloatInput(const Scope& scope) { + return ConstructOp(scope, "FloatInput", {}); } -ops::Output BoolInput(const Scope& scope) { +Output BoolInput(const Scope& scope) { return ConstructOp(scope, "BoolInput", {}); } -ops::Output Combine(const Scope& scope, ops::Input a, ops::Input b) { +Output Combine(const Scope& scope, Input a, Input b) { return ConstructOp(scope, "Combine", {a, b}); } @@ -196,21 +196,21 @@ class GraphPartitionTest : public ::testing::Test { TEST_F(GraphPartitionTest, SingleDevice) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = 
Input(in_.WithOpName("A1")); + auto a1 = FloatInput(in_.WithOpName("A1")); Combine(in_.WithOpName("A2"), a1, a1); Partition(ToGraphDef(), &partitions_); EXPECT_EQ(1, partitions_.size()); - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); Combine(scope_a_.WithOpName("A2"), a1, a1); ExpectMatchA(); } TEST_F(GraphPartitionTest, CrossDeviceData) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = Input(in_.WithOpName("A1")); - auto b1 = Input(in_.WithOpName("B1")); + auto a1 = FloatInput(in_.WithOpName("A1")); + auto b1 = FloatInput(in_.WithOpName("B1")); Combine(in_.WithOpName("B2"), a1, b1); Partition(ToGraphDef(), &partitions_); @@ -218,11 +218,11 @@ TEST_F(GraphPartitionTest, CrossDeviceData) { string a = "/job:a/replica:0/task:0/cpu:0"; string b = "/job:a/replica:0/task:0/cpu:1"; - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); _Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b); ExpectMatchA(); - b1 = Input(scope_b_.WithOpName("B1")); + b1 = FloatInput(scope_b_.WithOpName("B1")); auto recv = _Recv(scope_b_.WithOpName("A1/_1"), DT_FLOAT, "edge_1_A1", a, 82, b); Combine(scope_b_.WithOpName("B2"), recv, b1); @@ -231,8 +231,8 @@ TEST_F(GraphPartitionTest, CrossDeviceData) { TEST_F(GraphPartitionTest, CrossDeviceControl) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = Input(in_.WithOpName("A1")); - auto b1 = Input(in_.WithOpName("B1")); + auto a1 = FloatInput(in_.WithOpName("A1")); + auto b1 = FloatInput(in_.WithOpName("B1")); Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1); Partition(ToGraphDef(), &partitions_); @@ -240,7 +240,7 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) { string a = "/job:a/replica:0/task:0/cpu:0"; string b = "/job:a/replica:0/task:0/cpu:1"; - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); auto c = 
Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {}); _Send(scope_a_.WithOpName("A1/_1"), c, "edge_3_A1", a, 82, b); ExpectMatchA(); @@ -248,15 +248,15 @@ TEST_F(GraphPartitionTest, CrossDeviceControl) { auto recv = _Recv(scope_b_.WithOpName("A1/_2"), DT_FLOAT, "edge_3_A1", a, 82, b); auto id = Identity(scope_b_.WithOpName("A1/_3"), recv); - b1 = Input(scope_b_.WithOpName("B1")); + b1 = FloatInput(scope_b_.WithOpName("B1")); Combine(scope_b_.WithOpName("B2").WithControlDependencies(id), b1, b1); ExpectMatchB(); } TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = Input(in_.WithOpName("A1")); - auto b1 = Input(in_.WithOpName("B1")); + auto a1 = FloatInput(in_.WithOpName("A1")); + auto b1 = FloatInput(in_.WithOpName("B1")); Combine(in_.WithOpName("B2"), a1, b1); Combine(in_.WithOpName("B3"), a1, a1); @@ -265,13 +265,13 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) { string a = "/job:a/replica:0/task:0/cpu:0"; string b = "/job:a/replica:0/task:0/cpu:1"; - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); _Send(scope_a_.WithOpName("A1/_0"), a1, "edge_1_A1", a, 82, b); ExpectMatchA(); auto recv = _Recv(scope_b_.WithOpName("A1/_1"), DT_FLOAT, "edge_1_A1", a, 82, b); - b1 = Input(scope_b_.WithOpName("B1")); + b1 = FloatInput(scope_b_.WithOpName("B1")); Combine(scope_b_.WithOpName("B2"), recv, b1); Combine(scope_b_.WithOpName("B3"), recv, recv); ExpectMatchB(); @@ -279,17 +279,17 @@ TEST_F(GraphPartitionTest, CrossDeviceData_MultiUse) { TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = Input(in_.WithOpName("A1")); - auto b1 = Input(in_.WithOpName("B1")); + auto a1 = FloatInput(in_.WithOpName("A1")); + auto b1 = FloatInput(in_.WithOpName("B1")); Combine(in_.WithOpName("B2").WithControlDependencies(a1), b1, b1); - 
Input(in_.WithOpName("B3").WithControlDependencies(a1)); + FloatInput(in_.WithOpName("B3").WithControlDependencies(a1)); Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); string a = "/job:a/replica:0/task:0/cpu:0"; string b = "/job:a/replica:0/task:0/cpu:1"; - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); auto c = Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {}); _Send(scope_a_.WithOpName("A1/_1"), c, "edge_1_A1", a, 82, b); ExpectMatchA(); @@ -297,25 +297,25 @@ TEST_F(GraphPartitionTest, CrossDeviceControl_MultiUse) { auto recv = _Recv(scope_b_.WithOpName("A1/_2"), DT_FLOAT, "edge_1_A1", a, 82, b); auto id = Identity(scope_b_.WithOpName("A1/_3"), recv); - b1 = Input(scope_b_.WithOpName("B1")); + b1 = FloatInput(scope_b_.WithOpName("B1")); Combine(scope_b_.WithOpName("B2").WithControlDependencies(id), b1, b1); - Input(scope_b_.WithOpName("B3").WithControlDependencies(id)); + FloatInput(scope_b_.WithOpName("B3").WithControlDependencies(id)); ExpectMatchB(); } TEST_F(GraphPartitionTest, CrossDevice_DataControl) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - auto a1 = Input(in_.WithOpName("A1")); - auto b1 = Input(in_.WithOpName("B1")); + auto a1 = FloatInput(in_.WithOpName("A1")); + auto b1 = FloatInput(in_.WithOpName("B1")); Combine(in_.WithOpName("B2"), a1, b1); - Input(in_.WithOpName("B3").WithControlDependencies(a1)); + FloatInput(in_.WithOpName("B3").WithControlDependencies(a1)); Partition(ToGraphDef(), &partitions_); EXPECT_EQ(2, partitions_.size()); string a = "/job:a/replica:0/task:0/cpu:0"; string b = "/job:a/replica:0/task:0/cpu:1"; - a1 = Input(scope_a_.WithOpName("A1")); + a1 = FloatInput(scope_a_.WithOpName("A1")); auto c = Const(scope_a_.WithOpName("A1/_0").WithControlDependencies(a1), {}); // NOTE: Send 0 A1/_1 -> A1/_2 is not necessarily needed. We could // use A1/_0 -> A1/_4 as the control as a minor optimization. 
@@ -328,9 +328,9 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) { auto id1 = Identity(scope_b_.WithOpName("A1/_3"), recv1); auto recv2 = _Recv(scope_b_.WithOpName("A1/_5"), DT_FLOAT, "edge_2_A1", a, 82, b); - b1 = Input(scope_b_.WithOpName("B1")); + b1 = FloatInput(scope_b_.WithOpName("B1")); Combine(scope_b_.WithOpName("B2"), recv2, b1); - Input(scope_b_.WithOpName("B3").WithControlDependencies(id1)); + FloatInput(scope_b_.WithOpName("B3").WithControlDependencies(id1)); ExpectMatchB(); } @@ -338,8 +338,7 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) auto a1 = BoolInput(in_.WithOpName("A1")); auto a2 = Enter(in_.WithOpName("A2"), a1, "foo"); - auto a3 = - Merge(in_.WithOpName("A3"), {a2, ops::Input("A5", 0, DT_BOOL)}).output; + auto a3 = Merge(in_.WithOpName("A3"), {a2, Input("A5", 0, DT_BOOL)}).output; LoopCond(in_.WithOpName("A4"), a3); auto b1 = Identity(in_.WithOpName("B1"), a3); NextIteration(in_.WithOpName("A5"), b1); @@ -351,8 +350,7 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop1) { using namespace ::tensorflow::ops; // NOLINT(build/namespaces) auto a1 = BoolInput(in_.WithOpName("A1")); auto a2 = Enter(in_.WithOpName("B2"), a1, "foo"); - auto a3 = - Merge(in_.WithOpName("A3"), {a2, ops::Input("B5", 0, DT_BOOL)}).output; + auto a3 = Merge(in_.WithOpName("A3"), {a2, Input("B5", 0, DT_BOOL)}).output; LoopCond(in_.WithOpName("A4"), a3); auto b1 = Identity(in_.WithOpName("B1"), a3); NextIteration(in_.WithOpName("B5"), b1); diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 82ed7d6b42..fb663e5f58 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -318,6 +318,19 @@ cc_library( ) cc_library( + name = "record_input_op", + srcs = [ + "record_input_op.cc", + "record_yielder.cc", + "record_yielder.h", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +cc_library( name = "save_restore_tensor", srcs = 
["save_restore_tensor.cc"], hdrs = ["save_restore_tensor.h"], @@ -1177,6 +1190,7 @@ cc_library( ":priority_queue_op", ":queue_ops", ":random_shuffle_queue_op", + ":record_input_op", ":session_ops", ":sparse_conditional_accumulator_op", ":stack_ops", @@ -1679,6 +1693,7 @@ tf_cc_tests( ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", @@ -3735,10 +3750,7 @@ filegroup( "ctc_loss_op.*", # Excluded due to experimental status: "debug_ops.*", - # Ops excluded because they do not build correctly for Android. - # See b/29213790 "scatter_nd_op*", - "sparse_matmul_op.*", # Lib CURL is not supported on Android. "bigquery*", ], diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 1222093a7a..9263c062ba 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ b/tensorflow/core/kernels/hexagon/BUILD @@ -72,12 +72,14 @@ tf_cc_test( tf_kernel_library( name = "graph_transferer", srcs = [ + "graph_transfer_utils.cc", "graph_transferer.cc", "hexagon_control_wrapper.cc", "hexagon_ops_definitions.cc", "i_graph_transfer_ops_definitions.cc", ], hdrs = [ + "graph_transfer_utils.h", "graph_transferer.h", "hexagon_control_wrapper.h", "hexagon_ops_definitions.h", diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc new file mode 100644 index 0000000000..c37e49f242 --- /dev/null +++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.cc @@ -0,0 +1,49 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h" + +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +/* static */ std::priority_queue<std::tuple<float, int, string>> +GraphTransferUtils::GetTopNFloatResults(const float *const data, + const string *const labels, + const int element_count) { + CHECK(data != nullptr); + CHECK(labels != nullptr); + std::priority_queue<std::tuple<float, int, string>> queue; + for (int i = 0; i < element_count; ++i) { + queue.emplace(data[i], i, labels[i]); + } + return queue; +} + +/* static */ void GraphTransferUtils::DumpTopNFloatResults( + const float *const data, const string *const labels, + const int element_count, const int top_n) { + std::priority_queue<std::tuple<float, int, string>> queue = + GetTopNFloatResults(data, labels, element_count); + LOG(INFO) << "=== Dump ranking ==="; + for (int i = 0; i < top_n; ++i) { + const std::tuple<float, int, string> &entry = queue.top(); + LOG(INFO) << i << ": " << std::get<1>(entry) << ", " << std::get<2>(entry) + << ", " << std::get<0>(entry); + queue.pop(); + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/hexagon/graph_transfer_utils.h b/tensorflow/core/kernels/hexagon/graph_transfer_utils.h new file mode 100644 index 0000000000..85af9b5ce3 --- /dev/null +++ b/tensorflow/core/kernels/hexagon/graph_transfer_utils.h @@ -0,0 +1,41 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_ +#define TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_ + +#include <queue> + +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { + +class GraphTransferUtils { + public: + static std::priority_queue<std::tuple<float, int, string>> + GetTopNFloatResults(const float *const data, const string *const labels, + const int element_count); + static void DumpTopNFloatResults(const float *const data, + const string *const labels, + const int element_count, const int top_n); + + private: + TF_DISALLOW_COPY_AND_ASSIGN(GraphTransferUtils); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PLATFORM_HEXAGON_GRAPH_TRANSFER_UTILS_H_ diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.cc b/tensorflow/core/kernels/hexagon/graph_transferer.cc index 5b2a95a371..662b935b90 100644 --- a/tensorflow/core/kernels/hexagon/graph_transferer.cc +++ b/tensorflow/core/kernels/hexagon/graph_transferer.cc @@ -38,14 +38,11 @@ const string INPUTS_NODE_PREFIX = "inputs_for_"; const string OUTPUTS_NODE_PREFIX = "outputs_for_"; const string DATA_NODE_PREFIX = "data_for_op_"; const string CONST_SHAPE_PREFIX = "const_shape_"; -const string PADDING_PREFIX = "NN_PAD_"; const string PADDING_ATTR_NAME = "padding"; const string STRIDES_ATTR_NAME = "strides"; 
const string KSIZE_ATTR_NAME = "ksize"; -const string PADDING_VALID_STR = "VALID"; -const string PADDING_SAME_STR = "SAME"; -const string PADDING_NA = "NA"; const string NULL_OUTPUT_NAME = "NULL"; +const int PADDING_NA_ID = 0; // VALID = 1, SAME = 2 // This is a temporary workaround to support android build // where std::string is not supported even with c++11 option. @@ -413,7 +410,6 @@ void GraphTransferer::RegisterConstantNode( VLOG(1) << "Register constant node: " << node.name(); CHECK(node_name_to_id_cache_map_.count(node.name()) == 1); const int id = node_name_to_id_cache_map_[node.name()]; - const string data_name = DATA_NODE_PREFIX + ToString(id); const int output_node_size = node.num_outputs(); CHECK(output_node_size == 1); // TODO(satok): support multiple outputs? @@ -448,7 +444,6 @@ void GraphTransferer::RegisterConstantNode( ConstNodeTransferParams{node.name(), id, {{shape[0], shape[1], shape[2], shape[3]}}, - data_name, data_size}); // TODO(satok): Remove. Determine constant value without dryrun if (!output_tensor_map.empty() && data_size != 0) { @@ -474,7 +469,7 @@ int GraphTransferer::RegisterConstantShape(const std::vector<int>& shape) { const int id = node_name_cache_list_.size() - 1; node_name_to_id_cache_map_.emplace(shape_name, id); const_node_transfer_params_list_.emplace_back(ConstNodeTransferParams{ - shape_name, id, {{shape[0], shape[1], shape[2], shape[3]}}, "", 0}); + shape_name, id, {{shape[0], shape[1], shape[2], shape[3]}}, 0}); } return node_name_to_id_cache_map_[shape_name]; } @@ -545,17 +540,17 @@ void GraphTransferer::RegisterNodeWithPaddingAndStrides( const int ksize_id = RegisterConstantShape(kernel_sizes); extra_inputs.insert(extra_inputs.begin(), ksize_id); } - const std::string padding_str = - padding == VALID ? 
PADDING_VALID_STR : PADDING_SAME_STR; const int op_type_id = ops_definitions.GetOpIdFor(node.type_string()); CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount()) << "Op " << node.type_string() << " not found in map(id = " << op_type_id << ")"; - AppendNodeParamsWithIoParams(shape_refiner, output_tensor_map, node, - node.name(), id, node.type_string(), op_type_id, - padding_str, node.num_inputs(), extra_inputs, - node.num_outputs(), true /* append_input */, - true /* append_output */); + // Safety check of padding id + CHECK(padding == Padding::VALID ? 1 : 2); + AppendNodeParamsWithIoParams( + shape_refiner, output_tensor_map, node, node.name(), id, + node.type_string(), op_type_id, static_cast<int>(padding), + node.num_inputs(), extra_inputs, node.num_outputs(), + true /* append_input */, true /* append_output */); } void GraphTransferer::RegisterInputNode( @@ -570,7 +565,7 @@ void GraphTransferer::RegisterInputNode( CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount()); AppendNodeParamsWithIoParams( shape_refiner, output_tensor_map, node, node.name(), id, - node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {}, + node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(), true /* append_input */, true /* append_output */); } @@ -587,7 +582,7 @@ void GraphTransferer::RegisterOutputNode( // TODO(satok): Set output for output node? 
AppendNodeParamsWithIoParams( shape_refiner, output_tensor_map, node, node.name(), id, - node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {}, + node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {}, 0 /* outputs_size */, true /* append_input */, false /* append_output */); } @@ -604,7 +599,7 @@ void GraphTransferer::RegisterFlattenNode( AppendNodeParamsWithIoParams( shape_refiner, output_tensor_map, node, node.name(), id, - node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {}, + node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(), true /* append_input */, true /* append_output */); } @@ -620,7 +615,7 @@ void GraphTransferer::RegisterGenericNode( AppendNodeParamsWithIoParams( shape_refiner, output_tensor_map, node, node.name(), id, - node.type_string(), op_type_id, PADDING_NA, node.num_inputs(), {}, + node.type_string(), op_type_id, PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(), true /* append_input */, true /* append_output */); } @@ -644,18 +639,13 @@ Status GraphTransferer::RegisterNodeIfAllInputsAreCached( // CAVEAT: Append inputs and outputs params accordingly void GraphTransferer::AppendNodeParams(const string& name, const int id, const string& type, const int type_id, - const string& padding_str, - const int inputs_size, + const int padding, const int inputs_size, const std::vector<int>& extra_inputs, const int outputs_size) { VLOG(1) << "Append node params: " << name; - // TODO(satok): store padding as Padding? - const string output_name = OUTPUTS_NODE_PREFIX + ToString(id); node_transfer_params_list_.emplace_back( - NodeTransferParams{name, id, type, type_id, PADDING_PREFIX + padding_str, - INPUTS_NODE_PREFIX + ToString(id), + NodeTransferParams{name, id, type, type_id, padding, inputs_size + static_cast<int>(extra_inputs.size()), - outputs_size <= 0 ? 
NULL_OUTPUT_NAME : output_name, static_cast<int>(outputs_size)}); } @@ -738,7 +728,7 @@ void GraphTransferer::AppendNodeOutputParams( void GraphTransferer::AppendNodeParamsWithIoParams( const ShapeRefiner& shape_refiner, const OutputTensorMap& output_tensor_map, const Node& node, const string& name, const int id, const string& type, - const int type_id, const string& padding_str, const int inputs_size, + const int type_id, const int padding, const int inputs_size, const std::vector<int>& extra_inputs, const int outputs_size, const bool append_input_params, const bool append_output_params) { VLOG(1) << "Append node with io params: " << node.name(); @@ -748,8 +738,8 @@ void GraphTransferer::AppendNodeParamsWithIoParams( if (append_output_params) { AppendNodeOutputParams(shape_refiner, output_tensor_map, id, node); } - AppendNodeParams(name, id, type, type_id, padding_str, inputs_size, - extra_inputs, outputs_size); + AppendNodeParams(name, id, type, type_id, padding, inputs_size, extra_inputs, + outputs_size); } /* static */ std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE> @@ -808,6 +798,20 @@ GraphTransferer::ToTensorShapeArray(const TensorShape& shape) { } } +/* static */ string GraphTransferer::ToPaddingDebugString(const int padding) { + switch (padding) { + case 0: + return "NN_PAD_NA"; + case Padding::VALID: + return "NN_PAD_VALID"; + case Padding::SAME: + return "NN_PAD_SAME"; + default: + CHECK(false); + return ""; + } +} + /* static */ void GraphTransferer::CheckShape( const OutputTensorMap& output_tensor_map, const string& node_name, const std::array<int64, SHAPE_ARRAY_SIZE>& expected) { @@ -903,7 +907,10 @@ void GraphTransferer::DumpNodeTransferParams() const { LOG(INFO) << "[ " << params.node_id << " \"" << params.name << "\" (Const)"; LOG(INFO) << " shape: " << params.shape[0] << params.shape[1] << params.shape[2] << params.shape[3]; - LOG(INFO) << " data_name: " << params.data_name; + LOG(INFO) << " data_name: " + << (params.data_size <= 0 + ? 
"" + : DATA_NODE_PREFIX + ToString(params.node_id)); LOG(INFO) << " data_size: " << params.data_size << " bytes" << " ]"; } @@ -911,11 +918,14 @@ void GraphTransferer::DumpNodeTransferParams() const { LOG(INFO) << "*** Op Nodes ***"; for (const NodeTransferParams& params : node_transfer_params_list_) { LOG(INFO) << "[ " << params.node_id << " \"" << params.name; - LOG(INFO) << " type: " << params.type; - LOG(INFO) << " padding: " << params.padding; - LOG(INFO) << " inputs: " << params.inputs_name + LOG(INFO) << " type: " << params.type_name; + LOG(INFO) << " padding: " << ToPaddingDebugString(params.padding); + LOG(INFO) << " inputs: " << INPUTS_NODE_PREFIX + ToString(params.node_id) << ", size = " << params.inputs_size; - LOG(INFO) << " outputs: " << params.outputs_name + LOG(INFO) << " outputs: " + << (params.outputs_size <= 0 + ? NULL_OUTPUT_NAME + : (OUTPUTS_NODE_PREFIX + ToString(params.node_id))) << ", size = " << params.outputs_size << " ]"; } LOG(INFO) << "******\n"; @@ -946,8 +956,10 @@ void GraphTransferer::DumpVerificationStringOfNodeTransferParams() const { sstream << "---(CONST) [" << std::hex << params.node_id << std::dec << "," << params.shape[0] << "," << params.shape[1] << "," << params.shape[2] << "," << params.shape[3] << "," - << params.data_name << "," << params.data_size << "," << params.name - << "]"; + << (params.data_size <= 0 + ? 
"" + : DATA_NODE_PREFIX + ToString(params.node_id)) + << "," << params.data_size << "," << params.name << "]"; LOG(INFO) << sstream.str(); } LOG(INFO) << "Const node count = " << const_node_transfer_params_list_.size(); @@ -955,9 +967,13 @@ void GraphTransferer::DumpVerificationStringOfNodeTransferParams() const { std::stringstream sstream; sstream << "---(OP) [" << params.name.c_str() << "," << std::hex << params.node_id << std::dec << "," << params.soc_op_id << "," - << params.padding << "," << params.inputs_name << "," - << params.inputs_size << "," << params.outputs_name << "," - << params.outputs_size << "," << params.type << "]"; + << ToPaddingDebugString(params.padding) << "," + << INPUTS_NODE_PREFIX + ToString(params.node_id) << "," + << params.inputs_size << "," + << (params.outputs_size <= 0 + ? NULL_OUTPUT_NAME + : (OUTPUTS_NODE_PREFIX + ToString(params.node_id))) + << "," << params.outputs_size << "," << params.type_name << "]"; LOG(INFO) << sstream.str(); } LOG(INFO) << "Op node count = " << node_transfer_params_list_.size(); diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.h b/tensorflow/core/kernels/hexagon/graph_transferer.h index 7bc6293be8..d86452905f 100644 --- a/tensorflow/core/kernels/hexagon/graph_transferer.h +++ b/tensorflow/core/kernels/hexagon/graph_transferer.h @@ -52,21 +52,18 @@ class GraphTransferer { struct NodeTransferParams { string name; int node_id; - string type; // for debug info + string type_name; int soc_op_id; - string padding; - string inputs_name; // for debug info TODO(satok): remove + int padding; int inputs_size; - string outputs_name; // for debug info TODO(satok): remove int outputs_size; }; // Const node parameters for transfer struct ConstNodeTransferParams { - string name; // for debug info + string name; int node_id; std::array<int64, MAX_SUPPORTED_RANK> shape; - string data_name; // for debug info TODO(satok): remove int data_size; std::vector<uint8> data; }; @@ -215,7 +212,7 @@ class GraphTransferer 
{ const OutputTensorMap& output_tensor_map); void AppendNodeParams(const string& name, const int id, const string& type, - const int type_id, const string& padding_str, + const int type_id, const int padding, const int inputs_size, const std::vector<int>& extra_inputs, const int outputs_size); @@ -235,13 +232,15 @@ class GraphTransferer { const ShapeRefiner& shape_refiner, const OutputTensorMap& output_tensor_map, const Node& node, const string& name, const int id, const string& type, const int type_id, - const string& padding_str, const int inputs_size, + const int padding, const int inputs_size, const std::vector<int>& extra_inputs, const int outputs_size, const bool append_input_params, const bool append_output_params); static std::array<int64, SHAPE_ARRAY_SIZE> ToTensorShapeArray( const TensorShape& shape); + static string ToPaddingDebugString(int padding); + static void CheckShape(const OutputTensorMap& output_tensor_map, const string& node_name, const std::array<int64, SHAPE_ARRAY_SIZE>& actual); diff --git a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc index b9a4c8aff0..92b58083b9 100644 --- a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc +++ b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc @@ -69,10 +69,9 @@ class TestGraphTransferOpsDefinitions : public IGraphTransferOpsDefinitions { static GraphDef CreateAddGraphDef() { Scope root = Scope::NewRootScope(); - ops::Output node_a = ops::Const(root.WithOpName(NAME_A), NODE_A_VAL); - ops::Output node_b = ops::Const(root.WithOpName(NAME_B), NODE_B_VAL); - ops::Output node_add = - ops::Add(root.WithOpName(NAME_A_PLUS_B), node_a, node_b); + Output node_a = ops::Const(root.WithOpName(NAME_A), NODE_A_VAL); + Output node_b = ops::Const(root.WithOpName(NAME_B), NODE_B_VAL); + Output node_add = ops::Add(root.WithOpName(NAME_A_PLUS_B), node_a, node_b); GraphDef def; TF_CHECK_OK(root.ToGraphDef(&def)); return def; @@ -82,16 +81,16 
@@ static GraphDef CreateConvGraphDef() { Scope root = Scope::NewRootScope(); Tensor input_data(DT_FLOAT, TensorShape({1, 1, 1, 1})); test::FillIota<float>(&input_data, 1.0f); - ops::Output input = - ops::Const(root.WithOpName("input"), ops::Input::Initializer(input_data)); + Output input = + ops::Const(root.WithOpName("input"), Input::Initializer(input_data)); Tensor filter_data(DT_FLOAT, TensorShape({1, 1, 1, 1})); test::FillIota<float>(&filter_data, 1.0f); - ops::Output filter = ops::Const(root.WithOpName("filter"), - ops::Input::Initializer(filter_data)); + Output filter = + ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)); const std::vector<int> strides{1, 1, 1, 1}; - ops::Output conv = + Output conv = ops::Conv2D(root.WithOpName("conv"), input, filter, strides, "SAME"); - ops::Output softmax = ops::Softmax(root.WithOpName("softmax"), conv); + Output softmax = ops::Softmax(root.WithOpName("softmax"), conv); GraphDef def; TF_CHECK_OK(root.ToGraphDef(&def)); return def; @@ -101,18 +100,18 @@ static GraphDef CreatePoolGraphDef() { Scope root = Scope::NewRootScope(); Tensor input_data(DT_FLOAT, TensorShape({1, 1, 1, 1})); test::FillIota<float>(&input_data, 1.0f); - ops::Output input = - ops::Const(root.WithOpName("input"), ops::Input::Initializer(input_data)); + Output input = + ops::Const(root.WithOpName("input"), Input::Initializer(input_data)); Tensor filter_data(DT_FLOAT, TensorShape({1, 1, 1, 1})); test::FillIota<float>(&filter_data, 1.0f); - ops::Output filter = ops::Const(root.WithOpName("filter"), - ops::Input::Initializer(filter_data)); + Output filter = + ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)); const std::vector<int> ksize{1, 1, 1, 1}; const std::vector<int> padding{0, 0, 0, 0}; const std::vector<int> strides{1, 1, 1, 1}; - ops::Output max_pool = + Output max_pool = ops::MaxPool(root.WithOpName("maxpool"), input, ksize, strides, "SAME"); - ops::Output softmax = ops::Softmax(root.WithOpName("softmax"), 
max_pool); + Output softmax = ops::Softmax(root.WithOpName("softmax"), max_pool); GraphDef def; TF_CHECK_OK(root.ToGraphDef(&def)); return def; @@ -352,10 +351,10 @@ TEST_F(GraphTransfererTest, LoadConvGraph) { ASSERT_TRUE(params_conv != nullptr); const int id = params_conv->node_id; EXPECT_GE(id, 0); - EXPECT_EQ("Conv2D", params_conv->type); + EXPECT_EQ("Conv2D", params_conv->type_name); EXPECT_EQ(3, params_conv->inputs_size); EXPECT_EQ(1, params_conv->outputs_size); - EXPECT_EQ("NN_PAD_SAME", params_conv->padding); + EXPECT_EQ(Padding::SAME, params_conv->padding); } TEST_F(GraphTransfererTest, LoadMaxPoolGraph) { @@ -378,10 +377,10 @@ TEST_F(GraphTransfererTest, LoadMaxPoolGraph) { ASSERT_TRUE(params_max_pool != nullptr); const int id = params_max_pool->node_id; EXPECT_GE(id, 0); - EXPECT_EQ("MaxPool", params_max_pool->type); + EXPECT_EQ("MaxPool", params_max_pool->type_name); EXPECT_EQ(3, params_max_pool->inputs_size); EXPECT_EQ(1, params_max_pool->outputs_size); - EXPECT_EQ("NN_PAD_SAME", params_max_pool->padding); + EXPECT_EQ(Padding::SAME, params_max_pool->padding); } TEST(HexagonOpsDefinitions, CheckOpsDefinitions) { diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc index ecebd3c599..ca29fcdd47 100644 --- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc +++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc @@ -15,12 +15,9 @@ limitations under the License. 
#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h" -#include <queue> - #ifdef USE_HEXAGON_LIBS #include "tensorflow/core/platform/hexagon/soc_interface.h" #include "tensorflow/core/platform/profile_utils/cpu_utils.h" -#include "tensorflow/core/platform/types.h" #endif namespace tensorflow { @@ -28,7 +25,6 @@ namespace tensorflow { const bool SHOW_DBG_IN_SOC = false; const bool DBG_USE_DUMMY_INPUT = false; const bool DBG_USE_SAMPLE_INPUT = false; -const bool DBG_SHOW_RESULT = false; const int64 FLAG_ENABLE_PANDA_BINARY_INPUT = 0x01; #ifdef USE_HEXAGON_LIBS @@ -145,18 +141,15 @@ bool HexagonControlWrapper::SetupGraph( output_count = std::get<1>(output_ptr_and_count); CHECK(output_count > 0); } - - // TODO(satok): Do not use string. Use enum instead. - const string padding = params.padding; int padding_id = -1; - if (padding == "NN_PAD_NA") { + if (params.padding == 0) { padding_id = 0; - } else if (padding == "NN_PAD_SAME") { + } else if (params.padding == Padding::SAME) { padding_id = 1; - } else if (padding == "NN_PAD_VALID") { + } else if (params.padding == Padding::VALID) { padding_id = 2; } else { - CHECK(false) << "Unsupported padding " << padding; + CHECK(false); } soc_interface_AppendNode(params.name.c_str(), node_id + NODE_ID_OFFSET, op_id, padding_id, input_ptr, input_count, @@ -213,12 +206,6 @@ bool HexagonControlWrapper::ReadOutputNode( // TODO: Accept all results std::get<2>(output) = DT_FLOAT; outputs->emplace_back(output); - if (DBG_SHOW_RESULT) { - const int byte_size = std::get<1>(output); - const int element_count = byte_size / sizeof(float); - const float* float_array = reinterpret_cast<float*>(std::get<0>(output)); - DumpTopNFloatResults(float_array, element_count, 10 /* top_n */); - } return true; } @@ -240,19 +227,4 @@ bool HexagonControlWrapper::ReadOutputNode(const string, } #endif -void HexagonControlWrapper::DumpTopNFloatResults(const float* data, - const float element_count, - const int top_n) { - 
std::priority_queue<std::tuple<float, int>> queue; - for (int i = 0; i < element_count; ++i) { - queue.emplace(data[i], i); - } - LOG(INFO) << "=== Dump ranking ==="; - for (int i = 0; i < top_n; ++i) { - const std::tuple<float, int>& entry = queue.top(); - LOG(INFO) << i << ": " << std::get<1>(entry) << ", " << std::get<0>(entry); - queue.pop(); - } -} - } // namespace tensorflow diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h index dfae5aa5e2..0ba0b323cb 100644 --- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h +++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h @@ -46,9 +46,6 @@ class HexagonControlWrapper final : public ISocControlWrapper { // CAVEAT: Need offset as HVX library reserves some ids static constexpr int NODE_ID_OFFSET = 0x10000; - void DumpTopNFloatResults(const float *data, const float element_count, - const int top_n); - // Dummy float array for input node. // TODO(satok): Use actual data passed by FillInputNode and remove std::vector<float> dummy_input_float_; diff --git a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc index d06fb5fabc..81e49bd147 100644 --- a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc +++ b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc @@ -17,10 +17,15 @@ limitations under the License. 
// -o /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb // adb push /tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb \ // /data/local/tmp +// $ curl +// https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_graph_label_strings.txt +// -o /tmp/imagenet_comp_graph_label_strings.txt +// adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp #include <memory> #include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/kernels/hexagon/graph_transfer_utils.h" #include "tensorflow/core/kernels/hexagon/graph_transferer.h" #include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h" #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h" @@ -29,7 +34,9 @@ limitations under the License. #include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -40,6 +47,43 @@ const bool DBG_DUMP_FLOAT_DATA = false; const int WIDTH = 299; const int HEIGHT = 299; const int DEPTH = 3; +const int EXPECTED_FIRST_RESULT_ID = 59; +const int EXECUTION_REPEAT_COUNT = 3; + +static void DumpTop10Results( + const std::vector<ISocControlWrapper::ByteArray>& outputs) { + CHECK(outputs.size() == 1); + const int byte_size = std::get<1>(outputs.at(0)); + const int element_count = byte_size / sizeof(float); + const float* float_array = + reinterpret_cast<float*>(std::get<0>(outputs.at(0))); + const string label_filename = + "/data/local/tmp/imagenet_comp_graph_label_strings.txt"; + string label_str; + TF_CHECK_OK(ReadFileToString(Env::Default(), label_filename, &label_str)); + std::vector<string> labels = str_util::Split(label_str, '\n'); + GraphTransferUtils::DumpTopNFloatResults( + float_array, 
labels.data(), + std::min(element_count, static_cast<int>(labels.size())), + 10 /* show top_n results */); +} + +static void CheckFirstResult( + const std::vector<ISocControlWrapper::ByteArray>& outputs, + const int expected_first_id) { + EXPECT_GE(outputs.size(), 1); + const int byte_size = std::get<1>(outputs.at(0)); + const int element_count = byte_size / sizeof(float); + const float* float_array = + reinterpret_cast<float*>(std::get<0>(outputs.at(0))); + EXPECT_GE(element_count, 1); + std::vector<string> labels(element_count); + std::priority_queue<std::tuple<float, int, string>> queue = + GraphTransferUtils::GetTopNFloatResults(float_array, labels.data(), + element_count); + const std::tuple<float, int, string>& entry = queue.top(); + EXPECT_EQ(expected_first_id, std::get<1>(entry)); +} // CAVEAT: This test only runs when you specify hexagon library using // makefile. @@ -77,12 +121,17 @@ TEST(GraphTransferer, RunInceptionV3OnHexagonExample) { const int fsize = bmp.size(); LOG(INFO) << "Read " << image_filename << ", size = " << fsize << "bytes"; const int64 pixel_count = WIDTH * HEIGHT * DEPTH; + CHECK(fsize >= 22 /* pos of height */ + sizeof(int)); + CHECK(bmp.data() != nullptr); uint8* const img_bytes = bit_cast<uint8*>(bmp.data()); const int header_size = *(reinterpret_cast<int*>(img_bytes + 10)); + LOG(INFO) << "header size = " << header_size; const int size = *(reinterpret_cast<int*>(img_bytes + 14)); + LOG(INFO) << "image size = " << size; const int width = *(reinterpret_cast<int*>(img_bytes + 18)); + LOG(INFO) << "width = " << width; const int height = *(reinterpret_cast<int*>(img_bytes + 22)); - LOG(INFO) << header_size << ", " << size << ", " << width << ", " << height; + LOG(INFO) << "height = " << height; CHECK(fsize >= (WIDTH + 1) * WIDTH * 3 + header_size); uint8* const bmp_pixels = &img_bytes[header_size]; @@ -129,12 +178,23 @@ TEST(GraphTransferer, RunInceptionV3OnHexagonExample) { hexagon_control_wrapper.FillInputNode("Mul", ba); // 4. 
Execute graph - hexagon_control_wrapper.ExecuteGraph(); + profile_utils::CpuUtils::EnableClockCycleProfiling(true); + ClockCycleProfiler prof; + for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) { + prof.Start(); + hexagon_control_wrapper.ExecuteGraph(); + prof.Stop(); + } - // 5. Read output node's outputs + // 5-1. Read output node's outputs std::vector<ISocControlWrapper::ByteArray> outputs; hexagon_control_wrapper.ReadOutputNode("softmax", &outputs); + // 5-2. Dump results + DumpTop10Results(outputs); + CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID); + prof.DumpStatistics("Graph Execution"); + // 6. Teardown graph in hexagon hexagon_control_wrapper.TeardownGraph(); diff --git a/tensorflow/core/kernels/image_resizer_state.h b/tensorflow/core/kernels/image_resizer_state.h index 8870937422..33383d16a8 100644 --- a/tensorflow/core/kernels/image_resizer_state.h +++ b/tensorflow/core/kernels/image_resizer_state.h @@ -90,6 +90,18 @@ struct ImageResizerState { errors::InvalidArgument("input image must be of non-zero size")); height_scale = CalculateResizeScale(in_height, out_height, align_corners_); width_scale = CalculateResizeScale(in_width, out_width, align_corners_); + + // Guard against overflows + OP_REQUIRES(context, + ceilf((out_height - 1) * height_scale) <= + static_cast<float>(std::numeric_limits<int64>::max()), + errors::InvalidArgument( + "input image height scale would cause an overflow")); + OP_REQUIRES( + context, + ceilf((out_width - 1) * width_scale) <= static_cast<float>(INT_MAX), + errors::InvalidArgument( + "input image width scale would cause an overflow")); } // Calculates all the required variables, and allocates the output. 
diff --git a/tensorflow/core/kernels/inplace_ops.cc b/tensorflow/core/kernels/inplace_ops.cc index 5f1f5b652c..b44f2f5465 100644 --- a/tensorflow/core/kernels/inplace_ops.cc +++ b/tensorflow/core/kernels/inplace_ops.cc @@ -29,39 +29,24 @@ typedef Eigen::ThreadPoolDevice CPUDevice; namespace functor { template <typename T> -Status DoInplaceUpdate(const CPUDevice& d, InplaceOpType op, - const Tensor& value, const Tensor& loc, Tensor* output) { - auto Tloc = loc.flat<int64>(); +Status DoParallelConcatUpdate(const CPUDevice& d, const Tensor& value, + int32 loc, Tensor* output) { auto Tvalue = value.flat_outer_dims<T>(); auto Toutput = output->flat_outer_dims<T>(); auto nrows = Toutput.dimension(0); - for (int64 j = 0; j < Tloc.size(); ++j) { - auto r = (Tloc(j) % nrows + nrows) % nrows; // Guard index range. - switch (op) { - case I_UPDATE: - Toutput.template chip<0>(r).device(d) = Tvalue.template chip<0>(j); - break; - case I_ADD: - Toutput.template chip<0>(r).device(d) += Tvalue.template chip<0>(j); - break; - case I_SUB: - Toutput.template chip<0>(r).device(d) -= Tvalue.template chip<0>(j); - break; - default: - return errors::InvalidArgument("Unsupported inplace operation", op); - } - } + auto r = (loc % nrows + nrows) % nrows; // Guard index range. 
+ Toutput.template chip<0>(r).device(d) = Tvalue.template chip<0>(0); return Status::OK(); } template <> -Status DoInplace(const CPUDevice& d, InplaceOpType op, const Tensor& value, - const Tensor& loc, Tensor* output) { +Status DoParallelConcat(const CPUDevice& d, const Tensor& value, int32 loc, + Tensor* output) { CHECK_EQ(value.dtype(), output->dtype()); switch (value.dtype()) { #define CASE(type) \ case DataTypeToEnum<type>::value: \ - return DoInplaceUpdate<type>(d, op, value, loc, output); + return DoParallelConcatUpdate<type>(d, value, loc, output); TF_CALL_NUMBER_TYPES(CASE); #undef CASE default: @@ -73,19 +58,17 @@ Status DoInplace(const CPUDevice& d, InplaceOpType op, const Tensor& value, namespace { -// TODO(apassos): validate the shapes better. -class InplaceOpBase : public OpKernel { +template <typename Device> +class ParallelConcatUpdate : public OpKernel { public: - explicit InplaceOpBase(OpKernelConstruction* ctx) : OpKernel(ctx) {} + explicit ParallelConcatUpdate(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("loc", &loc_)); + } void Compute(OpKernelContext* ctx) override { auto value = ctx->input(0); - auto loc = ctx->input(1); - auto update = ctx->input(2); + auto update = ctx->input(1); - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(loc.shape()), - errors::InvalidArgument("loc must be a vector. ", - loc.shape().DebugString())); OP_REQUIRES( ctx, value.dims() == update.dims(), errors::InvalidArgument("value and update shape doesn't match: ", @@ -98,67 +81,39 @@ class InplaceOpBase : public OpKernel { value.shape().DebugString(), " vs. ", update.shape().DebugString())); } - OP_REQUIRES(ctx, loc.dim_size(0) == update.dim_size(0), - errors::InvalidArgument("loc and update shape doesn't match: ", - loc.shape().DebugString(), " vs. 
", + OP_REQUIRES(ctx, 1 == update.dim_size(0), + errors::InvalidArgument("update shape doesn't match: ", update.shape().DebugString())); Tensor output = value; // This creates an alias intentionally. - OP_REQUIRES_OK(ctx, DoCompute(ctx, update, loc, &output)); + const auto& d = ctx->eigen_device<Device>(); + OP_REQUIRES_OK( + ctx, ::tensorflow::functor::DoParallelConcat(d, update, loc_, &output)); ctx->set_output(0, output); } - protected: - virtual Status DoCompute(OpKernelContext* ctx, const Tensor& value, - const Tensor& loc, Tensor* output) = 0; -}; - -template <typename Device, functor::InplaceOpType op> -class InplaceOp : public InplaceOpBase { - public: - explicit InplaceOp(OpKernelConstruction* ctx) : InplaceOpBase(ctx) {} - - protected: - Status DoCompute(OpKernelContext* ctx, const Tensor& value, const Tensor& loc, - Tensor* output) override { - const auto& d = ctx->eigen_device<Device>(); - return ::tensorflow::functor::DoInplace(d, op, value, loc, output); - } + private: + int32 loc_; }; template <typename Device, typename T> -class EmptyOp : public OpKernel { +class ParallelConcatStart : public OpKernel { public: - explicit EmptyOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("init", &init_)); + explicit ParallelConcatStart(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &shape_)); } void Compute(OpKernelContext* ctx) override { - const Tensor& shape = ctx->input(0); - OP_REQUIRES( - ctx, TensorShapeUtils::IsVector(shape.shape()), - errors::InvalidArgument("shape must be a vector of int32, got shape ", - shape.shape().DebugString())); - auto dims = shape.flat<int32>(); - TensorShape out_shape; - OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape( - reinterpret_cast<const int32*>(dims.data()), - dims.size(), &out_shape)); Tensor* out = nullptr; // We do not know whether the output will be used on GPU. Setting it to be // gpu-compatible for now. 
AllocatorAttributes attr; attr.set_gpu_compatible(true); - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, out_shape, &out, attr)); - - if (init_) { - functor::SetZeroFunctor<Device, T>()(ctx->eigen_device<Device>(), - out->flat<T>()); - } + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, shape_, &out, attr)); } private: - bool init_; + TensorShape shape_; }; class FailureKernel : public OpKernel { @@ -176,16 +131,15 @@ class FailureKernel : public OpKernel { REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") \ .Device(DEVICE_CPU) \ .TypeConstraint<type>("T"), \ - InplaceOp<CPUDevice, functor::I_UPDATE>); + ParallelConcatUpdate<CPUDevice>); TF_CALL_NUMBER_TYPES(REGISTER) #undef REGISTER #define REGISTER_EMPTY(type) \ REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart") \ .Device(DEVICE_CPU) \ - .HostMemory("shape") \ .TypeConstraint<type>("dtype"), \ - EmptyOp<CPUDevice, type>) + ParallelConcatStart<CPUDevice, type>) TF_CALL_POD_STRING_TYPES(REGISTER_EMPTY) #undef REGISTER_EMPTY @@ -204,9 +158,8 @@ typedef Eigen::GpuDevice GPUDevice; #define REGISTER_EMPTY(type) \ REGISTER_KERNEL_BUILDER(Name("_ParallelConcatStart") \ .Device(DEVICE_GPU) \ - .HostMemory("shape") \ .TypeConstraint<type>("dtype"), \ - EmptyOp<GPUDevice, type>); + ParallelConcatStart<GPUDevice, type>); TF_CALL_GPU_NUMBER_TYPES(REGISTER_EMPTY) #undef REGISTER_EMPTY @@ -221,7 +174,7 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_PARALLEL_CONCAT); REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") \ .Device(DEVICE_GPU) \ .TypeConstraint<type>("T"), \ - InplaceOp<GPUDevice, functor::I_UPDATE>); + ParallelConcatUpdate<GPUDevice>); TF_CALL_GPU_NUMBER_TYPES(REGISTER) #undef REGISTER @@ -231,11 +184,10 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER) REGISTER_KERNEL_BUILDER(Name("_ParallelConcatUpdate") .Device(DEVICE_GPU) .HostMemory("value") - .HostMemory("loc") .HostMemory("update") .HostMemory("output") .TypeConstraint<int32>("T"), - InplaceOp<CPUDevice, functor::I_UPDATE>); + ParallelConcatUpdate<CPUDevice>); #endif } // end 
namespace diff --git a/tensorflow/core/kernels/inplace_ops_functor.h b/tensorflow/core/kernels/inplace_ops_functor.h index 6cb15eda91..53529f5165 100644 --- a/tensorflow/core/kernels/inplace_ops_functor.h +++ b/tensorflow/core/kernels/inplace_ops_functor.h @@ -22,19 +22,9 @@ limitations under the License. namespace tensorflow { namespace functor { -// Inplace update/add/sub values in 'y'. It computes -// y[i, :] = v if op is I_UPDATE -// y[i, :] += v if op is I_ADD -// y[i, :] -= v if op is I_SUB -enum InplaceOpType { - I_UPDATE, // x = y - I_ADD, // x += y - I_SUB, // x -= y -}; - template <typename Device> -Status DoInplace(const Device& device, InplaceOpType op, const Tensor& value, - const Tensor& loc, Tensor* output); +Status DoParallelConcat(const Device& device, const Tensor& value, int32 loc, + Tensor* output); } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc index 8e70f4575d..8467360435 100644 --- a/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/inplace_ops_functor_gpu.cu.cc @@ -26,72 +26,43 @@ namespace functor { typedef Eigen::GpuDevice Device; -template <typename T, InplaceOpType op> -__global__ void DoInplaceOpKernel(int nthreads, const int64 rows, - const int64 cols, const int64 n, const T* src, - const int64* rowids, T* dst) { +template <typename T> +__global__ void DoParallelConcatOpKernel(int nthreads, const int64 rows, + const int64 cols, int32 loc, + const T* src, T* dst) { CUDA_1D_KERNEL_LOOP(idx, nthreads) { - int64 r = idx / cols; int64 c = idx % cols; - r = (rowids[r] % rows + rows) % rows; // Guard index range. + int64 r = (loc % rows + rows) % rows; // Guard index range. 
T* p = dst + r * cols + c; const T* q = src + idx; - switch (op) { - case I_UPDATE: - *p = ldg(q); - break; - case I_ADD: - *p += ldg(q); - break; - case I_SUB: - *p -= ldg(q); - break; - } + *p = ldg(q); } } template <typename T> -Status DoInplaceUpdate(const Device& d, InplaceOpType op, const Tensor& value, - const Tensor& loc, Tensor* output) { +Status DoParallelConcatUpdate(const Device& d, const Tensor& value, int32 loc, + Tensor* output) { const int64 nelem = value.NumElements(); CudaLaunchConfig cfg = GetCudaLaunchConfig(nelem, d); auto Toutput = output->flat_outer_dims<T>(); const int64 nrows = Toutput.dimension(0); const int64 ncols = Toutput.dimension(1); - const int64 n = loc.NumElements(); const T* src = value.flat<T>().data(); - const int64* rowids = loc.flat<int64>().data(); T* dst = output->flat<T>().data(); - switch (op) { - case I_UPDATE: - DoInplaceOpKernel<T, I_UPDATE> - <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>( - cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst); - break; - case I_ADD: - DoInplaceOpKernel<T, I_ADD> - <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>( - cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst); - break; - case I_SUB: - DoInplaceOpKernel<T, I_SUB> - <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>( - cfg.virtual_thread_count, nrows, ncols, n, src, rowids, dst); - break; - default: - return errors::InvalidArgument("Unsupported operation type", op); - } + DoParallelConcatOpKernel<T> + <<<cfg.block_count, cfg.thread_per_block, 0, d.stream()>>>( + cfg.virtual_thread_count, nrows, ncols, loc, src, dst); return Status::OK(); } template <> -Status DoInplace(const Device& d, InplaceOpType op, const Tensor& value, - const Tensor& loc, Tensor* output) { +Status DoParallelConcat(const Device& d, const Tensor& value, int32 loc, + Tensor* output) { CHECK_EQ(value.dtype(), output->dtype()); switch (value.dtype()) { -#define CASE(type) \ - case DataTypeToEnum<type>::value: \ - 
return DoInplaceUpdate<type>(d, op, value, loc, output); \ +#define CASE(type) \ + case DataTypeToEnum<type>::value: \ + return DoParallelConcatUpdate<type>(d, value, loc, output); \ break; CASE(float) diff --git a/tensorflow/core/kernels/record_input_op.cc b/tensorflow/core/kernels/record_input_op.cc new file mode 100644 index 0000000000..878996c9d6 --- /dev/null +++ b/tensorflow/core/kernels/record_input_op.cc @@ -0,0 +1,67 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/kernels/record_yielder.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { + +class RecordInputOp : public OpKernel { + public: + explicit RecordInputOp(OpKernelConstruction* ctx) : OpKernel(ctx) { +#define GETATTR(TYPE, FIELD) \ + TYPE FIELD; \ + OP_REQUIRES_OK(ctx, ctx->GetAttr(#FIELD, &FIELD)); + + GETATTR(string, file_pattern); + GETATTR(int64, file_random_seed); + GETATTR(float, file_shuffle_shift_ratio); + GETATTR(int64, file_buffer_size); + GETATTR(int64, file_parallelism); + GETATTR(int64, batch_size); +#undef GETATTR + + RecordYielder::Options yopts; + yopts.file_pattern = file_pattern; + yopts.seed = file_random_seed; + yopts.bufsize = file_buffer_size; + yopts.file_shuffle_shift_ratio = file_shuffle_shift_ratio; + yopts.parallelism = file_parallelism; + yielder_ = std::unique_ptr<RecordYielder>(new RecordYielder(ctx, yopts)); + + batch_size_ = batch_size; + } + + void Compute(OpKernelContext* ctx) override { + Tensor out(DT_STRING, {batch_size_}); + auto t_out = out.flat<string>(); + for (int i = 0; i < batch_size_; ++i) { + OP_REQUIRES_OK(ctx, yielder_->YieldOne(&t_out(i))); + } + ctx->set_output(0, out); + } + + private: + int64 batch_size_; + std::unique_ptr<RecordYielder> yielder_; +}; + +REGISTER_KERNEL_BUILDER(Name("RecordInput").Device(DEVICE_CPU), RecordInputOp); +} // namespace tensorflow diff --git a/tensorflow/core/kernels/record_yielder.cc b/tensorflow/core/kernels/record_yielder.cc new file mode 100644 index 0000000000..e391752289 --- /dev/null +++ b/tensorflow/core/kernels/record_yielder.cc @@ -0,0 +1,216 @@ +/* Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/record_yielder.h" + +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { + +RecordYielder::RecordYielder(OpKernelConstruction* context, + const RecordYielder::Options& opts) + : opts_(opts), + thread_(new thread::ThreadPool(context->env(), "record_yielder", + 1 + opts.parallelism)), + epoch_(0), + rnd_(opts.seed) { + thread_->Schedule([this]() { MainLoop(); }); +} + +RecordYielder::~RecordYielder() { + { + mutex_lock l(mu_); + stop_ = true; + buf_empty_.notify_all(); + buf_enough_.notify_all(); + buf_not_full_.notify_all(); + } + main_loop_done_.WaitForNotification(); + delete thread_; +} + +Status RecordYielder::YieldOne(string* value) { + mutex_lock l(mu_); + while (!BufEnough()) { + buf_enough_.wait(l); + } + if (status_.ok()) { + bool notify_no_longer_full = !BufNotFull(); + CHECK(!stop_ && !buf_.empty()); + *value = std::move(buf_.back()); + buf_.pop_back(); + ++num_records_yielded_in_epoch_; + // Assumption is that an epoch always has something in the buffer + // until it ends. If the input pipeline was slower than the consumers + // by a lot this might not be true. Not sure how to handle. 
+ if (buf_.empty()) { + buf_empty_.notify_all(); + } + if (notify_no_longer_full) { + buf_not_full_.notify_all(); + } + } + return status_; +} + +struct RecordYielder::Shard { + int index; // Shard index. + std::vector<string> filenames; // File names given to this shard. + Notification done; // Notified when this shard is done. + Status status; // Shard status. +}; + +bool RecordYielder::ShouldFinish(const Status& s) { + mutex_lock l(mu_); + status_.Update(s); + return stop_ || !status_.ok(); +} + +static Status MatchFiles(const string& patterns, + std::vector<string>* filenames) { + for (const auto& file_pattern : str_util::Split(patterns, ',')) { + std::vector<string> tmp_filenames; + TF_RETURN_IF_ERROR( + Env::Default()->GetMatchingPaths(file_pattern, &tmp_filenames)); + filenames->insert(filenames->end(), + std::make_move_iterator(tmp_filenames.begin()), + std::make_move_iterator(tmp_filenames.end())); + } + return Status::OK(); +} + +void RecordYielder::MainLoop() { + while (true) { + ++epoch_; + num_records_yielded_in_epoch_ = 0; + + // Finds all files. + std::vector<string> filenames; + Status s = MatchFiles(opts_.file_pattern, &filenames); + if (ShouldFinish(s)) break; + + if (filenames.empty()) { + s = errors::NotFound("Found no files at ", opts_.file_pattern); + if (ShouldFinish(s)) break; + } + + // Shuffles these files according to the epoch # and random seed. + std::mt19937_64 shuffle_rnd( + Hash64(reinterpret_cast<char*>(&epoch_), sizeof(epoch_), opts_.seed)); + std::shuffle(filenames.begin(), filenames.end(), shuffle_rnd); + + // Left-shift the filename list. + const int64 num = filenames.size(); + int64 shift; + if (0 <= opts_.file_shuffle_shift_ratio && + opts_.file_shuffle_shift_ratio < 1) { + shift = opts_.file_shuffle_shift_ratio * num; + std::rotate(filenames.begin(), filenames.begin() + shift, + filenames.end()); + } + + // Shards files and use one thread to go through each shard. 
+ const int N = opts_.parallelism; + std::vector<Shard> shards(N); + for (int i = 0; i < N; ++i) { + Shard* shard = &shards[i]; + shard->index = i; + for (int j = i; j < filenames.size(); j += N) { + shard->filenames.push_back(filenames[j]); + } + thread_->Schedule([this, shard]() { ShardLoop(shard); }); + } + for (int i = 0; i < N; ++i) { + shards[i].done.WaitForNotification(); + s.Update(shards[i].status); + } + if (ShouldFinish(s)) break; + + // Starts the next epoch once all buffered records are consumed. + { + mutex_lock l(mu_); + epoch_end_ = true; + while (!BufEmpty()) { + buf_empty_.wait(l); + } + epoch_end_ = false; + } + } + main_loop_done_.Notify(); +} + +bool RecordYielder::Add(std::vector<string>* values) { + mutex_lock l(mu_); + while (!BufNotFull()) { + buf_not_full_.wait(l); + } + while (BufNotFull() && !values->empty()) { + // Adds values->back(). Swaps its position with another random + // element. + auto index = rnd_() % (buf_.size() + 1); + if (index == buf_.size()) { + buf_.push_back(std::move(values->back())); + } else { + buf_.push_back(std::move(buf_[index])); + buf_[index] = std::move(values->back()); + } + values->pop_back(); + } + if (BufEnough()) { + buf_enough_.notify_all(); + } + return stop_; +} + +void RecordYielder::ShardLoop(Shard* shard) { + std::vector<string> values; + const int64 kRecords = 16; + for (const string& filename : shard->filenames) { + std::unique_ptr<RandomAccessFile> file; + if (ShouldFinish(Status::OK())) break; + Status s = Env::Default()->NewRandomAccessFile(filename, &file); + if (!s.ok()) { + shard->status = errors::InvalidArgument("Can't open ", filename); + break; + } + io::RecordReader rdr(file.get()); + uint64 offset = 0; + string record; + while (true) { + Status s = rdr.ReadRecord(&offset, &record); + if (s.ok()) { + values.emplace_back(std::move(record)); + if (values.size() >= kRecords && Add(&values)) { + shard->status = errors::Aborted("stopped"); + break; + } + } else if (errors::IsOutOfRange(s)) { 
+ break; + } else { + shard->status = s; + break; + } + } + } + // Adds the remaining values of this shard to buf_. + while (!values.empty()) { + Add(&values); + } + shard->done.Notify(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/record_yielder.h b/tensorflow/core/kernels/record_yielder.h new file mode 100644 index 0000000000..503644f3b8 --- /dev/null +++ b/tensorflow/core/kernels/record_yielder.h @@ -0,0 +1,157 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_KERNELS_RECORD_YIELDER_H_ +#define TENSORFLOW_KERNELS_RECORD_YIELDER_H_ + +#include <atomic> +#include <random> +#include <string> +#include <vector> + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/thread_annotations.h" + +namespace tensorflow { + +// RecordYielder produces value records from a set of tfrecord files +// in a random order. +// +// It guarantees that: +// 1) all records in tfrecords are yielded within every epoch; +// 2) each record is yielded only once within every epoch; +// 3) the order in which records are yielded are highly randomized. 
+// 4) the peak memory usage is roughly avg record size * +// (opts.bufsize + opts.parellelism * 16). +// +// Usage example: +// RecordYielder::Options opts; +// opts.file_pattern = "input-*"; +// opts.seed = 301; +// opts.bufsize = 1000000; // A randomized buffer with 1M records. +// opts.parallelism = 8; // Uses 8 tfrecord iterators to iterate +// // through all files. +// RecordYielder yielder(opts); +// string val; +// while (true) { +// yielder.YieldOne(&val); +// // process val +// } +// +// RecordYielder can be accessed by multiple threads concurrently. +class RecordYielder { + public: + struct Options { + // Glob pattern for tfrecords. + string file_pattern; + + // Random seed. It determines how data files are shuffled and how + // records are shuffled. + int64 seed = 0; + + // Each epoch, all files are first shuffled according to the + // random seed and the epoch number, and then all files are + // left-shifted by file_shuffle_shift_ratio * num_files slots. If + // file_shuffle_shift_ratio is not within [0, 1), the + // implementation clip it to [0, 1). + float file_shuffle_shift_ratio = 0; + + // Randomization buffer keeps these many records. + uint64 bufsize = 1; + + // Uses these many concurrent tfrecord iterators to iterate through + // tfrecords. + int32 parallelism = 1; + }; + + explicit RecordYielder(OpKernelConstruction* context, + const RecordYielder::Options& opts); + ~RecordYielder(); + + RecordYielder(const RecordYielder&) = delete; + RecordYielder& operator=(const RecordYielder&) = delete; + + // Yields one 'value'. + Status YieldOne(string* value); + + // Returns the current epoch number. + int64 current_epoch() const { return epoch_; } + + private: + typedef RecordYielder ME; + + Options opts_; + + // Backgrounds threads. Owned. + thread::ThreadPool* thread_; + + // Epoch number. + std::atomic<int64> epoch_; + + mutex mu_; + + // Turned to true when this is deleted. 
+ bool stop_ GUARDED_BY(mu_) = false; + Status status_ GUARDED_BY(mu_); + + // PRG used for randomization. + std::mt19937_64 rnd_ GUARDED_BY(mu_); + + // Randomization buffer. + std::vector<string> buf_ GUARDED_BY(mu_); + + // True iff we are draining an epoch. + bool epoch_end_ = false; + + int64 num_records_yielded_in_epoch_ = 0; + + // Trigger when the main loop has exited. + Notification main_loop_done_; + + // condition_variables. + condition_variable buf_empty_; + bool BufEmpty() const SHARED_LOCKS_REQUIRED(mu_) { + return stop_ || buf_.empty(); + } + + condition_variable buf_not_full_; + bool BufNotFull() const SHARED_LOCKS_REQUIRED(mu_) { + return stop_ || buf_.size() < opts_.bufsize; + } + + condition_variable buf_enough_; + bool BufEnough() const SHARED_LOCKS_REQUIRED(mu_) { + // NOTE: Unless we are finishing an epoch, we want to make sure + // the buf_ contains enough randomized elements before yielding + // any. + return stop_ || !status_.ok() || (epoch_end_ && !buf_.empty()) || + (!epoch_end_ && + buf_.size() >= std::max<int64>(1, opts_.bufsize / 2)); + } + + void MainLoop(); + struct Shard; + void ShardLoop(Shard* shard); + bool ShouldFinish(const Status& s); + bool Add(std::vector<string>* values); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_KERNELS_RECORD_YIELDER_H_ diff --git a/tensorflow/core/kernels/resize_bilinear_op.cc b/tensorflow/core/kernels/resize_bilinear_op.cc index 6dfe871c52..85d28d2c64 100644 --- a/tensorflow/core/kernels/resize_bilinear_op.cc +++ b/tensorflow/core/kernels/resize_bilinear_op.cc @@ -64,6 +64,201 @@ class ResizeBilinearOp : public OpKernel { bool align_corners_; }; +namespace { +// Compute the interpolation indices only once. 
+struct CachedInterpolation { + int64 lower; // Lower source index used in the interpolation + int64 upper; // Upper source index used in the interpolation + // 1-D linear iterpolation scale (see: + // https://en.wikipedia.org/wiki/Bilinear_interpolation) + float lerp; + // How many consecutive points use the same lower & upper indices + int consecutive; +}; + +enum ImageScalePattern { SCALE_UP, SIMILAR, SCALE_DOWN }; + +inline ImageScalePattern compute_image_scale_pattern(const int64 out_height, + const int64 out_width, + const int64 in_height, + const int64 in_width) { + if (in_height * 2 < out_height || in_width * 2 < out_width) { + return SCALE_UP; + } else if (out_height * 2 < in_height || out_width * 2 < in_width) { + return SCALE_DOWN; + } else { + return SIMILAR; + } +} + +inline int compute_scratch_size(const int64 out_height, const int64 out_width, + const int64 in_height, const int64 in_width, + const int channels, + const ImageScalePattern scale_pattern) { + // Allocate a CachedInterpolation for each y, and each x in the out-height, + // plus 2 extra to avoid extra branches in the + // CachedInterpolation.consecutive computation. + const int cached_computation_size = + sizeof(CachedInterpolation) * (out_height + out_width + 2); + if (scale_pattern == SCALE_DOWN) { + return cached_computation_size; + } else { + // In order to avoid paying the cost of data type conversion multiple times, + // we must allocate a temporary image as well. + const int tmp_image_size = sizeof(float) * in_height * in_width * channels; + // We batch up all memory allocations into a single malloc call for + // performance reasons. 
+ return cached_computation_size + tmp_image_size; + } +} + +inline void compute_interpolation_weights(const ImageScalePattern scale_pattern, + const int64 out_size, + const int64 in_size, + const float scale, + CachedInterpolation* interpolation) { + interpolation[out_size].lower = 0; + interpolation[out_size].upper = 0; + interpolation[out_size].consecutive = 0; + for (int64 i = out_size - 1; i >= 0; --i) { + const float in = i * scale; + interpolation[i].lower = static_cast<int64>(in); + interpolation[i].upper = std::min(interpolation[i].lower + 1, in_size - 1); + interpolation[i].lerp = in - interpolation[i].lower; + interpolation[i].consecutive = + interpolation[i + 1].lower == interpolation[i].lower && + interpolation[i + 1].upper == interpolation[i].upper + ? interpolation[i + 1].consecutive + 1 + : 1; + } +} + +template <typename T> +struct Converter { + static inline const float* convert_image_to_float( + typename TTypes<T, 4>::ConstTensor images, const int batch_index, + const int64 in_height, const int64 in_width, const int channels, + std::vector<float>* converted_image_v) { + converted_image_v->resize(in_height * in_width * channels); + float* converted_image = converted_image_v->data(); + for (int64 y = 0; y < in_height; ++y) { + for (int64 x = 0; x < in_width; ++x) { + for (int c = 0; c < channels; ++c) { + converted_image[y * in_width * channels + x * channels + c] = + static_cast<float>(images(batch_index, y, x, c)); + } + } + } + return converted_image; + } +}; + +template <> +struct Converter<float> { + static inline const float* convert_image_to_float( + typename TTypes<float, 4>::ConstTensor images, const int b, + const int64 in_height, const int64 in_width, const int channels, + std::vector<float>* converted_image_v) { + return images.data() + (b * in_height * in_width * channels); + } +}; + +/** + * Computes the bilinear interpolation from the appropriate 4 float points + * and the linear interpolation weights. 
+ */ +inline float compute_lerp(const float top_left, const float top_right, + const float bottom_left, const float bottom_right, + const float x_lerp, const float y_lerp) { + const float top = top_left + (top_right - top_left) * x_lerp; + const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; + return top + (bottom - top) * y_lerp; +} + +template <typename T> +inline void scale_down_image(typename TTypes<T, 4>::ConstTensor images, + const int batch_size, const int64 out_height, + const int64 out_width, const int channels, + const std::vector<CachedInterpolation>& xs, + const std::vector<CachedInterpolation>& ys, + typename TTypes<float, 4>::Tensor output) { + // Do not eagerly convert all input data points, as we ignore most. + for (int b = 0; b < batch_size; ++b) { + // Compute the interpolation + for (int64 y = 0; y < out_height; ++y) { + for (int64 x = 0; x < out_width; ++x) { + for (int c = 0; c < channels; ++c) { + const float top_left(images(b, ys[y].lower, xs[x].lower, c)); + const float top_right(images(b, ys[y].lower, xs[x].upper, c)); + const float bottom_left(images(b, ys[y].upper, xs[x].lower, c)); + const float bottom_right(images(b, ys[y].upper, xs[x].upper, c)); + output(b, y, x, c) = + compute_lerp(top_left, top_right, bottom_left, bottom_right, + xs[x].lerp, ys[y].lerp); + } + } + } + } +} + +inline void scale_up_image(const float* input_image, const int batch_index, + const int64 out_height, const int64 out_width, + const int channels, const int64 in_height, + const int64 in_width, + const std::vector<CachedInterpolation>& xs, + const std::vector<CachedInterpolation>& ys, + typename TTypes<float, 4>::Tensor output) { + for (int64 y = 0; y < out_height; y += ys[y].consecutive) { + const int64 in_y_lower = ys[y].lower * in_width * channels; + const int64 in_y_upper = ys[y].upper * in_width * channels; + for (int64 x = 0; x < out_width; x += xs[x].consecutive) { + const int64 in_x_lower = xs[x].lower * channels; + const int64 
in_x_upper = xs[x].upper * channels; + for (int c = 0; c < channels; ++c) { + const float top_left = input_image[in_y_lower + in_x_lower + c]; + const float top_right = input_image[in_y_lower + in_x_upper + c]; + const float bottom_left = input_image[in_y_upper + in_x_lower + c]; + const float bottom_right = input_image[in_y_upper + in_x_upper + c]; + for (int64 y_inner = y; y_inner < y + ys[y].consecutive; ++y_inner) { + for (int64 x_inner = x; x_inner < x + xs[x].consecutive; ++x_inner) { + output(batch_index, y_inner, x_inner, c) = + compute_lerp(top_left, top_right, bottom_left, bottom_right, + xs[x_inner].lerp, ys[y_inner].lerp); + } + } + } + } + } +} + +inline void scale_similar_image(const float* input_image, const int b, + const int64 out_height, const int64 out_width, + const int channels, const int64 in_height, + const int64 in_width, + const std::vector<CachedInterpolation>& xs, + const std::vector<CachedInterpolation>& ys, + typename TTypes<float, 4>::Tensor output) { + // Compute the interpolation + for (int64 y = 0; y < out_height; ++y) { + const int64 in_y_lower = ys[y].lower * in_width * channels; + const int64 in_y_upper = ys[y].upper * in_width * channels; + // Similar-sized images do not have a set of inner loops. + for (int64 x = 0; x < out_width; ++x) { + const int64 in_x_lower = xs[x].lower * channels; + const int64 in_x_upper = xs[x].upper * channels; + for (int c = 0; c < channels; ++c) { + const float top_left = input_image[in_y_lower + in_x_lower + c]; + const float top_right = input_image[in_y_lower + in_x_upper + c]; + const float bottom_left = input_image[in_y_upper + in_x_lower + c]; + const float bottom_right = input_image[in_y_upper + in_x_upper + c]; + output(b, y, x, c) = compute_lerp(top_left, top_right, bottom_left, + bottom_right, xs[x].lerp, ys[y].lerp); + } + } + } +} +} // namespace + // Partial specialization of ResizeBilinear functor for a CPUDevice. 
namespace functor { template <typename T> @@ -71,7 +266,7 @@ struct ResizeBilinear<CPUDevice, T> { void operator()(const CPUDevice& d, typename TTypes<T, 4>::ConstTensor images, const float height_scale, const float width_scale, typename TTypes<float, 4>::Tensor output) { - const int batch = images.dimension(0); + const int batch_size = images.dimension(0); const int64 in_height = images.dimension(1); const int64 in_width = images.dimension(2); const int channels = images.dimension(3); @@ -79,31 +274,41 @@ struct ResizeBilinear<CPUDevice, T> { const int64 out_height = output.dimension(1); const int64 out_width = output.dimension(2); - for (int b = 0; b < batch; ++b) { - for (int y = 0; y < out_height; ++y) { - const float in_y = y * height_scale; - const int64 top_y_index = static_cast<int64>(floorf(in_y)); - const int64 bottom_y_index = - std::min(static_cast<int64>(ceilf(in_y)), in_height - 1); - const float y_lerp = in_y - top_y_index; - for (int x = 0; x < out_width; ++x) { - const float in_x = x * width_scale; - const int64 left_x_index = static_cast<int64>(floorf(in_x)); - const int64 right_x_index = - std::min(static_cast<int64>(ceilf(in_x)), in_width - 1); - const float x_lerp = in_x - left_x_index; - for (int c = 0; c < channels; ++c) { - const float top_left(images(b, top_y_index, left_x_index, c)); - const float top_right(images(b, top_y_index, right_x_index, c)); - const float bottom_left(images(b, bottom_y_index, left_x_index, c)); - const float bottom_right( - images(b, bottom_y_index, right_x_index, c)); - const float top = top_left + (top_right - top_left) * x_lerp; - const float bottom = - bottom_left + (bottom_right - bottom_left) * x_lerp; - output(b, y, x, c) = top + (bottom - top) * y_lerp; - } - } + // Handle no-op resizes efficiently. 
+ if (out_height == in_height && out_width == in_width) { + output = images.template cast<float>(); + return; + } + + const ImageScalePattern scale_pattern = + compute_image_scale_pattern(out_height, out_width, in_height, in_width); + std::vector<CachedInterpolation> ys(out_height + 1); + std::vector<CachedInterpolation> xs(out_width + 1); + std::vector<float> converted_image_v; + + // Compute the cached interpolation weights on the x and y dimensions. + compute_interpolation_weights(scale_pattern, out_height, in_height, + height_scale, ys.data()); + compute_interpolation_weights(scale_pattern, out_width, in_width, + width_scale, xs.data()); + + if (scale_pattern == SCALE_UP) { + for (int b = 0; b < batch_size; ++b) { + const float* converted_image = Converter<T>::convert_image_to_float( + images, b, in_height, in_width, channels, &converted_image_v); + scale_up_image(converted_image, b, out_height, out_width, channels, + in_height, in_width, xs, ys, output); + } + } else if (scale_pattern == SCALE_DOWN) { + // Do not eagerly convert all input data points, as we ignore most. + scale_down_image<T>(images, batch_size, out_height, out_width, channels, + xs, ys, output); + } else { + for (int b = 0; b < batch_size; ++b) { + const float* converted_image = Converter<T>::convert_image_to_float( + images, b, in_height, in_width, channels, &converted_image_v); + scale_similar_image(converted_image, b, out_height, out_width, channels, + in_height, in_width, xs, ys, output); } } } diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc index 32acdf2df8..a4f1120578 100644 --- a/tensorflow/core/kernels/resize_bilinear_op_test.cc +++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -39,6 +40,74 @@ class ResizeBilinearOpTest : public OpsTestBase { .Finalize(node_def())); TF_EXPECT_OK(InitOp()); } + + const Tensor* AddRandomImageInput(const TensorShape& shape) { + CHECK_GT(input_types_.size(), inputs_.size()) + << "Adding more inputs than types; perhaps you need to call MakeOp"; + CHECK_EQ(shape.dims(), 4) << "All images must have 4 dimensions."; + bool is_ref = IsRefType(input_types_[inputs_.size()]); + Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), shape); + input->flat<float>().setRandom(); + tensors_.push_back(input); + if (is_ref) { + CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), + DataTypeToEnum<float>::v()); + inputs_.push_back({&lock_for_refs_, input}); + } else { + CHECK_EQ(input_types_[inputs_.size()], DataTypeToEnum<float>::v()); + inputs_.push_back({nullptr, input}); + } + return input; + } + + // This is the straight forward unoptimized implementation of resize bilinear + // We use this to confirm that the optimized version is exactly identical. 
+ void ResizeBilinearBaseline(TTypes<float, 4>::ConstTensor images, + TTypes<float, 4>::Tensor output) { + const int batch = images.dimension(0); + const int64 in_height = images.dimension(1); + const int64 in_width = images.dimension(2); + const int channels = images.dimension(3); + + ASSERT_EQ(batch, output.dimension(0)); + ASSERT_EQ(channels, output.dimension(3)); + + const int64 out_height = output.dimension(1); + const int64 out_width = output.dimension(2); + + const float height_scale = in_height / static_cast<float>(out_height); + const float width_scale = in_width / static_cast<float>(out_width); + + for (int b = 0; b < batch; ++b) { + for (int64 y = 0; y < out_height; ++y) { + const float in_y = y * height_scale; + const int64 top_y_index = static_cast<int64>(floorf(in_y)); + const int64 bottom_y_index = + std::min(static_cast<int64>(ceilf(in_y)), in_height - 1); + const float y_lerp = in_y - top_y_index; + for (int64 x = 0; x < out_width; ++x) { + const float in_x = x * width_scale; + const int64 left_x_index = static_cast<int64>(floorf(in_x)); + const int64 right_x_index = + std::min(static_cast<int64>(ceilf(in_x)), in_width - 1); + const float x_lerp = in_x - left_x_index; + for (int c = 0; c < channels; ++c) { + const float top_left = images(b, top_y_index, left_x_index, c); + const float top_right = images(b, top_y_index, right_x_index, c); + const float bottom_left = + images(b, bottom_y_index, left_x_index, c); + const float bottom_right = + images(b, bottom_y_index, right_x_index, c); + const float top = top_left + (top_right - top_left) * x_lerp; + const float bottom = + bottom_left + (bottom_right - bottom_left) * x_lerp; + output(b, y, x, c) = top + (bottom - top) * y_lerp; + } + } + } + } + } }; class ResizeBilinearOpAlignCornersTest : public OpsTestBase { @@ -68,6 +137,23 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To1x1) { test::ExpectTensorEqual<float>(expected, *GetOutput(0)); } +TEST_F(ResizeBilinearOpTest, TestBilinearRandom2x2To1x1) { 
+ const Tensor* input = AddRandomImageInput(TensorShape({1, 2, 2, 1})); + AddInputFromArray<int32>(TensorShape({2}), {1, 1}); + TF_ASSERT_OK(RunOpKernel()); + + // When scaling down, we have to arbitrarily pick a pixel from the + // original input. In this case, we choose the top/left most pixel. + Tensor* output = GetOutput(0); + std::unique_ptr<Tensor> expected( + new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), TensorShape({1, 1, 1, 1}))); + ResizeBilinearBaseline(input->tensor<float, 4>(), + expected->tensor<float, 4>()); + EXPECT_EQ(input->flat<float>()(0), output->flat<float>()(0)); + test::ExpectTensorEqual<float>(*expected.get(), *output); +} + TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To1x1) { // Input: // 1, 2 @@ -302,6 +388,62 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To4x4) { test::ExpectTensorEqual<float>(expected, *GetOutput(0)); } +TEST_F(ResizeBilinearOpTest, TestBilinearRandom183x299To299x299) { + const TensorShape shape({1, 183, 299, 1}); + const Tensor* input = AddRandomImageInput(shape); + AddInputFromArray<int32>(TensorShape({2}), {299, 299}); + TF_ASSERT_OK(RunOpKernel()); + + std::unique_ptr<Tensor> expected( + new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1}))); + ResizeBilinearBaseline(input->tensor<float, 4>(), + expected->tensor<float, 4>()); + test::ExpectTensorEqual<float>(*expected, *GetOutput(0)); +} + +TEST_F(ResizeBilinearOpTest, TestBilinearRandom141x186To299x299) { + const TensorShape shape({1, 141, 186, 1}); + const Tensor* input = AddRandomImageInput(shape); + AddInputFromArray<int32>(TensorShape({2}), {299, 299}); + TF_ASSERT_OK(RunOpKernel()); + + std::unique_ptr<Tensor> expected( + new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1}))); + ResizeBilinearBaseline(input->tensor<float, 4>(), + expected->tensor<float, 4>()); + 
test::ExpectTensorEqual<float>(*expected, *GetOutput(0)); +} + +TEST_F(ResizeBilinearOpTest, TestBilinearRandom749x603To299x299) { + const TensorShape shape({1, 749, 603, 1}); + const Tensor* input = AddRandomImageInput(shape); + AddInputFromArray<int32>(TensorShape({2}), {299, 299}); + TF_ASSERT_OK(RunOpKernel()); + + std::unique_ptr<Tensor> expected( + new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1}))); + ResizeBilinearBaseline(input->tensor<float, 4>(), + expected->tensor<float, 4>()); + test::ExpectTensorEqual<float>(*expected, *GetOutput(0)); +} + +TEST_F(ResizeBilinearOpTest, TestBilinearRandom299x299To299x299) { + const TensorShape shape({1, 299, 299, 1}); + const Tensor* input = AddRandomImageInput(shape); + AddInputFromArray<int32>(TensorShape({2}), {299, 299}); + TF_ASSERT_OK(RunOpKernel()); + + std::unique_ptr<Tensor> expected( + new Tensor(device_->GetAllocator(AllocatorAttributes()), + DataTypeToEnum<float>::v(), TensorShape({1, 299, 299, 1}))); + ResizeBilinearBaseline(input->tensor<float, 4>(), + expected->tensor<float, 4>()); + test::ExpectTensorEqual<float>(*expected, *GetOutput(0)); +} + TEST_F(ResizeBilinearOpTest, TestInvalidOutputSize) { AddInputFromArray<float>(TensorShape({1, 2, 2, 1}), {1, 2, 3, 4}); AddInputFromArray<int32>(TensorShape({2}), {0, 0}); diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index 6c4f20a23a..6a3f3dfc77 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -1386,21 +1386,21 @@ void wrapper_libxsmm_spmdm_createSparseSlice_generic_thread( void wrapper_libxsmm_spmdm_compute_generic_thread( empty_type_wrapper<bfloat16>, const libxsmm_spmdm_handle* handle, char transA, char transB, const bfloat16* alpha, - libxsmm_CSR_sparseslice* A_sparse, const bfloat16* B, const bfloat16* beta, - float* C, int block_id, int tid, int nthreads) { + 
libxsmm_CSR_sparseslice* A_sparse, const bfloat16* B, char transC, + const bfloat16* beta, float* C, int block_id, int tid, int nthreads) { return libxsmm_spmdm_compute_bfloat16_thread( handle, transA, transB, reinterpret_cast<const uint16*>(alpha), A_sparse, - reinterpret_cast<const uint16*>(B), 'N', reinterpret_cast<const uint16*>(beta), - C, block_id, tid, nthreads); + reinterpret_cast<const uint16*>(B), transC, + reinterpret_cast<const uint16*>(beta), C, block_id, tid, nthreads); } void wrapper_libxsmm_spmdm_compute_generic_thread( empty_type_wrapper<float>, const libxsmm_spmdm_handle* handle, char transA, char transB, const float* alpha, libxsmm_CSR_sparseslice* A_sparse, - const float* B, const float* beta, float* C, int block_id, int tid, - int nthreads) { + const float* B, char transC, const float* beta, float* C, int block_id, + int tid, int nthreads) { return libxsmm_spmdm_compute_fp32_thread(handle, transA, transB, alpha, - A_sparse, B, 'N', beta, C, block_id, tid, - nthreads); + A_sparse, B, transC, beta, C, + block_id, tid, nthreads); } class PinnedToCurrentCPU { @@ -1438,7 +1438,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute( const typename LibxsmmSparseMatMul<TL, TR>::ConstMatrixMapR& right, bool transpose_left, const DeviceBase::CpuWorkerThreads* thread_pool, bool transpose_output, MatrixMap* output) { - if (transpose_output || transpose_left) { + if (false) { // Not handled by libxsmm currently SparseMatMul<TL, TR>::Compute( nullptr /* Assumes no cached data for fallback */, left, right, @@ -1455,7 +1455,6 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute( (transpose_output ? output->dimension(1) : output->dimension(0))); CHECK_EQ(right_dim1, (transpose_output ? 
output->dimension(0) : output->dimension(1))); - CHECK(!transpose_output); if (left_dim0 < 32 || left_dim1 < 32 || right_dim1 < 32) { // Causes problems in libxsmm SparseMatMul<TL, TR>::Compute( @@ -1482,7 +1481,7 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute( if (work_item >= total_num_creation_blocks) break; wrapper_libxsmm_spmdm_createSparseSlice_generic_thread( empty_type_wrapper<TL>{}, &entry->handle, - (transpose_left ? 'T' : 'N'), left_data, entry->output_csr, work_item, + (transpose_left ? 'Y' : 'N'), left_data, entry->output_csr, work_item, i, num_threads); } }); @@ -1504,8 +1503,9 @@ inline void LibxsmmSparseMatMul<TL, TR>::Compute( const TL beta(0.0); // Stored in a variable so we can get a pointer wrapper_libxsmm_spmdm_compute_generic_thread( empty_type_wrapper<TL>{}, &entry->handle, - (transpose_left ? 'T' : 'N'), 'N', &alpha, entry->output_csr, - right_data, &beta, output_data, work_item, i, num_threads); + (transpose_left ? 'Y' : 'N'), 'N', &alpha, entry->output_csr, + right_data, (transpose_output ? 
'Y' : 'N'), &beta, output_data, + work_item, i, num_threads); } }); // Put handle + CSR storage back into cache diff --git a/tensorflow/core/kernels/variable_ops.h b/tensorflow/core/kernels/variable_ops.h index d8d8831702..2839c3d8cf 100644 --- a/tensorflow/core/kernels/variable_ops.h +++ b/tensorflow/core/kernels/variable_ops.h @@ -53,29 +53,29 @@ class VariableOp : public OpKernel { dtype_ = RemoveRefType(context->output_type(0)); } - ~VariableOp() override { - if (var_) var_->Unref(); - } - void Compute(OpKernelContext* ctx) override { mutex_lock l(init_mu_); - if (var_ == nullptr) { - OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(), - true /* use name() */)); - auto creator = [this](Var** var) { - *var = new Var(dtype_); - (*var)->tensor()->set_shape(shape_); - return Status::OK(); - }; - OP_REQUIRES_OK(ctx, - cinfo_.resource_manager()->LookupOrCreate<Var>( - cinfo_.container(), cinfo_.name(), &var_, creator)); + if (!initialized_) { + OP_REQUIRES_OK( + ctx, + cinfo_.Init(ctx->resource_manager(), def(), true /* use name() */)); + initialized_ = true; } + auto creator = [this](Var** var) { + *var = new Var(dtype_); + (*var)->tensor()->set_shape(shape_); + return Status::OK(); + }; + Var* var; + OP_REQUIRES_OK(ctx, + cinfo_.resource_manager()->LookupOrCreate<Var>( + cinfo_.container(), cinfo_.name(), &var, creator)); // Output a reference to our tensor, so it may be updated. // - // As long as *this is alive, the ref we return here is valid - // because *this owns a ref on var_. - ctx->set_output_ref(0, var_->mu(), var_->tensor()); + // As long as the resource manager hasn't been cleared the ref we return + // here is valid because it owns a ref on var. 
+ ctx->set_output_ref(0, var->mu(), var->tensor()); + var->Unref(); } private: @@ -84,7 +84,7 @@ class VariableOp : public OpKernel { mutex init_mu_; ContainerInfo cinfo_ GUARDED_BY(init_mu_); - Var* var_ GUARDED_BY(init_mu_) = nullptr; + bool initialized_ GUARDED_BY(init_mu_){false}; TF_DISALLOW_COPY_AND_ASSIGN(VariableOp); }; diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 7ce667675d..d61e7b32de 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1226,11 +1226,9 @@ Equivalent to np.full // -------------------------------------------------------------------------- REGISTER_OP("_ParallelConcatStart") - .Input("shape: Tshape") .Output("output: dtype") + .Attr("shape: shape") .Attr("dtype: type") - .Attr("Tshape: {int32, int64} = DT_INT32") - .Attr("init: bool = false") .SetIsStateful() .SetShapeFn([](InferenceContext* c) { ShapeHandle out; @@ -1246,44 +1244,27 @@ conjunction with inplace operations. shape: 1-D `Tensor` indicating the shape of the output. dtype: The element type of the returned tensor. -init: `bool` indicating whether or not to zero the allocated memory. output: An empty Tensor of the specified type. )doc"); // -------------------------------------------------------------------------- REGISTER_OP("_ParallelConcatUpdate") .Input("value: T") - .Input("loc: Tshape") .Input("update: T") .Output("output: T") .Attr("T: type") - .Attr("Tshape: {int32, int64} = DT_INT32") + .Attr("loc: int") .SetShapeFn(shape_inference::UnchangedShape) .Doc(R"doc( Updates input `value` at `loc` with `update`. -If `loc` is None, `value` and `update` must be the same size. 
-``` -value = update -``` - -If `loc` is a scalar, `value` has rank 1 higher than `update` -``` -value[i, :] = update -``` - -If `loc` is a vector, `value` has the same rank as `update` -``` -value[loc, :] = update -``` - If you use this function you will almost certainly want to add a control dependency as done in the implementation of parallel_stack to avoid race conditions. value: A `Tensor` object that will be updated in-place. -loc: A scalar or 1-D `Tensor` indicating the indices of the first dimension - such that value[loc, :] is updated. +loc: A scalar indicating the index of the first dimension such that + value[loc, :] is updated. update: A `Tensor` of rank one less than `value` if `loc` is a scalar, otherwise of rank equal to `value` that contains the new values for `value`. @@ -1917,7 +1898,7 @@ This op first slices `input` along the dimension `batch_dim`, and for each slice `i`, reverses the first `seq_lengths[i]` elements along the dimension `seq_dim`. -The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`, +The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. The output slice `i` along dimension `batch_dim` is then given by input @@ -1970,7 +1951,7 @@ output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] input: The input to reverse. seq_lengths: 1-D with length `input.dims(batch_dim)` and - `max(seq_lengths) < input.dims(seq_dim)` + `max(seq_lengths) <= input.dims(seq_dim)` seq_dim: The dimension which is partially reversed. batch_dim: The dimension along which reversal is performed. output: The partially reversed input. It has the same shape as `input`. 
diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt index cfb7504664..49297ae409 100644 --- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt @@ -23219,6 +23219,53 @@ op { } } op { + name: "RecordInput" + output_arg { + name: "records" + type: DT_STRING + } + attr { + name: "file_pattern" + type: "string" + } + attr { + name: "file_random_seed" + type: "int" + default_value { + i: 301 + } + } + attr { + name: "file_shuffle_shift_ratio" + type: "float" + default_value { + f: 0 + } + } + attr { + name: "file_buffer_size" + type: "int" + default_value { + i: 10000 + } + } + attr { + name: "file_parallelism" + type: "int" + default_value { + i: 16 + } + } + attr { + name: "batch_size" + type: "int" + default_value { + i: 32 + } + } + is_stateful: true +} +op { name: "ReduceJoin" input_arg { name: "inputs" diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc index 54e766e8e9..a19d9483a1 100644 --- a/tensorflow/core/ops/data_flow_ops.cc +++ b/tensorflow/core/ops/data_flow_ops.cc @@ -2211,4 +2211,27 @@ dequeue with many fewer capabilities and options. This Op is optimized for performance. )doc"); +REGISTER_OP("RecordInput") + .Output("records: string") + .Attr("file_pattern: string") + .Attr("file_random_seed: int = 301") + .Attr("file_shuffle_shift_ratio: float = 0") + .Attr("file_buffer_size: int = 10000") + .Attr("file_parallelism: int = 16") + .Attr("batch_size: int = 32") + .SetIsStateful() + .SetShapeFn(shape_inference::UnknownShape) + .Doc(R"doc( +Emits randomized records. + +records: A tensor of shape [batch_size]. +file_pattern: Glob pattern for the data files. +file_random_seed: Random seeds used to produce randomized records. +file_shuffle_shift_ratio: Shifts the list of files after the list is randomly + shuffled. +file_buffer_size: The randomization shuffling buffer. 
+file_parallelism: How many sstables are opened and concurrently iterated over. +batch_size: The batch size. +)doc"); + } // namespace tensorflow diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index e631c289c6..937e9f588c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -15028,6 +15028,61 @@ op { description: "Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy`\nis the corresponding input gradient." } op { + name: "RecordInput" + output_arg { + name: "records" + description: "A tensor of shape [batch_size]." + type: DT_STRING + } + attr { + name: "file_pattern" + type: "string" + description: "Glob pattern for the data files." + } + attr { + name: "file_random_seed" + type: "int" + default_value { + i: 301 + } + description: "Random seeds used to produce randomized records." + } + attr { + name: "file_shuffle_shift_ratio" + type: "float" + default_value { + f: 0 + } + description: "Shifts the list of files after the list is randomly\nshuffled." + } + attr { + name: "file_buffer_size" + type: "int" + default_value { + i: 10000 + } + description: "The randomization shuffling buffer." + } + attr { + name: "file_parallelism" + type: "int" + default_value { + i: 16 + } + description: "How many sstables are opened and concurrently iterated over." + } + attr { + name: "batch_size" + type: "int" + default_value { + i: 32 + } + description: "The batch size." + } + summary: "Emits randomized records." + is_stateful: true +} +op { name: "ReduceJoin" input_arg { name: "inputs" @@ -17453,7 +17508,7 @@ op { } input_arg { name: "seq_lengths" - description: "1-D with length `input.dims(batch_dim)` and\n`max(seq_lengths) < input.dims(seq_dim)`" + description: "1-D with length `input.dims(batch_dim)` and\n`max(seq_lengths) <= input.dims(seq_dim)`" type_attr: "Tlen" } output_arg { @@ -17492,7 +17547,7 @@ op { } } summary: "Reverses variable length slices." 
- description: "This op first slices `input` along the dimension `batch_dim`, and for each\nslice `i`, reverses the first `seq_lengths[i]` elements along\nthe dimension `seq_dim`.\n\nThe elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`,\nand `seq_lengths` must be a vector of length `input.dims[batch_dim]`.\n\nThe output slice `i` along dimension `batch_dim` is then given by input\nslice `i`, with the first `seq_lengths[i]` slices along dimension\n`seq_dim` reversed.\n\nFor example:\n\n```prettyprint\n# Given this:\nbatch_dim = 0\nseq_dim = 1\ninput.dims = (4, 8, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]\noutput[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]\noutput[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]\noutput[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[0, 7:, :, ...] = input[0, 7:, :, ...]\noutput[1, 2:, :, ...] = input[1, 2:, :, ...]\noutput[2, 3:, :, ...] = input[2, 3:, :, ...]\noutput[3, 2:, :, ...] = input[3, 2:, :, ...]\n```\n\nIn contrast, if:\n\n```prettyprint\n# Given this:\nbatch_dim = 2\nseq_dim = 0\ninput.dims = (8, ?, 4, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]\noutput[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]\noutput[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]\noutput[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]\noutput[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]\noutput[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]\noutput[2:, :, 3, :, ...] 
= input[2:, :, 3, :, ...]\n```" + description: "This op first slices `input` along the dimension `batch_dim`, and for each\nslice `i`, reverses the first `seq_lengths[i]` elements along\nthe dimension `seq_dim`.\n\nThe elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,\nand `seq_lengths` must be a vector of length `input.dims[batch_dim]`.\n\nThe output slice `i` along dimension `batch_dim` is then given by input\nslice `i`, with the first `seq_lengths[i]` slices along dimension\n`seq_dim` reversed.\n\nFor example:\n\n```prettyprint\n# Given this:\nbatch_dim = 0\nseq_dim = 1\ninput.dims = (4, 8, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]\noutput[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]\noutput[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]\noutput[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[0, 7:, :, ...] = input[0, 7:, :, ...]\noutput[1, 2:, :, ...] = input[1, 2:, :, ...]\noutput[2, 3:, :, ...] = input[2, 3:, :, ...]\noutput[3, 2:, :, ...] = input[3, 2:, :, ...]\n```\n\nIn contrast, if:\n\n```prettyprint\n# Given this:\nbatch_dim = 2\nseq_dim = 0\ninput.dims = (8, ?, 4, ...)\nseq_lengths = [7, 2, 3, 5]\n\n# then slices of input are reversed on seq_dim, but only up to seq_lengths:\noutput[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]\noutput[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]\noutput[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]\noutput[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]\n\n# while entries past seq_lens are copied through:\noutput[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]\noutput[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]\noutput[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]\noutput[2:, :, 3, :, ...] 
= input[2:, :, 3, :, ...]\n```" } op { name: "ReverseV2" diff --git a/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc new file mode 100644 index 0000000000..6f852a653f --- /dev/null +++ b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.cc @@ -0,0 +1,37 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h" + +#include <chrono> + +namespace tensorflow { + +void ClockCycleProfiler::DumpStatistics(const string& tag) { + CHECK(!IsStarted()); + const double average_clock_cycle = GetAverageClockCycle(); + const double count = GetCount(); + const std::chrono::duration<double> average_time = + profile_utils::CpuUtils::ConvertClockCycleToTime( + static_cast<int64>(average_clock_cycle + 0.5)); + LOG(INFO) << tag << ": average = " + << std::chrono::duration_cast<std::chrono::microseconds>( + average_time) + .count() + << " us (" << average_clock_cycle << " cycles)" + << ", count = " << count; +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h new file mode 100644 index 0000000000..876bb9c020 --- /dev/null +++ b/tensorflow/core/platform/profile_utils/clock_cycle_profiler.h @@ 
-0,0 +1,104 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_ +#define TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_ + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/profile_utils/cpu_utils.h" + +namespace tensorflow { + +class ClockCycleProfiler { + public: + ClockCycleProfiler() = default; + + // Start counting clock cycle. + inline void Start() { + CHECK(!IsStarted()) << "Profiler has been already started."; + start_clock_ = GetCurrentClockCycleInternal(); + } + + // Stop counting clock cycle. + inline void Stop() { + CHECK(IsStarted()) << "Profiler is not started yet."; + AccumulateClockCycle(); + } + + // Get how many times Start() is called. + inline double GetCount() { + CHECK(!IsStarted()); + return count_; + } + + // Get average clock cycle. + inline double GetAverageClockCycle() { + CHECK(!IsStarted()); + return average_clock_cycle_; + } + + // TODO(satok): Support more statistics (e.g. standard deviation) + // Get worst clock cycle. 
+ inline double GetWorstClockCycle() { + CHECK(!IsStarted()); + return worst_clock_cycle_; + } + + // Dump statistics + void DumpStatistics(const string& tag); + + private: + inline uint64 GetCurrentClockCycleInternal() { + const uint64 clockCycle = profile_utils::CpuUtils::GetCurrentClockCycle(); + if (clockCycle <= 0) { + if (valid_) { + LOG(WARNING) << "GetCurrentClockCycle is not implemented." + << " Return 1 instead."; + valid_ = false; + } + return 1; + } else { + return clockCycle; + } + } + + inline bool IsStarted() const { return start_clock_ > 0; } + + inline void AccumulateClockCycle() { + const uint64 now = GetCurrentClockCycleInternal(); + const double clock_diff = static_cast<double>(now - start_clock_); + const double next_count = count_ + 1.0; + const double next_count_inv = 1.0 / next_count; + const double next_ave_cpu_clock = + next_count_inv * (average_clock_cycle_ * count_ + clock_diff); + count_ = next_count; + average_clock_cycle_ = next_ave_cpu_clock; + worst_clock_cycle_ = std::max(worst_clock_cycle_, clock_diff); + start_clock_ = 0; + } + + uint64 start_clock_{0}; + double count_{0.0}; + double average_clock_cycle_{0.0}; + double worst_clock_cycle_{0.0}; + bool valid_{true}; + + TF_DISALLOW_COPY_AND_ASSIGN(ClockCycleProfiler); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PLATFORM_PROFILE_UTILS_CLOCK_CYCLE_PROFILER_H_ diff --git a/tensorflow/core/platform/profile_utils/cpu_utils_test.cc b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc index 7cbd994661..fccc4d38a7 100644 --- a/tensorflow/core/platform/profile_utils/cpu_utils_test.cc +++ b/tensorflow/core/platform/profile_utils/cpu_utils_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/platform/profile_utils/cpu_utils.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -68,5 +69,18 @@ TEST_F(CpuUtilsTest, CheckMicroSecPerClock) { } } +TEST_F(CpuUtilsTest, SimpleUsageOfClockCycleProfiler) { + static constexpr int LOOP_COUNT = 10; + ClockCycleProfiler prof; + for (int i = 0; i < LOOP_COUNT; ++i) { + prof.Start(); + prof.Stop(); + } + EXPECT_EQ(LOOP_COUNT, static_cast<int>(prof.GetCount() + 0.5)); + if (DBG) { + prof.DumpStatistics("CpuUtilsTest"); + } +} + } // namespace profile_utils } // namespace tensorflow diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index 0c1cea5fc3..c795ba67a8 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -39,7 +39,6 @@ cc_binary( "notap", ], deps = [ - ":demo_proto_lib_cc", "//tensorflow/contrib/android:android_tensorflow_inference_jni", "//tensorflow/core:android_tensorflow_lib", LINKER_SCRIPT, @@ -118,20 +117,3 @@ filegroup( ) exports_files(["AndroidManifest.xml"]) - -load( - "//tensorflow/core:platform/default/build_config.bzl", - "tf_proto_library", -) - -tf_proto_library( - name = "demo_proto_lib", - srcs = glob( - ["**/*.proto"], - ), - cc_api_version = 2, - visibility = ["//visibility:public"], -) - -# ----------------------------------------------------------------------------- -# Google-internal targets go here (must be at the end). diff --git a/tensorflow/examples/android/jni/box_coder_jni.cc b/tensorflow/examples/android/jni/box_coder_jni.cc deleted file mode 100644 index be85414fc1..0000000000 --- a/tensorflow/examples/android/jni/box_coder_jni.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file loads the box coder mappings. - -#include <android/asset_manager.h> -#include <android/asset_manager_jni.h> -#include <android/bitmap.h> - -#include <jni.h> -#include <pthread.h> -#include <sys/stat.h> -#include <unistd.h> -#include <map> -#include <queue> -#include <sstream> -#include <string> - -#include "tensorflow/contrib/android/jni/jni_utils.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow/core/platform/types.h" - -#include "tensorflow/examples/android/proto/box_coder.pb.h" - -#define TENSORFLOW_METHOD(METHOD_NAME) \ - Java_org_tensorflow_demo_TensorFlowMultiBoxDetector_##METHOD_NAME // NOLINT - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -JNIEXPORT void JNICALL TENSORFLOW_METHOD(loadCoderOptions)( - JNIEnv* env, jobject thiz, jobject java_asset_manager, jstring location, - jfloatArray priors); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -JNIEXPORT void JNICALL TENSORFLOW_METHOD(loadCoderOptions)( - JNIEnv* env, jobject thiz, jobject java_asset_manager, jstring location, - jfloatArray priors) { - AAssetManager* const asset_manager = - AAssetManager_fromJava(env, java_asset_manager); - LOG(INFO) << "Acquired AssetManager."; - - const std::string location_str = GetString(env, location); - - org_tensorflow_demo::MultiBoxCoderOptions multi_options; - - LOG(INFO) << "Reading file to proto: " 
<< location_str; - ReadFileToProtoOrDie(asset_manager, location_str.c_str(), &multi_options); - - LOG(INFO) << "Read file. " << multi_options.box_coder_size() << " entries."; - - jboolean iCopied = JNI_FALSE; - jfloat* values = env->GetFloatArrayElements(priors, &iCopied); - - const int array_length = env->GetArrayLength(priors); - LOG(INFO) << "Array length: " << array_length - << " (/8 = " << (array_length / 8) << ")"; - CHECK_EQ(array_length % 8, 0); - - const int num_items = - std::min(array_length / 8, multi_options.box_coder_size()); - - for (int i = 0; i < num_items; ++i) { - const org_tensorflow_demo::BoxCoderOptions& options = - multi_options.box_coder(i); - - for (int j = 0; j < 4; ++j) { - const org_tensorflow_demo::BoxCoderPrior& prior = options.priors(j); - values[i * 8 + j * 2] = prior.mean(); - values[i * 8 + j * 2 + 1] = prior.stddev(); - } - } - env->ReleaseFloatArrayElements(priors, values, 0); - - LOG(INFO) << "Read " << num_items << " options"; -} diff --git a/tensorflow/examples/android/proto/box_coder.proto b/tensorflow/examples/android/proto/box_coder.proto deleted file mode 100644 index 8576294110..0000000000 --- a/tensorflow/examples/android/proto/box_coder.proto +++ /dev/null @@ -1,42 +0,0 @@ -syntax = "proto2"; - -package org_tensorflow_demo; - -// Prior for a single feature (like minimum x coordinate, width, area, etc.) -message BoxCoderPrior { - optional float mean = 1 [default = 0.0]; - optional float stddev = 2 [default = 1.0]; -}; - -// Box encoding/decoding configuration for a single box. -message BoxCoderOptions { - // Number of priors must match the number of values used to encoded - // values which is derived from the use_... flags below. - repeated BoxCoderPrior priors = 1; - - // Minimum/maximum X/Y of the four corners are used as features. - // Order: MinX, MinY, MaxX, MaxY. - // Number of values: 4. - optional bool use_corners = 2 [default = true]; - - // Width and height of the box in this order. - // Number of values: 2. 
- optional bool use_width_height = 3 [default = false]; - - // Coordinates of the center of the box. - // Order: X, Y. - // Number of values: 2. - optional bool use_center = 4 [default = false]; - - // Area of the box. - // Number of values: 1. - optional bool use_area = 5 [default = false]; -}; - -// Options for MultiBoxCoder which is a encoder/decoder for a fixed number of -// boxes. -// A list of BoxCoderOptions that allows for storing multiple box coder options -// in a single file. -message MultiBoxCoderOptions { - repeated BoxCoderOptions box_coder = 1; -}; diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java index 9ab5a7108a..d06f2d3c0f 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/DetectorActivity.java @@ -60,7 +60,7 @@ public class DetectorActivity extends CameraActivity implements OnImageAvailable private static final String MB_OUTPUT_NAMES = "output_locations/Reshape,output_scores/Reshape"; private static final String MB_MODEL_FILE = "file:///android_asset/multibox_model.pb"; private static final String MB_LOCATION_FILE = - "file:///android_asset/multibox_location_priors.pb"; + "file:///android_asset/multibox_location_priors.txt"; // Configuration values for tiny-yolo-voc. Note that the graph is not included with TensorFlow and // must be manually placed in the assets/ directory by the user. 
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java index e438956c7d..34a4361626 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowMultiBoxDetector.java @@ -19,10 +19,16 @@ import android.content.res.AssetManager; import android.graphics.Bitmap; import android.graphics.RectF; import android.os.Trace; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.PriorityQueue; +import java.util.StringTokenizer; import org.tensorflow.contrib.android.TensorFlowInferenceInterface; import org.tensorflow.demo.env.Logger; @@ -80,7 +86,7 @@ public class TensorFlowMultiBoxDetector implements Classifier { final float imageStd, final String inputName, final String outputName) { - TensorFlowMultiBoxDetector d = new TensorFlowMultiBoxDetector(); + final TensorFlowMultiBoxDetector d = new TensorFlowMultiBoxDetector(); d.inputName = inputName; d.inputSize = inputSize; d.imageMean = imageMean; @@ -89,7 +95,11 @@ public class TensorFlowMultiBoxDetector implements Classifier { d.boxPriors = new float[numLocations * 8]; - d.loadCoderOptions(assetManager, locationFilename, d.boxPriors); + try { + d.loadCoderOptions(assetManager, locationFilename, d.boxPriors); + } catch (final IOException e) { + throw new RuntimeException("Error initializing box priors from " + locationFilename); + } // Pre-allocate buffers. d.outputNames = outputName.split(","); @@ -110,9 +120,42 @@ public class TensorFlowMultiBoxDetector implements Classifier { private TensorFlowMultiBoxDetector() {} - // Load BoxCoderOptions from native code. 
- private native void loadCoderOptions( - AssetManager assetManager, String locationFilename, float[] boxPriors); + private void loadCoderOptions( + final AssetManager assetManager, final String locationFilename, final float[] boxPriors) + throws IOException { + // Try to be intelligent about opening from assets or sdcard depending on prefix. + final String assetPrefix = "file:///android_asset/"; + InputStream is; + if (locationFilename.startsWith(assetPrefix)) { + is = assetManager.open(locationFilename.split(assetPrefix)[1]); + } else { + is = new FileInputStream(locationFilename); + } + + // Read values. Number of values per line doesn't matter, as long as they are separated + // by commas and/or whitespace, and there are exactly numLocations * 8 values total. + // Values are in the order mean, std for each consecutive corner of each box, for a total of 8 + // per location. + final BufferedReader reader = new BufferedReader(new InputStreamReader(is)); + int priorIndex = 0; + String line; + while ((line = reader.readLine()) != null) { + final StringTokenizer st = new StringTokenizer(line, ", "); + while (st.hasMoreTokens()) { + final String token = st.nextToken(); + try { + final float number = Float.parseFloat(token); + boxPriors[priorIndex++] = number; + } catch (final NumberFormatException e) { + // Silently ignore. 
+ } + } + } + if (priorIndex != boxPriors.length) { + throw new RuntimeException( + "BoxPrior length mismatch: " + priorIndex + " vs " + boxPriors.length); + } + } private float[] decodeLocationsEncoding(final float[] locationEncoding) { final float[] locations = new float[locationEncoding.length]; @@ -216,7 +259,7 @@ public class TensorFlowMultiBoxDetector implements Classifier { } @Override - public void enableStatLogging(boolean debug) { + public void enableStatLogging(final boolean debug) { inferenceInterface.enableStatLogging(debug); } diff --git a/tensorflow/examples/label_image/main.cc b/tensorflow/examples/label_image/main.cc index 544b1b2738..08e6e4544a 100644 --- a/tensorflow/examples/label_image/main.cc +++ b/tensorflow/examples/label_image/main.cc @@ -97,7 +97,7 @@ Status ReadTensorFromImageFile(string file_name, const int input_height, file_name); // Now try to figure out what kind of file it is and decode it. const int wanted_channels = 3; - Output image_reader; + tensorflow::Output image_reader; if (tensorflow::StringPiece(file_name).ends_with(".png")) { image_reader = DecodePng(root.WithOpName("png_reader"), file_reader, DecodePng::Channels(wanted_channels)); diff --git a/tensorflow/g3doc/api_docs/python/array_ops.md b/tensorflow/g3doc/api_docs/python/array_ops.md index 2dcf6bcca6..cb30382c6b 100644 --- a/tensorflow/g3doc/api_docs/python/array_ops.md +++ b/tensorflow/g3doc/api_docs/python/array_ops.md @@ -1109,7 +1109,7 @@ This op first slices `input` along the dimension `batch_axis`, and for each slice `i`, reverses the first `seq_lengths[i]` elements along the dimension `seq_axis`. -The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`, +The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. The output slice `i` along dimension `batch_axis` is then given by input @@ -1166,7 +1166,7 @@ output[2:, :, 3, :, ...] 
= input[2:, :, 3, :, ...] * <b>`input`</b>: A `Tensor`. The input to reverse. * <b>`seq_lengths`</b>: A `Tensor`. Must be one of the following types: `int32`, `int64`. 1-D with length `input.dims(batch_dim)` and - `max(seq_lengths) < input.dims(seq_dim)` + `max(seq_lengths) <= input.dims(seq_dim)` * <b>`seq_axis`</b>: An `int`. The dimension which is partially reversed. * <b>`batch_axis`</b>: An optional `int`. Defaults to `0`. The dimension along which reversal is performed. diff --git a/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md b/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md index 303a99020c..b5aae70911 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md +++ b/tensorflow/g3doc/api_docs/python/contrib.graph_editor.md @@ -1794,6 +1794,9 @@ This handler is typically used to transform a hidden input tensors. Add the transformed elem to the (renamed) collections of elem. +A collection is renamed only if is not a known key, as described in +`tf.GraphKeys`. + ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.md b/tensorflow/g3doc/api_docs/python/contrib.learn.md index 1e515d6490..fb790e2f1e 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.learn.md +++ b/tensorflow/g3doc/api_docs/python/contrib.learn.md @@ -485,22 +485,19 @@ The signature of the input_fn accepted by export is changing to be consistent wi - - - -#### `tf.contrib.learn.Estimator.export_savedmodel(*args, **kwargs)` {#Estimator.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.Estimator.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#Estimator.export_savedmodel} +Exports inference graph as a SavedModel into given dir. 
##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -509,7 +506,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: @@ -1038,22 +1034,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.DNNClassifier.export_savedmodel(*args, **kwargs)` {#DNNClassifier.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.DNNClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNClassifier.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. 
* <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -1062,7 +1055,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: @@ -1466,22 +1458,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.DNNRegressor.export_savedmodel(*args, **kwargs)` {#DNNRegressor.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.DNNRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNRegressor.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -1490,7 +1479,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. 
* <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: @@ -1890,22 +1878,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.LinearClassifier.export_savedmodel(*args, **kwargs)` {#LinearClassifier.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.LinearClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearClassifier.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -1914,7 +1899,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: @@ -2276,22 +2260,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.LinearRegressor.export_savedmodel(*args, **kwargs)` {#LinearRegressor.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. 
It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.LinearRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearRegressor.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -2300,7 +2281,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md b/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md index c7e32f0437..dae7162a0d 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md +++ b/tensorflow/g3doc/api_docs/python/contrib.learn.monitors.md @@ -2384,7 +2384,7 @@ Can do early stopping on validation metrics if `early_stopping_rounds` is provided. 
- - - -#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__} +#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, hooks=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__} Initializes a ValidationMonitor. @@ -2399,6 +2399,8 @@ Initializes a ValidationMonitor. * <b>`every_n_steps`</b>: Check for new checkpoints to evaluate every N steps. If a new checkpoint is found, it is evaluated. See `EveryN`. * <b>`metrics`</b>: See `BaseEstimator.evaluate`. +* <b>`hooks`</b>: A list of `SessionRunHook` hooks to pass to the + `Estimator`'s `evaluate` function. * <b>`early_stopping_rounds`</b>: `int`. If the metric indicated by `early_stopping_metric` does not change according to `early_stopping_metric_minimize` for this many steps, then training diff --git a/tensorflow/g3doc/api_docs/python/contrib.linalg.md b/tensorflow/g3doc/api_docs/python/contrib.linalg.md index cbbffb1e78..509dc10e93 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.linalg.md +++ b/tensorflow/g3doc/api_docs/python/contrib.linalg.md @@ -237,7 +237,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperator.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperator.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperator.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. 
@@ -287,7 +287,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperator.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperator.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperator.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -380,7 +380,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperator.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperator.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperator.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -416,7 +416,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.shape_dynamic(name='shape_dynamic')` {#LinearOperator.shape_dynamic} +#### `tf.contrib.linalg.LinearOperator.shape_tensor(name='shape_tensor')` {#LinearOperator.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -497,7 +497,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperator.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperator.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperator.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. 
@@ -720,7 +720,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorDiag.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorDiag.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -770,7 +770,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorDiag.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorDiag.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -863,7 +863,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorDiag.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorDiag.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -899,7 +899,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.shape_dynamic(name='shape_dynamic')` {#LinearOperatorDiag.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.shape_tensor(name='shape_tensor')` {#LinearOperatorDiag.shape_tensor} Shape of this `LinearOperator`, determined at runtime. 
@@ -980,7 +980,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorDiag.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorDiag.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. @@ -1237,7 +1237,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorIdentity.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorIdentity.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -1287,7 +1287,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorIdentity.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorIdentity.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -1380,7 +1380,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorIdentity.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorIdentity.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. 
@@ -1416,7 +1416,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorIdentity.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorIdentity.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -1497,7 +1497,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorIdentity.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorIdentity.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. @@ -1728,7 +1728,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorScaledIdentity.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorScaledIdentity.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -1778,7 +1778,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorScaledIdentity.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorScaledIdentity.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. 
@@ -1878,7 +1878,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorScaledIdentity.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorScaledIdentity.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -1914,7 +1914,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorScaledIdentity.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorScaledIdentity.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -1995,7 +1995,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorScaledIdentity.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorScaledIdentity.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. @@ -2209,7 +2209,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorMatrix.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorMatrix.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. 
@@ -2259,7 +2259,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorMatrix.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorMatrix.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -2352,7 +2352,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorMatrix.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorMatrix.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -2388,7 +2388,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.shape_dynamic(name='shape_dynamic')` {#LinearOperatorMatrix.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.shape_tensor(name='shape_tensor')` {#LinearOperatorMatrix.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -2469,7 +2469,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorMatrix.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorMatrix.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. 
@@ -2685,7 +2685,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorTriL.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorTriL.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -2735,7 +2735,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorTriL.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorTriL.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -2828,7 +2828,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorTriL.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorTriL.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -2864,7 +2864,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.shape_dynamic(name='shape_dynamic')` {#LinearOperatorTriL.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.shape_tensor(name='shape_tensor')` {#LinearOperatorTriL.shape_tensor} Shape of this `LinearOperator`, determined at runtime. 
@@ -2945,7 +2945,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorTriL.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorTriL.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. @@ -3172,7 +3172,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorComposition.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorComposition.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -3222,7 +3222,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorComposition.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorComposition.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -3322,7 +3322,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorComposition.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorComposition.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. 
@@ -3358,7 +3358,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.shape_dynamic(name='shape_dynamic')` {#LinearOperatorComposition.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.shape_tensor(name='shape_tensor')` {#LinearOperatorComposition.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -3439,7 +3439,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorComposition.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorComposition.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md index 4d6cf8625c..1537865fc6 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md +++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md @@ -1469,10 +1469,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -1515,10 +1515,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. 
##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -1562,10 +1562,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` @@ -1609,10 +1609,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. 
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md index 6009e8262e..f5b1ca422c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.learn.LinearRegressor.md @@ -137,22 +137,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.LinearRegressor.export_savedmodel(*args, **kwargs)` {#LinearRegressor.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.LinearRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearRegressor.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -161,7 +158,6 @@ THIS FUNCTION IS EXPERIMENTAL. 
It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md index a449b2f097..1900385928 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.linalg.LinearOperatorDiag.md @@ -189,7 +189,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorDiag.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorDiag.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -239,7 +239,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorDiag.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorDiag.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. 
@@ -332,7 +332,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorDiag.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorDiag.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -368,7 +368,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.shape_dynamic(name='shape_dynamic')` {#LinearOperatorDiag.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.shape_tensor(name='shape_tensor')` {#LinearOperatorDiag.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -449,7 +449,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorDiag.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorDiag.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorDiag.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md index b950cd5fe6..c6e8c748bf 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.reverse_sequence.md @@ -6,7 +6,7 @@ This op first slices `input` along the dimension `batch_axis`, and for each slice `i`, reverses the first `seq_lengths[i]` elements along the dimension `seq_axis`. 
-The elements of `seq_lengths` must obey `seq_lengths[i] < input.dims[seq_dim]`, +The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. The output slice `i` along dimension `batch_axis` is then given by input @@ -63,7 +63,7 @@ output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] * <b>`input`</b>: A `Tensor`. The input to reverse. * <b>`seq_lengths`</b>: A `Tensor`. Must be one of the following types: `int32`, `int64`. 1-D with length `input.dims(batch_dim)` and - `max(seq_lengths) < input.dims(seq_dim)` + `max(seq_lengths) <= input.dims(seq_dim)` * <b>`seq_axis`</b>: An `int`. The dimension which is partially reversed. * <b>`batch_axis`</b>: An optional `int`. Defaults to `0`. The dimension along which reversal is performed. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md index d649e42181..08de000315 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.LinearClassifier.md @@ -165,22 +165,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.LinearClassifier.export_savedmodel(*args, **kwargs)` {#LinearClassifier.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.LinearClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#LinearClassifier.export_savedmodel} +Exports inference graph as a SavedModel into given dir. 
##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -189,7 +186,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md index 2bafff8cdf..b24a86f1e1 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.learn.monitors.ValidationMonitor.md @@ -7,7 +7,7 @@ Can do early stopping on validation metrics if `early_stopping_rounds` is provided. 
- - - -#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__} +#### `tf.contrib.learn.monitors.ValidationMonitor.__init__(x=None, y=None, input_fn=None, batch_size=None, eval_steps=None, every_n_steps=100, metrics=None, hooks=None, early_stopping_rounds=None, early_stopping_metric='loss', early_stopping_metric_minimize=True, name=None)` {#ValidationMonitor.__init__} Initializes a ValidationMonitor. @@ -22,6 +22,8 @@ Initializes a ValidationMonitor. * <b>`every_n_steps`</b>: Check for new checkpoints to evaluate every N steps. If a new checkpoint is found, it is evaluated. See `EveryN`. * <b>`metrics`</b>: See `BaseEstimator.evaluate`. +* <b>`hooks`</b>: A list of `SessionRunHook` hooks to pass to the + `Estimator`'s `evaluate` function. * <b>`early_stopping_rounds`</b>: `int`. 
If the metric indicated by `early_stopping_metric` does not change according to `early_stopping_metric_minimize` for this many steps, then training diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md index 5e051e5ba8..ee7140922c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorComposition.md @@ -193,7 +193,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorComposition.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorComposition.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -243,7 +243,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorComposition.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorComposition.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. 
@@ -343,7 +343,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorComposition.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorComposition.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -379,7 +379,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.shape_dynamic(name='shape_dynamic')` {#LinearOperatorComposition.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.shape_tensor(name='shape_tensor')` {#LinearOperatorComposition.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -460,7 +460,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorComposition.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorComposition.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorComposition.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md index 37e711c819..f4d68516dc 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.linalg.LinearOperatorIdentity.md @@ -226,7 +226,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorIdentity.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorIdentity.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -276,7 +276,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorIdentity.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorIdentity.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -369,7 +369,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorIdentity.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorIdentity.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. 
@@ -405,7 +405,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorIdentity.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorIdentity.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -486,7 +486,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorIdentity.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorIdentity.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md index 519d5f253e..e76b7838ed 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.LoggingTensorHook.md @@ -3,7 +3,7 @@ Prints the given tensors once every N local steps or once every N seconds. The tensors will be printed to the log, with `INFO` severity. - - - -#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None)` {#LoggingTensorHook.__init__} +#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None, formatter=None)` {#LoggingTensorHook.__init__} Initializes a LoggingHook monitor. @@ -17,6 +17,8 @@ Initializes a LoggingHook monitor. * <b>`every_n_secs`</b>: `int` or `float`, print the values of `tensors` once every N seconds. Exactly one of `every_n_iter` and `every_n_secs` should be provided. 
+* <b>`formatter`</b>: function, takes dict of `tag`->`Tensor` and returns a string. + If `None` uses default printing all tensors. ##### Raises: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md index 19cec59080..254e28a70a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.train.MonitoredTrainingSession.md @@ -1,4 +1,4 @@ -### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, config=None)` {#MonitoredTrainingSession} +### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, save_summaries_secs=None, config=None)` {#MonitoredTrainingSession} Creates a `MonitoredSession` for training. @@ -26,8 +26,12 @@ inialize/restore. using a default checkpoint saver. If `save_checkpoint_secs` is set to `None`, then the default checkpoint saver isn't used. * <b>`save_summaries_steps`</b>: The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If - `save_summaries_steps` is set to `None`, then the default summary saver + summaries are written to disk using a default summary saver. If both + `save_summaries_steps` and `save_summaries_secs` are set to `None`, then + the default summary saver isn't used. +* <b>`save_summaries_secs`</b>: The frequency, in secs, that the summaries are written + to disk using a default summary saver. If both `save_summaries_steps` and + `save_summaries_secs` are set to `None`, then the default summary saver isn't used. 
* <b>`config`</b>: an instance of `tf.ConfigProto` proto used to configure the session. It's the `config` argument of constructor of `tf.Session`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md index 851a1d2210..eeb4226633 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf_debug.LocalCLIDebugHook.md @@ -34,12 +34,18 @@ Create a local debugger command-line interface (CLI) hook. Add a tensor filter. +See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details. +Override default behavior to accommodate the possibility of this method being +called prior to the initialization of the underlying +`LocalCLIDebugWrapperSession` object. + ##### Args: -* <b>`filter_name`</b>: (`str`) name of the filter. -* <b>`tensor_filter`</b>: (`callable`) the filter callable. See the doc string of - `DebugDumpDir.find()` for more details about its signature. +* <b>`filter_name`</b>: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` + for details. +* <b>`tensor_filter`</b>: See doc of + `LocalCLIDebugWrapperSession.add_tensor_filter()` for details. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md deleted file mode 100644 index 519fd9a875..0000000000 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.neg.md +++ /dev/null @@ -1,16 +0,0 @@ -### `tf.neg(x, name=None)` {#neg} - -Computes numerical negative value element-wise. - -I.e., \\(y = -x\\). - -##### Args: - - -* <b>`x`</b>: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`, `int32`, `int64`, `complex64`, `complex128`. -* <b>`name`</b>: A name for the operation (optional). 
- -##### Returns: - - A `Tensor`. Has the same type as `x`. - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md index e1caff4de8..9b900ac378 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md @@ -157,22 +157,19 @@ The signature of the input_fn accepted by export is changing to be consistent wi - - - -#### `tf.contrib.learn.Estimator.export_savedmodel(*args, **kwargs)` {#Estimator.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.Estimator.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#Estimator.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -181,7 +178,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. 
* <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md index 9cef244fe4..f37278eb55 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.linalg.LinearOperatorScaledIdentity.md @@ -200,7 +200,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorScaledIdentity.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorScaledIdentity.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -250,7 +250,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorScaledIdentity.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorScaledIdentity.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. 
@@ -350,7 +350,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorScaledIdentity.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorScaledIdentity.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -386,7 +386,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_dynamic(name='shape_dynamic')` {#LinearOperatorScaledIdentity.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.shape_tensor(name='shape_tensor')` {#LinearOperatorScaledIdentity.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -467,7 +467,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorScaledIdentity.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorScaledIdentity.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorScaledIdentity.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md index dd5d361619..b1f95ca2ae 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.learn.DNNClassifier.md @@ -165,22 +165,19 @@ See BaseEstimator.export. 
- - - -#### `tf.contrib.learn.DNNClassifier.export_savedmodel(*args, **kwargs)` {#DNNClassifier.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.DNNClassifier.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNClassifier.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -189,7 +186,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. 
##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md index aa3019dbf4..a022639c94 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_true_positives.md @@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md index bcf0156924..0aa696ba2f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.nn.sparse_softmax_cross_entropy_with_logits.md @@ -28,13 +28,13 @@ this function.** _sentinel: Used to prevent positional parameters. Internal, do not use. -* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or - `int64`. 
Each entry in `labels` must be an index in `[0, num_classes)`. - Other values will raise an exception when this op is run on CPU, and - return `NaN` for corresponding corresponding loss and gradient rows - on GPU. -* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape - `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`. +* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of + `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` + must be an index in `[0, num_classes)`. Other values will raise an + exception when this op is run on CPU, and return `NaN` for corresponding + loss and gradient rows on GPU. +* <b>`logits`</b>: Unscaled log probabilities of shape + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. * <b>`name`</b>: A name for the operation (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md index 872705a482..33e1f1c591 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.train.write_graph.md @@ -27,3 +27,7 @@ tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt') * <b>`name`</b>: Filename for the graph. * <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto. +##### Returns: + + The path of the output proto file. 
+ diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md index c31a7c68dc..d3f748fec7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_false_positives.md @@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md index 05b2eba532..153da470ea 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.graph_editor.assign_renamed_collections_handler.md @@ -2,6 +2,9 @@ Add the transformed elem to the (renamed) collections of elem. +A collection is renamed only if it is not a known key, as described in +`tf.GraphKeys`.
+ ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md index 40bb846034..af1ab47660 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.linalg.LinearOperatorMatrix.md @@ -183,7 +183,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorMatrix.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorMatrix.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -233,7 +233,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorMatrix.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorMatrix.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -326,7 +326,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorMatrix.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorMatrix.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. 
@@ -362,7 +362,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.shape_dynamic(name='shape_dynamic')` {#LinearOperatorMatrix.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.shape_tensor(name='shape_tensor')` {#LinearOperatorMatrix.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -443,7 +443,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorMatrix.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorMatrix.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorMatrix.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md index 5454b65f26..13e8d3395a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.linalg.LinearOperatorTriL.md @@ -185,7 +185,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperatorTriL.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperatorTriL.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. 
@@ -235,7 +235,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperatorTriL.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperatorTriL.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -328,7 +328,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperatorTriL.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperatorTriL.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. @@ -364,7 +364,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.shape_dynamic(name='shape_dynamic')` {#LinearOperatorTriL.shape_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.shape_tensor(name='shape_tensor')` {#LinearOperatorTriL.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -445,7 +445,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperatorTriL.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperatorTriL.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperatorTriL.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md index d8f12ab9eb..5b9dfd33f4 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_true_negatives.md @@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. * <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md index 5934a587fe..22e7531e78 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.learn.DNNRegressor.md @@ -129,22 +129,19 @@ See BaseEstimator.export. - - - -#### `tf.contrib.learn.DNNRegressor.export_savedmodel(*args, **kwargs)` {#DNNRegressor.export_savedmodel} - -Exports inference graph as a SavedModel into given dir. (experimental) - -THIS FUNCTION IS EXPERIMENTAL. 
It may change or be removed at any time, and without warning. +#### `tf.contrib.learn.DNNRegressor.export_savedmodel(export_dir_base, serving_input_fn, default_output_alternative_key=None, assets_extra=None, as_text=False)` {#DNNRegressor.export_savedmodel} +Exports inference graph as a SavedModel into given dir. ##### Args: * <b>`export_dir_base`</b>: A string containing a directory to write the exported graph and checkpoints. -* <b>`input_fn`</b>: A function that takes no argument and +* <b>`serving_input_fn`</b>: A function that takes no argument and returns an `InputFnOps`. * <b>`default_output_alternative_key`</b>: the name of the head to serve when none is - specified. + specified. Not needed for single-headed models. * <b>`assets_extra`</b>: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination path (including the filename) relative to the assets.extra directory. @@ -153,7 +150,6 @@ THIS FUNCTION IS EXPERIMENTAL. It may change or be removed at any time, and with renaming it is specified as `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`. * <b>`as_text`</b>: whether to write the SavedModel proto in text format. -* <b>`exports_to_keep`</b>: Number of exports to keep. 
##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md index a07c373774..41a5a1cb74 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.linalg.LinearOperator.md @@ -215,7 +215,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.batch_shape_dynamic(name='batch_shape_dynamic')` {#LinearOperator.batch_shape_dynamic} +#### `tf.contrib.linalg.LinearOperator.batch_shape_tensor(name='batch_shape_tensor')` {#LinearOperator.batch_shape_tensor} Shape of batch dimensions of this operator, determined at runtime. @@ -265,7 +265,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.domain_dimension_dynamic(name='domain_dimension_dynamic')` {#LinearOperator.domain_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperator.domain_dimension_tensor(name='domain_dimension_tensor')` {#LinearOperator.domain_dimension_tensor} Dimension (in the sense of vector spaces) of the domain of this operator. @@ -358,7 +358,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.range_dimension_dynamic(name='range_dimension_dynamic')` {#LinearOperator.range_dimension_dynamic} +#### `tf.contrib.linalg.LinearOperator.range_dimension_tensor(name='range_dimension_tensor')` {#LinearOperator.range_dimension_tensor} Dimension (in the sense of vector spaces) of the range of this operator. 
@@ -394,7 +394,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.shape_dynamic(name='shape_dynamic')` {#LinearOperator.shape_dynamic} +#### `tf.contrib.linalg.LinearOperator.shape_tensor(name='shape_tensor')` {#LinearOperator.shape_tensor} Shape of this `LinearOperator`, determined at runtime. @@ -475,7 +475,7 @@ If this operator acts like the batch matrix `A` with - - - -#### `tf.contrib.linalg.LinearOperator.tensor_rank_dynamic(name='tensor_rank_dynamic')` {#LinearOperator.tensor_rank_dynamic} +#### `tf.contrib.linalg.LinearOperator.tensor_rank_tensor(name='tensor_rank_tensor')` {#LinearOperator.tensor_rank_tensor} Rank (in the sense of tensors) of matrix corresponding to this operator. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md index 878ba46941..1464305257 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.metrics.streaming_false_negatives.md @@ -7,10 +7,10 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ##### Args: -* <b>`predictions`</b>: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. -* <b>`labels`</b>: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. +* <b>`predictions`</b>: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. +* <b>`labels`</b>: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. 
* <b>`weights`</b>: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md index 9026066f66..c425a3e64b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_local_variable.md @@ -14,7 +14,7 @@ for an extensive description of how reusing works. Here is a basic example: with tf.variable_scope("foo"): v = tf.get_variable("v", [1]) # v.name == "foo/v:0" w = tf.get_variable("w", [1]) # w.name == "foo/w:0" -with tf.variable_scope("foo", reuse=True) +with tf.variable_scope("foo", reuse=True): v1 = tf.get_variable("v") # The same as v above. ``` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md index c7040d28da..f09098eb51 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.get_variable.md @@ -11,7 +11,7 @@ for an extensive description of how reusing works. Here is a basic example: with tf.variable_scope("foo"): v = tf.get_variable("v", [1]) # v.name == "foo/v:0" w = tf.get_variable("w", [1]) # w.name == "foo/w:0" -with tf.variable_scope("foo", reuse=True) +with tf.variable_scope("foo", reuse=True): v1 = tf.get_variable("v") # The same as v above. 
``` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md new file mode 100644 index 0000000000..1797a0d3b5 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FeedFnHook.md @@ -0,0 +1,88 @@ +Runs `feed_fn` and sets the `feed_dict` accordingly. +- - - + +#### `tf.train.FeedFnHook.__init__(feed_fn)` {#FeedFnHook.__init__} + +Constructs the FeedFnHook with given `feed_fn`. + +##### Args: + + +* <b>`feed_fn`</b>: function, no arguments and returns `dict` to feed. + + +- - - + +#### `tf.train.FeedFnHook.after_create_session(session, coord)` {#FeedFnHook.after_create_session} + +Called when new TensorFlow session is created. + +This is called to signal the hooks that a new session has been created. This +has two essential differences with the situation in which `begin` is called: + +* When this is called, the graph is finalized and ops can no longer be added + to the graph. +* This method will also be called as a result of recovering a wrapped + session, not only at the beginning of the overall session. + +##### Args: + + +* <b>`session`</b>: A TensorFlow Session that has been created. +* <b>`coord`</b>: A Coordinator object which keeps track of all threads. + + +- - - + +#### `tf.train.FeedFnHook.after_run(run_context, run_values)` {#FeedFnHook.after_run} + +Called after each call to run(). + +The `run_values` argument contains results of requested ops/tensors by +`before_run()`. + +The `run_context` argument is the same one sent to `before_run` call. +`run_context.request_stop()` can be called to stop the iteration. + +##### Args: + + +* <b>`run_context`</b>: A `SessionRunContext` object. +* <b>`run_values`</b>: A SessionRunValues object.
+ + +- - - + +#### `tf.train.FeedFnHook.before_run(run_context)` {#FeedFnHook.before_run} + + + + +- - - + +#### `tf.train.FeedFnHook.begin()` {#FeedFnHook.begin} + +Called once before using the session. + +When called, the default graph is the one that will be launched in the +session. The hook can modify the graph by adding new operations to it. +After the `begin()` call the graph will be finalized and the other callbacks +can not modify the graph anymore. Second call of `begin()` on the same +graph, should not change the graph. + + +- - - + +#### `tf.train.FeedFnHook.end(session)` {#FeedFnHook.end} + +Called at the end of session. + +The `session` argument can be used in case the hook wants to run final ops, +such as saving a last checkpoint. + +##### Args: + + +* <b>`session`</b>: A TensorFlow Session that will be soon closed. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md new file mode 100644 index 0000000000..bf8e7184b6 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.FinalOpsHook.md @@ -0,0 +1,111 @@ +A run hook which evaluates `Tensors` at the end of a session. +- - - + +#### `tf.train.FinalOpsHook.__init__(final_ops, final_ops_feed_dict=None)` {#FinalOpsHook.__init__} + +Constructs the FinalOpsHook with ops to run at the end of the session. + +##### Args: + + +* <b>`final_ops`</b>: A single `Tensor`, a list of `Tensors` or a dictionary of + names to `Tensors`. +* <b>`final_ops_feed_dict`</b>: A feed dictionary to use when running + `final_ops`. + + +- - - + +#### `tf.train.FinalOpsHook.after_create_session(session, coord)` {#FinalOpsHook.after_create_session} + +Called when new TensorFlow session is created. + +This is called to signal the hooks that a new session has been created.
This +has two essential differences with the situation in which `begin` is called: + +* When this is called, the graph is finalized and ops can no longer be added + to the graph. +* This method will also be called as a result of recovering a wrapped + session, not only at the beginning of the overall session. + +##### Args: + + +* <b>`session`</b>: A TensorFlow Session that has been created. +* <b>`coord`</b>: A Coordinator object which keeps track of all threads. + + +- - - + +#### `tf.train.FinalOpsHook.after_run(run_context, run_values)` {#FinalOpsHook.after_run} + +Called after each call to run(). + +The `run_values` argument contains results of requested ops/tensors by +`before_run()`. + +The `run_context` argument is the same one sent to `before_run` call. +`run_context.request_stop()` can be called to stop the iteration. + +##### Args: + + +* <b>`run_context`</b>: A `SessionRunContext` object. +* <b>`run_values`</b>: A SessionRunValues object. + + +- - - + +#### `tf.train.FinalOpsHook.before_run(run_context)` {#FinalOpsHook.before_run} + +Called before each call to run(). + +You can return from this call a `SessionRunArgs` object indicating ops or +tensors to add to the upcoming `run()` call. These ops/tensors will be run +together with the ops/tensors originally passed to the original run() call. +The run args you return can also contain feeds to be added to the run() +call. + +The `run_context` argument is a `SessionRunContext` that provides +information about the upcoming `run()` call: the originally requested +op/tensors, the TensorFlow Session. + +At this point graph is finalized and you can not add ops. + +##### Args: + + +* <b>`run_context`</b>: A `SessionRunContext` object. + +##### Returns: + + None or a `SessionRunArgs` object. + + +- - - + +#### `tf.train.FinalOpsHook.begin()` {#FinalOpsHook.begin} + +Called once before using the session. + +When called, the default graph is the one that will be launched in the +session.
The hook can modify the graph by adding new operations to it. +After the `begin()` call the graph will be finalized and the other callbacks +can not modify the graph anymore. Second call of `begin()` on the same +graph, should not change the graph. + + +- - - + +#### `tf.train.FinalOpsHook.end(session)` {#FinalOpsHook.end} + + + + +- - - + +#### `tf.train.FinalOpsHook.final_ops_values` {#FinalOpsHook.final_ops_values} + + + + diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md index cc3dc0a0e5..424448acfd 100644 --- a/tensorflow/g3doc/api_docs/python/index.md +++ b/tensorflow/g3doc/api_docs/python/index.md @@ -260,7 +260,6 @@ * [`minimum`](../../api_docs/python/math_ops.md#minimum) * [`mod`](../../api_docs/python/math_ops.md#mod) * [`multiply`](../../api_docs/python/math_ops.md#multiply) - * [`neg`](../../api_docs/python/math_ops.md#neg) * [`negative`](../../api_docs/python/math_ops.md#negative) * [`norm`](../../api_docs/python/math_ops.md#norm) * [`polygamma`](../../api_docs/python/math_ops.md#polygamma) @@ -619,6 +618,8 @@ * [`do_quantize_training_on_graphdef`](../../api_docs/python/train.md#do_quantize_training_on_graphdef) * [`exponential_decay`](../../api_docs/python/train.md#exponential_decay) * [`ExponentialMovingAverage`](../../api_docs/python/train.md#ExponentialMovingAverage) + * [`FeedFnHook`](../../api_docs/python/train.md#FeedFnHook) + * [`FinalOpsHook`](../../api_docs/python/train.md#FinalOpsHook) * [`FtrlOptimizer`](../../api_docs/python/train.md#FtrlOptimizer) * [`generate_checkpoint_state_proto`](../../api_docs/python/train.md#generate_checkpoint_state_proto) * [`get_checkpoint_mtimes`](../../api_docs/python/train.md#get_checkpoint_mtimes) diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md index 92b001f898..76636dc6f0 100644 --- a/tensorflow/g3doc/api_docs/python/math_ops.md +++ b/tensorflow/g3doc/api_docs/python/math_ops.md @@ -3720,24 +3720,3 @@ 
invert_permutation(x) ==> [2, 4, 3, 0, 1] A `Tensor`. Has the same type as `x`. 1-D. - -## Other Functions and Classes -- - - - -### `tf.neg(x, name=None)` {#neg} - -Computes numerical negative value element-wise. - -I.e., \\(y = -x\\). - -##### Args: - - -* <b>`x`</b>: A `Tensor`. Must be one of the following types: `half`, `float32`, `float64`, `int32`, `int64`, `complex64`, `complex128`. -* <b>`name`</b>: A name for the operation (optional). - -##### Returns: - - A `Tensor`. Has the same type as `x`. - - diff --git a/tensorflow/g3doc/api_docs/python/nn.md b/tensorflow/g3doc/api_docs/python/nn.md index 84aaa5c5c9..5d64aaf072 100644 --- a/tensorflow/g3doc/api_docs/python/nn.md +++ b/tensorflow/g3doc/api_docs/python/nn.md @@ -2370,13 +2370,13 @@ this function.** _sentinel: Used to prevent positional parameters. Internal, do not use. -* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or - `int64`. Each entry in `labels` must be an index in `[0, num_classes)`. - Other values will raise an exception when this op is run on CPU, and - return `NaN` for corresponding corresponding loss and gradient rows - on GPU. -* <b>`logits`</b>: Unscaled log probabilities of rank `r` and shape - `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`. +* <b>`labels`</b>: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of + `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` + must be an index in `[0, num_classes)`. Other values will raise an + exception when this op is run on CPU, and return `NaN` for corresponding + loss and gradient rows on GPU. +* <b>`logits`</b>: Unscaled log probabilities of shape + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. * <b>`name`</b>: A name for the operation (optional). 
##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md index 2db192fddd..9890892b0f 100644 --- a/tensorflow/g3doc/api_docs/python/state_ops.md +++ b/tensorflow/g3doc/api_docs/python/state_ops.md @@ -1943,7 +1943,7 @@ for an extensive description of how reusing works. Here is a basic example: with tf.variable_scope("foo"): v = tf.get_variable("v", [1]) # v.name == "foo/v:0" w = tf.get_variable("w", [1]) # w.name == "foo/w:0" -with tf.variable_scope("foo", reuse=True) +with tf.variable_scope("foo", reuse=True): v1 = tf.get_variable("v") # The same as v above. ``` @@ -2032,7 +2032,7 @@ for an extensive description of how reusing works. Here is a basic example: with tf.variable_scope("foo"): v = tf.get_variable("v", [1]) # v.name == "foo/v:0" w = tf.get_variable("w", [1]) # w.name == "foo/w:0" -with tf.variable_scope("foo", reuse=True) +with tf.variable_scope("foo", reuse=True): v1 = tf.get_variable("v") # The same as v above. ``` diff --git a/tensorflow/g3doc/api_docs/python/tf_debug.md b/tensorflow/g3doc/api_docs/python/tf_debug.md index 28fc9ec502..9dc35ac82e 100644 --- a/tensorflow/g3doc/api_docs/python/tf_debug.md +++ b/tensorflow/g3doc/api_docs/python/tf_debug.md @@ -1216,12 +1216,18 @@ Create a local debugger command-line interface (CLI) hook. Add a tensor filter. +See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details. +Override default behavior to accommodate the possibility of this method being +called prior to the initialization of the underlying +`LocalCLIDebugWrapperSession` object. + ##### Args: -* <b>`filter_name`</b>: (`str`) name of the filter. -* <b>`tensor_filter`</b>: (`callable`) the filter callable. See the doc string of - `DebugDumpDir.find()` for more details about its signature. +* <b>`filter_name`</b>: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` + for details.
+* <b>`tensor_filter`</b>: See doc of + `LocalCLIDebugWrapperSession.add_tensor_filter()` for details. - - - diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md index 098418f7a6..ac5ef5fc9f 100644 --- a/tensorflow/g3doc/api_docs/python/train.md +++ b/tensorflow/g3doc/api_docs/python/train.md @@ -1533,6 +1533,7 @@ See [Threading and Queues](../../how_tos/threading_and_queues/index.md) for how to use threads and queues. For documentation on the Queue API, see [Queues](../../api_docs/python/io_ops.md#queues). + - - - ### `class tf.train.Coordinator` {#Coordinator} @@ -1984,6 +1985,233 @@ Converts this `QueueRunner` to a `QueueRunnerDef` protocol buffer. - - - +### `class tf.train.LooperThread` {#LooperThread} + +A thread that runs code repeatedly, optionally on a timer. + +This thread class is intended to be used with a `Coordinator`. It repeatedly +runs code specified either as `target` and `args` or by the `run_loop()` +method. + +Before each run the thread checks if the coordinator has requested stop. In +that case the looper thread terminates immediately. + +If the code being run raises an exception, that exception is reported to the +coordinator and the thread terminates. The coordinator will then request all +the other threads it coordinates to stop. + +You typically pass looper threads to the supervisor `Join()` method. +- - - + +#### `tf.train.LooperThread.__init__(coord, timer_interval_secs, target=None, args=None, kwargs=None)` {#LooperThread.__init__} + +Create a LooperThread. + +##### Args: + + +* <b>`coord`</b>: A Coordinator. +* <b>`timer_interval_secs`</b>: Time boundaries at which to call Run(), or None + if it should be called back to back. +* <b>`target`</b>: Optional callable object that will be executed in the thread. +* <b>`args`</b>: Optional arguments to pass to `target` when calling it. +* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it. 
+ +##### Raises: + + +* <b>`ValueError`</b>: If one of the arguments is invalid. + + +- - - + +#### `tf.train.LooperThread.__repr__()` {#LooperThread.__repr__} + + + + +- - - + +#### `tf.train.LooperThread.daemon` {#LooperThread.daemon} + +A boolean value indicating whether this thread is a daemon thread (True) or not (False). + +This must be set before start() is called, otherwise RuntimeError is +raised. Its initial value is inherited from the creating thread; the +main thread is not a daemon thread and therefore all threads created in +the main thread default to daemon = False. + +The entire Python program exits when no alive non-daemon threads are +left. + + +- - - + +#### `tf.train.LooperThread.getName()` {#LooperThread.getName} + + + + +- - - + +#### `tf.train.LooperThread.ident` {#LooperThread.ident} + +Thread identifier of this thread or None if it has not been started. + +This is a nonzero integer. See the thread.get_ident() function. Thread +identifiers may be recycled when a thread exits and another thread is +created. The identifier is available even after the thread has exited. + + +- - - + +#### `tf.train.LooperThread.isAlive()` {#LooperThread.isAlive} + +Return whether the thread is alive. + +This method returns True just before the run() method starts until just +after the run() method terminates. The module function enumerate() +returns a list of all alive threads. + + +- - - + +#### `tf.train.LooperThread.isDaemon()` {#LooperThread.isDaemon} + + + + +- - - + +#### `tf.train.LooperThread.is_alive()` {#LooperThread.is_alive} + +Return whether the thread is alive. + +This method returns True just before the run() method starts until just +after the run() method terminates. The module function enumerate() +returns a list of all alive threads. + + +- - - + +#### `tf.train.LooperThread.join(timeout=None)` {#LooperThread.join} + +Wait until the thread terminates. 
+ +This blocks the calling thread until the thread whose join() method is +called terminates -- either normally or through an unhandled exception +or until the optional timeout occurs. + +When the timeout argument is present and not None, it should be a +floating point number specifying a timeout for the operation in seconds +(or fractions thereof). As join() always returns None, you must call +isAlive() after join() to decide whether a timeout happened -- if the +thread is still alive, the join() call timed out. + +When the timeout argument is not present or None, the operation will +block until the thread terminates. + +A thread can be join()ed many times. + +join() raises a RuntimeError if an attempt is made to join the current +thread as that would cause a deadlock. It is also an error to join() a +thread before it has been started and attempts to do so raises the same +exception. + + +- - - + +#### `tf.train.LooperThread.loop(coord, timer_interval_secs, target, args=None, kwargs=None)` {#LooperThread.loop} + +Start a LooperThread that calls a function periodically. + +If `timer_interval_secs` is None the thread calls `target(args)` +repeatedly. Otherwise `target(args)` is called every `timer_interval_secs` +seconds. The thread terminates when a stop of the coordinator is +requested. + +##### Args: + + +* <b>`coord`</b>: A Coordinator. +* <b>`timer_interval_secs`</b>: Number. Time boundaries at which to call `target`. +* <b>`target`</b>: A callable object. +* <b>`args`</b>: Optional arguments to pass to `target` when calling it. +* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it. + +##### Returns: + + The started thread. + + +- - - + +#### `tf.train.LooperThread.name` {#LooperThread.name} + +A string used for identification purposes only. + +It has no semantics. Multiple threads may be given the same name. The +initial name is set by the constructor. 
+ + +- - - + +#### `tf.train.LooperThread.run()` {#LooperThread.run} + + + + +- - - + +#### `tf.train.LooperThread.run_loop()` {#LooperThread.run_loop} + +Called at 'timer_interval_secs' boundaries. + + +- - - + +#### `tf.train.LooperThread.setDaemon(daemonic)` {#LooperThread.setDaemon} + + + + +- - - + +#### `tf.train.LooperThread.setName(name)` {#LooperThread.setName} + + + + +- - - + +#### `tf.train.LooperThread.start()` {#LooperThread.start} + +Start the thread's activity. + +It must be called at most once per thread object. It arranges for the +object's run() method to be invoked in a separate thread of control. + +This method will raise a RuntimeError if called more than once on the +same thread object. + + +- - - + +#### `tf.train.LooperThread.start_loop()` {#LooperThread.start_loop} + +Called when the thread starts. + + +- - - + +#### `tf.train.LooperThread.stop_loop()` {#LooperThread.stop_loop} + +Called when the thread stops. + + + +- - - + ### `tf.train.add_queue_runner(qr, collection='queue_runners')` {#add_queue_runner} Adds a `QueueRunner` to a collection in the graph. @@ -3531,7 +3759,7 @@ with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)): - - - -### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, config=None)` {#MonitoredTrainingSession} +### `tf.train.MonitoredTrainingSession(master='', is_chief=True, checkpoint_dir=None, scaffold=None, hooks=None, chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, save_summaries_secs=None, config=None)` {#MonitoredTrainingSession} Creates a `MonitoredSession` for training. @@ -3559,8 +3787,12 @@ inialize/restore. using a default checkpoint saver. If `save_checkpoint_secs` is set to `None`, then the default checkpoint saver isn't used. 
* <b>`save_summaries_steps`</b>: The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If - `save_summaries_steps` is set to `None`, then the default summary saver + summaries are written to disk using a default summary saver. If both + `save_summaries_steps` and `save_summaries_secs` are set to `None`, then + the default summary saver isn't used. +* <b>`save_summaries_secs`</b>: The frequency, in secs, that the summaries are written + to disk using a default summary saver. If both `save_summaries_steps` and + `save_summaries_secs` are set to `None`, then the default summary saver isn't used. * <b>`config`</b>: an instance of `tf.ConfigProto` proto used to configure the session. It's the `config` argument of constructor of `tf.Session`. @@ -4111,232 +4343,312 @@ for more information about their attributes. -## Training Utilities +## Training Hooks + +Hooks are tools that run in the process of training/evaluation of the model. - - - -### `tf.train.global_step(sess, global_step_tensor)` {#global_step} +### `class tf.train.SessionRunHook` {#SessionRunHook} -Small helper to get the global step. +Hook to extend calls to MonitoredSession.run(). +- - - -```python -# Creates a variable to hold the global_step. -global_step_tensor = tf.Variable(10, trainable=False, name='global_step') -# Creates a session. -sess = tf.Session() -# Initializes the variable. -print('global_step: %s' % tf.train.global_step(sess, global_step_tensor)) +#### `tf.train.SessionRunHook.after_create_session(session, coord)` {#SessionRunHook.after_create_session} -global_step: 10 -``` +Called when new TensorFlow session is created. -##### Args: +This is called to signal the hooks that a new session has been created. This +has two essential differences with the situation in which `begin` is called: +* When this is called, the graph is finalized and ops can no longer be added + to the graph. 
+* This method will also be called as a result of recovering a wrapped + session, not only at the beginning of the overall session. -* <b>`sess`</b>: A TensorFlow `Session` object. -* <b>`global_step_tensor`</b>: `Tensor` or the `name` of the operation that contains - the global step. +##### Args: -##### Returns: - The global step value. +* <b>`session`</b>: A TensorFlow Session that has been created. +* <b>`coord`</b>: A Coordinator object which keeps track of all threads. - - - -### `tf.train.basic_train_loop(supervisor, train_step_fn, args=None, kwargs=None, master='')` {#basic_train_loop} - -Basic loop to train a model. +#### `tf.train.SessionRunHook.after_run(run_context, run_values)` {#SessionRunHook.after_run} -Calls `train_step_fn` in a loop to train a model. The function is called as: +Called after each call to run(). -```python -train_step_fn(session, *args, **kwargs) -``` +The `run_values` argument contains results of requested ops/tensors by +`before_run()`. -It is passed a `tf.Session` in addition to `args` and `kwargs`. The function -typically runs one training step in the session. +The `run_context` argument is the same one sent to the `before_run` call. +`run_context.request_stop()` can be called to stop the iteration. ##### Args: -* <b>`supervisor`</b>: `tf.Supervisor` to run the training services. -* <b>`train_step_fn`</b>: Callable to execute one training step. Called - repeatedly as `train_step_fn(session, *args **kwargs)`. -* <b>`args`</b>: Optional positional arguments passed to `train_step_fn`. -* <b>`kwargs`</b>: Optional keyword arguments passed to `train_step_fn`. -* <b>`master`</b>: Master to use to create the training session. Defaults to - `""` which causes the session to be created in the local process. +* <b>`run_context`</b>: A `SessionRunContext` object. +* <b>`run_values`</b>: A SessionRunValues object.
- - - -### `tf.train.get_global_step(graph=None)` {#get_global_step} +#### `tf.train.SessionRunHook.before_run(run_context)` {#SessionRunHook.before_run} -Get the global step tensor. +Called before each call to run(). -The global step tensor must be an integer variable. We first try to find it -in the collection `GLOBAL_STEP`, or by name `global_step:0`. +You can return from this call a `SessionRunArgs` object indicating ops or +tensors to add to the upcoming `run()` call. These ops/tensors will be run +together with the ops/tensors originally passed to the original run() call. +The run args you return can also contain feeds to be added to the run() +call. + +The `run_context` argument is a `SessionRunContext` that provides +information about the upcoming `run()` call: the originally requested +op/tensors, the TensorFlow Session. + +At this point graph is finalized and you can not add ops. ##### Args: -* <b>`graph`</b>: The graph to find the global step in. If missing, use default graph. +* <b>`run_context`</b>: A `SessionRunContext` object. ##### Returns: - The global step variable, or `None` if none was found. + None or a `SessionRunArgs` object. -##### Raises: +- - - -* <b>`TypeError`</b>: If the global step tensor has a non-integer type, or if it is not - a `Variable`. +#### `tf.train.SessionRunHook.begin()` {#SessionRunHook.begin} + +Called once before using the session. + +When called, the default graph is the one that will be launched in the +session. The hook can modify the graph by adding new operations to it. +After the `begin()` call the graph will be finalized and the other callbacks +can not modify the graph anymore. Second call of `begin()` on the same +graph, should not change the graph. - - - -### `tf.train.assert_global_step(global_step_tensor)` {#assert_global_step} +#### `tf.train.SessionRunHook.end(session)` {#SessionRunHook.end} -Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`. +Called at the end of session. 
+ +The `session` argument can be used in case the hook wants to run final ops, +such as saving a last checkpoint. ##### Args: -* <b>`global_step_tensor`</b>: `Tensor` to test. +* <b>`session`</b>: A TensorFlow Session that will be soon closed. + - - - -### `tf.train.write_graph(graph_or_graph_def, logdir, name, as_text=True)` {#write_graph} +### `class tf.train.SessionRunArgs` {#SessionRunArgs} -Writes a graph proto to a file. +Represents arguments to be added to a `Session.run()` call. -The graph is written as a binary proto unless `as_text` is `True`. +Args: + fetches: Exactly like the 'fetches' argument to Session.Run(). + Can be a single tensor or op, a list of 'fetches' or a dictionary + of fetches. For example: + fetches = global_step_tensor + fetches = [train_op, summary_op, global_step_tensor] + fetches = {'step': global_step_tensor, 'summ': summary_op} + Note that this can recurse as expected: + fetches = {'step': global_step_tensor, + 'ops': [train_op, check_nan_op]} + feed_dict: Exactly like the `feed_dict` argument to `Session.Run()` + options: Exactly like the `options` argument to `Session.run()`, i.e., a + config_pb2.RunOptions proto. +- - - -```python -v = tf.Variable(0, name='my_variable') -sess = tf.Session() -tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt') -``` +#### `tf.train.SessionRunArgs.__getnewargs__()` {#SessionRunArgs.__getnewargs__} -or +Return self as a plain tuple. Used by copy and pickle. -```python -v = tf.Variable(0, name='my_variable') -sess = tf.Session() -tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt') -``` -##### Args: +- - - +#### `tf.train.SessionRunArgs.__getstate__()` {#SessionRunArgs.__getstate__} -* <b>`graph_or_graph_def`</b>: A `Graph` or a `GraphDef` protocol buffer. -* <b>`logdir`</b>: Directory where to write the graph. This can refer to remote - filesystems, such as Google Cloud Storage (GCS). -* <b>`name`</b>: Filename for the graph. 
-* <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto. +Exclude the OrderedDict from pickling - - - -### `class tf.train.SessionRunHook` {#SessionRunHook} +#### `tf.train.SessionRunArgs.__new__(cls, fetches, feed_dict=None, options=None)` {#SessionRunArgs.__new__} + + + -Hook to extend calls to MonitoredSession.run(). - - - -#### `tf.train.SessionRunHook.after_create_session(session, coord)` {#SessionRunHook.after_create_session} +#### `tf.train.SessionRunArgs.__repr__()` {#SessionRunArgs.__repr__} -Called when new TensorFlow session is created. +Return a nicely formatted representation string -This is called to signal the hooks that a new session has been created. This -has two essential differences with the situation in which `begin` is called: -* When this is called, the graph is finalized and ops can no longer be added - to the graph. -* This method will also be called as a result of recovering a wrapped - session, not only at the beginning of the overall session. +- - - -##### Args: +#### `tf.train.SessionRunArgs.feed_dict` {#SessionRunArgs.feed_dict} +Alias for field number 1 -* <b>`session`</b>: A TensorFlow Session that has been created. -* <b>`coord`</b>: A Coordinator object which keeps track of all threads. + +- - - + +#### `tf.train.SessionRunArgs.fetches` {#SessionRunArgs.fetches} + +Alias for field number 0 - - - -#### `tf.train.SessionRunHook.after_run(run_context, run_values)` {#SessionRunHook.after_run} +#### `tf.train.SessionRunArgs.options` {#SessionRunArgs.options} -Called after each call to run(). +Alias for field number 2 -The `run_values` argument contains results of requested ops/tensors by -`before_run()`. -The `run_context` argument is the same one send to `before_run` call. -`run_context.request_stop()` can be called to stop the iteration. -##### Args: +- - - +### `class tf.train.SessionRunContext` {#SessionRunContext} -* <b>`run_context`</b>: A `SessionRunContext` object. -* <b>`run_values`</b>: A SessionRunValues object. 
+Provides information about the `session.run()` call being made. + +Provides information about original request to `Session.Run()` function. +SessionRunHook objects can stop the loop by calling `request_stop()` of +`run_context`. In the future we may use this object to add more information +about run without changing the Hook API. +- - - + +#### `tf.train.SessionRunContext.__init__(original_args, session)` {#SessionRunContext.__init__} + +Initializes SessionRunContext. - - - -#### `tf.train.SessionRunHook.before_run(run_context)` {#SessionRunHook.before_run} +#### `tf.train.SessionRunContext.original_args` {#SessionRunContext.original_args} -Called before each call to run(). +A `SessionRunArgs` object holding the original arguments of `run()`. -You can return from this call a `SessionRunArgs` object indicating ops or -tensors to add to the upcoming `run()` call. These ops/tensors will be run -together with the ops/tensors originally passed to the original run() call. -The run args you return can also contain feeds to be added to the run() -call. +If user called `MonitoredSession.run(fetches=a, feed_dict=b)`, then this +field is equal to SessionRunArgs(a, b). -The `run_context` argument is a `SessionRunContext` that provides -information about the upcoming `run()` call: the originally requested -op/tensors, the TensorFlow Session. +##### Returns: -At this point graph is finalized and you can not add ops. + A `SessionRunArgs` object -##### Args: +- - - + +#### `tf.train.SessionRunContext.request_stop()` {#SessionRunContext.request_stop} -* <b>`run_context`</b>: A `SessionRunContext` object. +Sets stop requested field. + +Hooks can use this function to request stop of iterations. +`MonitoredSession` checks whether this is called or not. + + +- - - + +#### `tf.train.SessionRunContext.session` {#SessionRunContext.session} + +A TensorFlow session object which will execute the `run`. 
+ + +- - - + +#### `tf.train.SessionRunContext.stop_requested` {#SessionRunContext.stop_requested} + +Returns whether a stop is requested or not. + +If true, `MonitoredSession` stops iterations. ##### Returns: - None or a `SessionRunArgs` object. + A `bool` + - - - -#### `tf.train.SessionRunHook.begin()` {#SessionRunHook.begin} +### `class tf.train.SessionRunValues` {#SessionRunValues} -Called once before using the session. +Contains the results of `Session.run()`. -When called, the default graph is the one that will be launched in the -session. The hook can modify the graph by adding new operations to it. -After the `begin()` call the graph will be finalized and the other callbacks -can not modify the graph anymore. Second call of `begin()` on the same -graph, should not change the graph. +In the future we may use this object to add more information about result of +run without changing the Hook API. + +Args: + results: The return values from `Session.run()` corresponding to the fetches + attribute returned in the RunArgs. Note that this has the same shape as + the RunArgs fetches. For example: + fetches = global_step_tensor + => results = nparray(int) + fetches = [train_op, summary_op, global_step_tensor] + => results = [None, nparray(string), nparray(int)] + fetches = {'step': global_step_tensor, 'summ': summary_op} + => results = {'step': nparray(int), 'summ': nparray(string)} + options: `RunOptions` from the `Session.run()` call. + run_metadata: `RunMetadata` from the `Session.run()` call. +- - - + +#### `tf.train.SessionRunValues.__getnewargs__()` {#SessionRunValues.__getnewargs__} + +Return self as a plain tuple. Used by copy and pickle. - - - -#### `tf.train.SessionRunHook.end(session)` {#SessionRunHook.end} +#### `tf.train.SessionRunValues.__getstate__()` {#SessionRunValues.__getstate__} -Called at the end of session. 
+Exclude the OrderedDict from pickling -The `session` argument can be used in case the hook wants to run final ops, -such as saving a last checkpoint. -##### Args: +- - - +#### `tf.train.SessionRunValues.__new__(_cls, results, options, run_metadata)` {#SessionRunValues.__new__} + +Create new instance of SessionRunValues(results, options, run_metadata) + + +- - - + +#### `tf.train.SessionRunValues.__repr__()` {#SessionRunValues.__repr__} + +Return a nicely formatted representation string + + +- - - + +#### `tf.train.SessionRunValues.options` {#SessionRunValues.options} + +Alias for field number 1 + + +- - - + +#### `tf.train.SessionRunValues.results` {#SessionRunValues.results} + +Alias for field number 0 + + +- - - + +#### `tf.train.SessionRunValues.run_metadata` {#SessionRunValues.run_metadata} + +Alias for field number 2 -* <b>`session`</b>: A TensorFlow Session that will be soon closed. @@ -4349,7 +4661,7 @@ Prints the given tensors once every N local steps or once every N seconds. The tensors will be printed to the log, with `INFO` severity. - - - -#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None)` {#LoggingTensorHook.__init__} +#### `tf.train.LoggingTensorHook.__init__(tensors, every_n_iter=None, every_n_secs=None, formatter=None)` {#LoggingTensorHook.__init__} Initializes a LoggingHook monitor. @@ -4363,6 +4675,8 @@ Initializes a LoggingHook monitor. * <b>`every_n_secs`</b>: `int` or `float`, print the values of `tensors` once every N seconds. Exactly one of `every_n_iter` and `every_n_secs` should be provided. +* <b>`formatter`</b>: function, takes dict of `tag`->`Tensor` and returns a string. + If `None` uses default printing all tensors. ##### Raises: @@ -4953,431 +5267,343 @@ such as saving a last checkpoint. - - - -### `class tf.train.SessionRunArgs` {#SessionRunArgs} - -Represents arguments to be added to a `Session.run()` call. 
+### `class tf.train.FinalOpsHook` {#FinalOpsHook} -Args: - fetches: Exactly like the 'fetches' argument to Session.Run(). - Can be a single tensor or op, a list of 'fetches' or a dictionary - of fetches. For example: - fetches = global_step_tensor - fetches = [train_op, summary_op, global_step_tensor] - fetches = {'step': global_step_tensor, 'summ': summary_op} - Note that this can recurse as expected: - fetches = {'step': global_step_tensor, - 'ops': [train_op, check_nan_op]} - feed_dict: Exactly like the `feed_dict` argument to `Session.Run()` - options: Exactly like the `options` argument to `Session.run()`, i.e., a - config_pb2.RunOptions proto. +A run hook which evaluates `Tensors` at the end of a session. - - - -#### `tf.train.SessionRunArgs.__getnewargs__()` {#SessionRunArgs.__getnewargs__} +#### `tf.train.FinalOpsHook.__init__(final_ops, final_ops_feed_dict=None)` {#FinalOpsHook.__init__} -Return self as a plain tuple. Used by copy and pickle. +Constructs the FinalOpsHook with ops to run at the end of the session. +##### Args: -- - - -#### `tf.train.SessionRunArgs.__getstate__()` {#SessionRunArgs.__getstate__} - -Exclude the OrderedDict from pickling +* <b>`final_ops`</b>: A single `Tensor`, a list of `Tensors` or a dictionary of + names to `Tensors`. +* <b>`final_ops_feed_dict`</b>: A feed dictionary to use when running + `final_ops`. - - - -#### `tf.train.SessionRunArgs.__new__(cls, fetches, feed_dict=None, options=None)` {#SessionRunArgs.__new__} - +#### `tf.train.FinalOpsHook.after_create_session(session, coord)` {#FinalOpsHook.after_create_session} +Called when new TensorFlow session is created. +This is called to signal the hooks that a new session has been created.
This +has two essential differences with the situation in which `begin` is called: -- - - - -#### `tf.train.SessionRunArgs.__repr__()` {#SessionRunArgs.__repr__} - -Return a nicely formatted representation string - +* When this is called, the graph is finalized and ops can no longer be added + to the graph. +* This method will also be called as a result of recovering a wrapped + session, not only at the beginning of the overall session. -- - - +##### Args: -#### `tf.train.SessionRunArgs.feed_dict` {#SessionRunArgs.feed_dict} -Alias for field number 1 +* <b>`session`</b>: A TensorFlow Session that has been created. +* <b>`coord`</b>: A Coordinator object which keeps track of all threads. - - - -#### `tf.train.SessionRunArgs.fetches` {#SessionRunArgs.fetches} +#### `tf.train.FinalOpsHook.after_run(run_context, run_values)` {#FinalOpsHook.after_run} -Alias for field number 0 +Called after each call to run(). +The `run_values` argument contains results of requested ops/tensors by +`before_run()`. -- - - +The `run_context` argument is the same one sent to the `before_run` call. +`run_context.request_stop()` can be called to stop the iteration. -#### `tf.train.SessionRunArgs.options` {#SessionRunArgs.options} +##### Args: -Alias for field number 2 +* <b>`run_context`</b>: A `SessionRunContext` object. +* <b>`run_values`</b>: A SessionRunValues object. - - - -### `class tf.train.SessionRunContext` {#SessionRunContext} - -Provides information about the `session.run()` call being made. - -Provides information about original request to `Session.Run()` function. -SessionRunHook objects can stop the loop by calling `request_stop()` of -`run_context`. In the future we may use this object to add more information -about run without changing the Hook API. -- - - +#### `tf.train.FinalOpsHook.before_run(run_context)` {#FinalOpsHook.before_run} -#### `tf.train.SessionRunContext.__init__(original_args, session)` {#SessionRunContext.__init__} +Called before each call to run().
-Initializes SessionRunContext. +You can return from this call a `SessionRunArgs` object indicating ops or +tensors to add to the upcoming `run()` call. These ops/tensors will be run +together with the ops/tensors originally passed to the original run() call. +The run args you return can also contain feeds to be added to the run() +call. +The `run_context` argument is a `SessionRunContext` that provides +information about the upcoming `run()` call: the originally requested +op/tensors, the TensorFlow Session. -- - - +At this point graph is finalized and you can not add ops. -#### `tf.train.SessionRunContext.original_args` {#SessionRunContext.original_args} +##### Args: -A `SessionRunArgs` object holding the original arguments of `run()`. -If user called `MonitoredSession.run(fetches=a, feed_dict=b)`, then this -field is equal to SessionRunArgs(a, b). +* <b>`run_context`</b>: A `SessionRunContext` object. ##### Returns: - A `SessionRunArgs` object + None or a `SessionRunArgs` object. - - - -#### `tf.train.SessionRunContext.request_stop()` {#SessionRunContext.request_stop} - -Sets stop requested field. - -Hooks can use this function to request stop of iterations. -`MonitoredSession` checks whether this is called or not. - - -- - - +#### `tf.train.FinalOpsHook.begin()` {#FinalOpsHook.begin} -#### `tf.train.SessionRunContext.session` {#SessionRunContext.session} +Called once before using the session. -A TensorFlow session object which will execute the `run`. +When called, the default graph is the one that will be launched in the +session. The hook can modify the graph by adding new operations to it. +After the `begin()` call the graph will be finalized and the other callbacks +can not modify the graph anymore. Second call of `begin()` on the same +graph, should not change the graph. 
- - - -#### `tf.train.SessionRunContext.stop_requested` {#SessionRunContext.stop_requested} +#### `tf.train.FinalOpsHook.end(session)` {#FinalOpsHook.end} -Returns whether a stop is requested or not. - -If true, `MonitoredSession` stops iterations. - -##### Returns: - - A `bool` - - - -### `class tf.train.SessionRunValues` {#SessionRunValues} +#### `tf.train.FinalOpsHook.final_ops_values` {#FinalOpsHook.final_ops_values} -Contains the results of `Session.run()`. -In the future we may use this object to add more information about result of -run without changing the Hook API. -Args: - results: The return values from `Session.run()` corresponding to the fetches - attribute returned in the RunArgs. Note that this has the same shape as - the RunArgs fetches. For example: - fetches = global_step_tensor - => results = nparray(int) - fetches = [train_op, summary_op, global_step_tensor] - => results = [None, nparray(string), nparray(int)] - fetches = {'step': global_step_tensor, 'summ': summary_op} - => results = {'step': nparray(int), 'summ': nparray(string)} - options: `RunOptions` from the `Session.run()` call. - run_metadata: `RunMetadata` from the `Session.run()` call. -- - - - -#### `tf.train.SessionRunValues.__getnewargs__()` {#SessionRunValues.__getnewargs__} - -Return self as a plain tuple. Used by copy and pickle. - - -- - - - -#### `tf.train.SessionRunValues.__getstate__()` {#SessionRunValues.__getstate__} - -Exclude the OrderedDict from pickling - - - -#### `tf.train.SessionRunValues.__new__(_cls, results, options, run_metadata)` {#SessionRunValues.__new__} - -Create new instance of SessionRunValues(results, options, run_metadata) - +### `class tf.train.FeedFnHook` {#FeedFnHook} +Runs `feed_fn` and sets the `feed_dict` accordingly. 
- - - -#### `tf.train.SessionRunValues.__repr__()` {#SessionRunValues.__repr__} - -Return a nicely formatted representation string +#### `tf.train.FeedFnHook.__init__(feed_fn)` {#FeedFnHook.__init__} +Constructs the FeedFnHook with given `feed_fn`. -- - - +##### Args: -#### `tf.train.SessionRunValues.options` {#SessionRunValues.options} -Alias for field number 1 +* <b>`feed_fn`</b>: function, no arguments and returns `dict` to feed. - - - -#### `tf.train.SessionRunValues.results` {#SessionRunValues.results} +#### `tf.train.FeedFnHook.after_create_session(session, coord)` {#FeedFnHook.after_create_session} -Alias for field number 0 +Called when new TensorFlow session is created. +This is called to signal the hooks that a new session has been created. This +has two essential differences with the situation in which `begin` is called: -- - - +* When this is called, the graph is finalized and ops can no longer be added + to the graph. +* This method will also be called as a result of recovering a wrapped + session, not only at the beginning of the overall session. -#### `tf.train.SessionRunValues.run_metadata` {#SessionRunValues.run_metadata} +##### Args: -Alias for field number 2 +* <b>`session`</b>: A TensorFlow Session that has been created. +* <b>`coord`</b>: A Coordinator object which keeps track of all threads. - - - -### `class tf.train.LooperThread` {#LooperThread} - -A thread that runs code repeatedly, optionally on a timer. +#### `tf.train.FeedFnHook.after_run(run_context, run_values)` {#FeedFnHook.after_run} -This thread class is intended to be used with a `Coordinator`. It repeatedly -runs code specified either as `target` and `args` or by the `run_loop()` -method. - -Before each run the thread checks if the coordinator has requested stop. In -that case the looper thread terminates immediately. - -If the code being run raises an exception, that exception is reported to the -coordinator and the thread terminates. 
The coordinator will then request all -the other threads it coordinates to stop. - -You typically pass looper threads to the supervisor `Join()` method. -- - - +Called after each call to run(). -#### `tf.train.LooperThread.__init__(coord, timer_interval_secs, target=None, args=None, kwargs=None)` {#LooperThread.__init__} +The `run_values` argument contains the results of the ops/tensors requested by +`before_run()`. -Create a LooperThread. +The `run_context` argument is the same one sent to the `before_run` call. +`run_context.request_stop()` can be called to stop the iteration. ##### Args: -* <b>`coord`</b>: A Coordinator. -* <b>`timer_interval_secs`</b>: Time boundaries at which to call Run(), or None - if it should be called back to back. -* <b>`target`</b>: Optional callable object that will be executed in the thread. -* <b>`args`</b>: Optional arguments to pass to `target` when calling it. -* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it. - -##### Raises: - - -* <b>`ValueError`</b>: If one of the arguments is invalid. +* <b>`run_context`</b>: A `SessionRunContext` object. +* <b>`run_values`</b>: A SessionRunValues object. - - - -#### `tf.train.LooperThread.__repr__()` {#LooperThread.__repr__} +#### `tf.train.FeedFnHook.before_run(run_context)` {#FeedFnHook.before_run} - - - -#### `tf.train.LooperThread.daemon` {#LooperThread.daemon} - -A boolean value indicating whether this thread is a daemon thread (True) or not (False). - -This must be set before start() is called, otherwise RuntimeError is -raised. Its initial value is inherited from the creating thread; the -main thread is not a daemon thread and therefore all threads created in -the main thread default to daemon = False. - -The entire Python program exits when no alive non-daemon threads are -left. - - -- - - - -#### `tf.train.LooperThread.getName()` {#LooperThread.getName} +#### `tf.train.FeedFnHook.begin()` {#FeedFnHook.begin} +Called once before using the session.
+When called, the default graph is the one that will be launched in the +session. The hook can modify the graph by adding new operations to it. +After the `begin()` call the graph will be finalized and the other callbacks +can not modify the graph anymore. Second call of `begin()` on the same +graph, should not change the graph. - - - -#### `tf.train.LooperThread.ident` {#LooperThread.ident} +#### `tf.train.FeedFnHook.end(session)` {#FeedFnHook.end} -Thread identifier of this thread or None if it has not been started. - -This is a nonzero integer. See the thread.get_ident() function. Thread -identifiers may be recycled when a thread exits and another thread is -created. The identifier is available even after the thread has exited. - - -- - - - -#### `tf.train.LooperThread.isAlive()` {#LooperThread.isAlive} +Called at the end of session. -Return whether the thread is alive. +The `session` argument can be used in case the hook wants to run final ops, +such as saving a last checkpoint. -This method returns True just before the run() method starts until just -after the run() method terminates. The module function enumerate() -returns a list of all alive threads. +##### Args: -- - - +* <b>`session`</b>: A TensorFlow Session that will be soon closed. -#### `tf.train.LooperThread.isDaemon()` {#LooperThread.isDaemon} +## Training Utilities - - - -#### `tf.train.LooperThread.is_alive()` {#LooperThread.is_alive} - -Return whether the thread is alive. - -This method returns True just before the run() method starts until just -after the run() method terminates. The module function enumerate() -returns a list of all alive threads. - +### `tf.train.global_step(sess, global_step_tensor)` {#global_step} -- - - +Small helper to get the global step. -#### `tf.train.LooperThread.join(timeout=None)` {#LooperThread.join} +```python +# Creates a variable to hold the global_step. +global_step_tensor = tf.Variable(10, trainable=False, name='global_step') +# Creates a session. 
+sess = tf.Session() +# Initializes the variable. +print('global_step: %s' % tf.train.global_step(sess, global_step_tensor)) -Wait until the thread terminates. +global_step: 10 +``` -This blocks the calling thread until the thread whose join() method is -called terminates -- either normally or through an unhandled exception -or until the optional timeout occurs. +##### Args: -When the timeout argument is present and not None, it should be a -floating point number specifying a timeout for the operation in seconds -(or fractions thereof). As join() always returns None, you must call -isAlive() after join() to decide whether a timeout happened -- if the -thread is still alive, the join() call timed out. -When the timeout argument is not present or None, the operation will -block until the thread terminates. +* <b>`sess`</b>: A TensorFlow `Session` object. +* <b>`global_step_tensor`</b>: `Tensor` or the `name` of the operation that contains + the global step. -A thread can be join()ed many times. +##### Returns: -join() raises a RuntimeError if an attempt is made to join the current -thread as that would cause a deadlock. It is also an error to join() a -thread before it has been started and attempts to do so raises the same -exception. + The global step value. - - - -#### `tf.train.LooperThread.loop(coord, timer_interval_secs, target, args=None, kwargs=None)` {#LooperThread.loop} +### `tf.train.basic_train_loop(supervisor, train_step_fn, args=None, kwargs=None, master='')` {#basic_train_loop} -Start a LooperThread that calls a function periodically. +Basic loop to train a model. -If `timer_interval_secs` is None the thread calls `target(args)` -repeatedly. Otherwise `target(args)` is called every `timer_interval_secs` -seconds. The thread terminates when a stop of the coordinator is -requested. +Calls `train_step_fn` in a loop to train a model. 
The function is called as: -##### Args: +```python +train_step_fn(session, *args, **kwargs) +``` +It is passed a `tf.Session` in addition to `args` and `kwargs`. The function +typically runs one training step in the session. -* <b>`coord`</b>: A Coordinator. -* <b>`timer_interval_secs`</b>: Number. Time boundaries at which to call `target`. -* <b>`target`</b>: A callable object. -* <b>`args`</b>: Optional arguments to pass to `target` when calling it. -* <b>`kwargs`</b>: Optional keyword arguments to pass to `target` when calling it. +##### Args: -##### Returns: - The started thread. +* <b>`supervisor`</b>: `tf.train.Supervisor` to run the training services. +* <b>`train_step_fn`</b>: Callable to execute one training step. Called + repeatedly as `train_step_fn(session, *args, **kwargs)`. +* <b>`args`</b>: Optional positional arguments passed to `train_step_fn`. +* <b>`kwargs`</b>: Optional keyword arguments passed to `train_step_fn`. +* <b>`master`</b>: Master to use to create the training session. Defaults to + `""` which causes the session to be created in the local process. - - - -#### `tf.train.LooperThread.name` {#LooperThread.name} - -A string used for identification purposes only. +### `tf.train.get_global_step(graph=None)` {#get_global_step} -It has no semantics. Multiple threads may be given the same name. The -initial name is set by the constructor. +Get the global step tensor. +The global step tensor must be an integer variable. We first try to find it +in the collection `GLOBAL_STEP`, or by name `global_step:0`. -- - - +##### Args: -#### `tf.train.LooperThread.run()` {#LooperThread.run} +* <b>`graph`</b>: The graph to find the global step in. If missing, use default graph. +##### Returns: + The global step variable, or `None` if none was found. -- - - +##### Raises: -#### `tf.train.LooperThread.run_loop()` {#LooperThread.run_loop} -Called at 'timer_interval_secs' boundaries.
+* <b>`TypeError`</b>: If the global step tensor has a non-integer type, or if it is not + a `Variable`. - - - -#### `tf.train.LooperThread.setDaemon(daemonic)` {#LooperThread.setDaemon} - - - +### `tf.train.assert_global_step(global_step_tensor)` {#assert_global_step} -- - - +Asserts `global_step_tensor` is a scalar int `Variable` or `Tensor`. -#### `tf.train.LooperThread.setName(name)` {#LooperThread.setName} +##### Args: +* <b>`global_step_tensor`</b>: `Tensor` to test. - - - -#### `tf.train.LooperThread.start()` {#LooperThread.start} - -Start the thread's activity. - -It must be called at most once per thread object. It arranges for the -object's run() method to be invoked in a separate thread of control. +### `tf.train.write_graph(graph_or_graph_def, logdir, name, as_text=True)` {#write_graph} -This method will raise a RuntimeError if called more than once on the -same thread object. +Writes a graph proto to a file. +The graph is written as a binary proto unless `as_text` is `True`. -- - - +```python +v = tf.Variable(0, name='my_variable') +sess = tf.Session() +tf.train.write_graph(sess.graph_def, '/tmp/my-model', 'train.pbtxt') +``` -#### `tf.train.LooperThread.start_loop()` {#LooperThread.start_loop} +or -Called when the thread starts. +```python +v = tf.Variable(0, name='my_variable') +sess = tf.Session() +tf.train.write_graph(sess.graph, '/tmp/my-model', 'train.pbtxt') +``` +##### Args: -- - - -#### `tf.train.LooperThread.stop_loop()` {#LooperThread.stop_loop} +* <b>`graph_or_graph_def`</b>: A `Graph` or a `GraphDef` protocol buffer. +* <b>`logdir`</b>: Directory where to write the graph. This can refer to remote + filesystems, such as Google Cloud Storage (GCS). +* <b>`name`</b>: Filename for the graph. +* <b>`as_text`</b>: If `True`, writes the graph as an ASCII proto. -Called when the thread stops. +##### Returns: + The path of the output proto file. 
diff --git a/tensorflow/g3doc/tutorials/tflearn/index.md b/tensorflow/g3doc/tutorials/tflearn/index.md index b6e26ee351..9f6485e30b 100644 --- a/tensorflow/g3doc/tutorials/tflearn/index.md +++ b/tensorflow/g3doc/tutorials/tflearn/index.md @@ -202,8 +202,8 @@ The code above first defines the model's feature columns, which specify the data type for the features in the data set. All the feature data is continuous, so `tf.contrib.layers.real_valued_column` is the appropriate function to use to construct the feature columns. There are four features in the data set (sepal -width, sepal height, petal width, and petal height), so `dimensions` must be set -accordingly to `4` to hold all the data. +width, sepal height, petal width, and petal height), so accordingly `dimension` +must be set to `4` to hold all the data. Then, the code creates a `DNNClassifier` model using the following arguments: diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go index 75c111e957..d9ebec0f8c 100644 --- a/tensorflow/go/genop/internal/genop.go +++ b/tensorflow/go/genop/internal/genop.go @@ -395,7 +395,7 @@ func goType(tfType string) (string, error) { case "type": gotype = "tf.DataType" case "shape": - gotype = "[]int64" + gotype = "tf.Shape" case "tensor": gotype = "tf.Tensor" case "string": diff --git a/tensorflow/go/graph.go b/tensorflow/go/graph.go index 2eb1194610..c0f91ffb30 100644 --- a/tensorflow/go/graph.go +++ b/tensorflow/go/graph.go @@ -259,13 +259,38 @@ func setAttr(cdesc *C.TF_OperationDescription, status *status, name string, valu if err := status.Err(); err != nil { return fmt.Errorf("bad value for attribute %q: %v", name, err) } + case Shape: + ndims, dims := cshape(value) + var dimsp *C.int64_t + if ndims > 0 { + dimsp = &dims[0] + } + C.TF_SetAttrShape(cdesc, cAttrName, dimsp, ndims) + case []Shape: + ndims := make([]C.int, len(value)) + dims := make([][]C.int64_t, len(value)) + dimsp := make([]*C.int64_t, len(value)) + for i, s := range 
value { + ndims[i], dims[i] = cshape(s) + if ndims[i] > 0 { + dimsp[i] = &dims[i][0] + } + } + C.TF_SetAttrShapeList(cdesc, cAttrName, &dimsp[0], &ndims[0], C.int(len(value))) default: - // Shapes can be done, but will require that it be - // distinguishable from []int64. Which is fine, it - // probably makes sense to define a Shape type anyway, - // since that should handle partially known shapes as - // well and hide the special meaning of -1? return fmt.Errorf("attribute %q has a type (%T) which is not valid for operation attributes", name, value) } return nil } + +func cshape(s Shape) (C.int, []C.int64_t) { + ndims := C.int(s.NumDimensions()) + if ndims < 0 { + return -1, nil + } + dims := make([]C.int64_t, ndims) + for i, s := range s.dims { + dims[i] = C.int64_t(s) + } + return ndims, dims +} diff --git a/tensorflow/go/op/op_test.go b/tensorflow/go/op/op_test.go new file mode 100644 index 0000000000..eaa27bfcd0 --- /dev/null +++ b/tensorflow/go/op/op_test.go @@ -0,0 +1,33 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests for the generated code of some operations. 
+ +package op + +import ( + "testing" + + tf "github.com/tensorflow/tensorflow/tensorflow/go" +) + +func TestPlaceholder(t *testing.T) { + s := NewScope() + Placeholder(s.SubScope("x"), tf.Float, PlaceholderShape(tf.MakeShape(-1, 10))) + Placeholder(s.SubScope("y"), tf.Float, PlaceholderShape(tf.ScalarShape())) + Placeholder(s.SubScope("z"), tf.Float, PlaceholderShape(tf.Shape{})) + if _, err := s.Finalize(); err != nil { + t.Fatal(err) + } +} diff --git a/tensorflow/go/operation_test.go b/tensorflow/go/operation_test.go index 8080515ee9..4c4c960448 100644 --- a/tensorflow/go/operation_test.go +++ b/tensorflow/go/operation_test.go @@ -81,6 +81,21 @@ func TestOperationOutputListSize(t *testing.T) { } } +func TestOperationShapeAttribute(t *testing.T) { + g := NewGraph() + _, err := g.AddOperation(OpSpec{ + Type: "Placeholder", + Attrs: map[string]interface{}{ + "dtype": Float, + "shape": MakeShape(-1, 3), + }, + }) + if err != nil { + t.Fatal(err) + } + // If and when the API to get attributes is added, check that here. +} + func TestOutputShape(t *testing.T) { graph := NewGraph() testdata := []struct { diff --git a/tensorflow/go/shape.go b/tensorflow/go/shape.go new file mode 100644 index 0000000000..c48bbf29a3 --- /dev/null +++ b/tensorflow/go/shape.go @@ -0,0 +1,102 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tensorflow + +import ( + "fmt" + "strings" +) + +// Shape represents the (possibly partially known) shape of a tensor that will +// be produced by an operation. +// +// The zero-value of a Shape represents a shape with an unknown number of +// dimensions. +type Shape struct { + dims []int64 +} + +// ScalarShape returns a Shape representing a scalar. +func ScalarShape() Shape { + return Shape{dims: make([]int64, 0)} +} + +// MakeShape returns a Shape with the provided size of each dimension. +// +// A value of -1 implies that the size of the corresponding dimension is not +// known. +func MakeShape(shape ...int64) Shape { + cpy := make([]int64, len(shape)) + copy(cpy, shape) + return Shape{dims: cpy} +} + +// NumDimensions returns the number of dimensions represented by s, or -1 if +// unknown. +func (s Shape) NumDimensions() int { + if s.dims == nil { + return -1 + } + return len(s.dims) +} + +// Size returns the size of the dim-th dimension of the shape, or -1 if it +// is unknown or dim is out of range. +// +// REQUIRES: 0 <= dim < s.NumDimensions() +func (s Shape) Size(dim int) int64 { + if dim < 0 || dim >= s.NumDimensions() { + return -1 + } + return s.dims[dim] +} + +// IsFullySpecified returns true iff the size of all the dimensions of s are +// known (a negative size marks an unknown dimension). +func (s Shape) IsFullySpecified() bool { + if s.dims == nil { + return false + } + for _, size := range s.dims { + if size < 0 { + return false + } + } + return true +} + +// ToSlice returns the (possibly partially known) shape represented by s as a +// slice, or an error if the number of dimensions is not known. +func (s Shape) ToSlice() ([]int64, error) { + if s.dims == nil { + return nil, fmt.Errorf("cannot create a slice for a Shape with an unknown number of dimensions") + } + cpy := make([]int64, len(s.dims)) + copy(cpy, s.dims) + return cpy, nil +} + +func (s Shape) String() string { + if s.dims == nil { + return "?"
+ } + ret := fmt.Sprint(s.dims) + for _, size := range s.dims { + if size < 0 { + ret = strings.Replace(ret, fmt.Sprint(size), "?", 1) + } + } + return strings.Replace(ret, " ", ", ", -1) +} diff --git a/tensorflow/go/shape_test.go b/tensorflow/go/shape_test.go new file mode 100644 index 0000000000..f8f3d4e94b --- /dev/null +++ b/tensorflow/go/shape_test.go @@ -0,0 +1,83 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tensorflow + +import ( + "fmt" + "reflect" + "testing" +) + +func TestShape(t *testing.T) { + tests := []struct { + shape Shape + slice []int64 + full bool + str string + }{ + { + shape: ScalarShape(), + slice: make([]int64, 0), + full: true, + str: "[]", + }, + { + shape: MakeShape(-1, 2, -1, 4), + slice: []int64{-1, 2, -1, 4}, + full: false, + str: "[?, 2, ?, 4]", + }, + { + shape: MakeShape(2, 3), + slice: []int64{2, 3}, + full: true, + str: "[2, 3]", + }, + } + for _, test := range tests { + t.Run(fmt.Sprintf("%#v", test.shape), func(t *testing.T) { + if got, want := test.shape.NumDimensions(), len(test.slice); got != want { + t.Errorf("Got %v, want %v", got, want) + } + if gotSlice, err := test.shape.ToSlice(); err != nil || !reflect.DeepEqual(gotSlice, test.slice) { + t.Errorf("Got (%#v, %v), want (%#v, nil)", gotSlice, err, test.slice) + } + if got, want := test.shape.IsFullySpecified(), test.full; got != want { + t.Errorf("Got %v, want %v", got, want) + } + if got, want := test.shape.String(), test.str; got != want { + t.Errorf("Got %v, want %v", got, want) + } + }) + } + +} + +func TestZeroShape(t *testing.T) { + var s Shape + if s.NumDimensions() != -1 { + t.Error(s.NumDimensions()) + } + if _, err := s.ToSlice(); err == nil { + t.Error("ToSlice() on a Shape of unknown number of dimensions should fail") + } + if s.IsFullySpecified() { + t.Error("Shape of unknown number of dimensions should not be fully specified") + } + if got, want := s.String(), "?"; got != want { + t.Errorf("Got %q, want %q", got, want) + } + +} diff --git a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java index b13f830631..cb3de5f744 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/OperationBuilderTest.java @@ -17,6 +17,7 @@ package org.tensorflow; import static org.junit.Assert.fail; +import 
org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -26,7 +27,8 @@ import org.junit.runners.JUnit4; public class OperationBuilderTest { // TODO(ashankar): Restore this test once the C API gracefully handles mixing graphs and // operations instead of segfaulting. - // @Test + @Test + @Ignore public void failWhenMixingOperationsOnDifferentGraphs() { try (Graph g1 = new Graph(); Graph g2 = new Graph()) { diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index c1a8191def..248d4c9b81 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1308,7 +1308,12 @@ class InteractiveSession(BaseSession): config: (Optional) `ConfigProto` proto used to configure the session. """ if not config: - config = config_pb2.ConfigProto() + # If config is not provided, choose some reasonable defaults for + # interactive use: + # + # - Grow GPU memory as needed at the cost of fragmentation. + gpu_options = config_pb2.GPUOptions(allow_growth=True) + config = config_pb2.ConfigProto(gpu_options=gpu_options) # Interactive sessions always place pruned graphs. 
config.graph_options.place_pruned_graph = True diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 0aa5ce0a60..9ad8a1121f 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -592,6 +592,18 @@ py_test( ], ) +sh_test( + name = "examples_test", + size = "small", + srcs = ["examples/examples_test.sh"], + data = [ + ":debug_errors", + ":debug_fibonacci", + ":debug_mnist", + ":debug_tflearn_iris", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/python/debug/examples/debug_fibonacci.py b/tensorflow/python/debug/examples/debug_fibonacci.py index 14722ecd08..6fdc78b605 100644 --- a/tensorflow/python/debug/examples/debug_fibonacci.py +++ b/tensorflow/python/debug/examples/debug_fibonacci.py @@ -45,7 +45,7 @@ def main(_): sess.run(tf.global_variables_initializer()) # Wrap the TensorFlow Session object for debugging. - sess = tf_debug.LocalCLIDebugWrapperSession(sess) + sess = tf_debug.LocalCLIDebugWrapperSession(sess, ui_type=FLAGS.ui_type) sess.run(n1) @@ -66,5 +66,10 @@ if __name__ == "__main__": type=int, default=20, help="Length of the fibonacci sequence to compute.") + parser.add_argument( + "--ui_type", + type=str, + default="curses", + help="Command-line user interface type (curses | readline)") FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/python/debug/examples/debug_mnist.py b/tensorflow/python/debug/examples/debug_mnist.py index d8195a6847..73d398c086 100644 --- a/tensorflow/python/debug/examples/debug_mnist.py +++ b/tensorflow/python/debug/examples/debug_mnist.py @@ -41,11 +41,14 @@ RAND_SEED = 42 def main(_): # Import data - mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) + mnist = input_data.read_data_sets(FLAGS.data_dir, + one_hot=True, + fake_data=FLAGS.fake_data) def feed_dict(train): - if train: - xs, ys = mnist.train.next_batch(FLAGS.train_batch_size, fake_data=False) + if train or 
FLAGS.fake_data: + xs, ys = mnist.train.next_batch(FLAGS.train_batch_size, + fake_data=FLAGS.fake_data) else: xs, ys = mnist.test.images, mnist.test.labels @@ -157,6 +160,13 @@ if __name__ == "__main__": default="curses", help="Command-line user interface type (curses | readline)") parser.add_argument( + "--fake_data", + type="bool", + nargs="?", + const=True, + default=False, + help="Use fake MNIST data for unit testing") + parser.add_argument( "--debug", type="bool", nargs="?", diff --git a/tensorflow/python/debug/examples/debug_tflearn_iris.py b/tensorflow/python/debug/examples/debug_tflearn_iris.py index 009885b9ea..57ebba689d 100644 --- a/tensorflow/python/debug/examples/debug_tflearn_iris.py +++ b/tensorflow/python/debug/examples/debug_tflearn_iris.py @@ -80,15 +80,22 @@ def iris_input_fn(): def main(_): - training_data_path, test_data_path = maybe_download_data(FLAGS.data_dir) - # Load datasets. - training_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=training_data_path, - target_dtype=np.int, - features_dtype=np.float32) - test_set = tf.contrib.learn.datasets.base.load_csv_with_header( - filename=test_data_path, target_dtype=np.int, features_dtype=np.float32) + if FLAGS.fake_data: + training_set = tf.contrib.learn.datasets.base.Dataset( + np.random.random([120, 4]), + np.random.random_integers(3, size=[120]) - 1) + test_set = tf.contrib.learn.datasets.base.Dataset( + np.random.random([30, 4]), + np.random.random_integers(3, size=[30]) - 1) + else: + training_data_path, test_data_path = maybe_download_data(FLAGS.data_dir) + training_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=training_data_path, + target_dtype=np.int, + features_dtype=np.float32) + test_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=test_data_path, target_dtype=np.int, features_dtype=np.float32) # Specify that all features have real-value data feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] @@ 
-102,8 +109,11 @@ def main(_): n_classes=3, model_dir=model_dir) - hooks = ([tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type)] if FLAGS.debug - else None) + hooks = None + if FLAGS.debug: + debug_hook = tf_debug.LocalCLIDebugHook(ui_type=FLAGS.ui_type) + debug_hook.add_tensor_filter("has_inf_or_nan", tf_debug.has_inf_or_nan) + hooks = [debug_hook] if not FLAGS.use_experiment: # Fit model. @@ -163,6 +173,13 @@ if __name__ == "__main__": default="curses", help="Command-line user interface type (curses | readline)") parser.add_argument( + "--fake_data", + type="bool", + nargs="?", + const=True, + default=False, + help="Use fake MNIST data for unit testing") + parser.add_argument( "--debug", type="bool", nargs="?", diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh new file mode 100755 index 0000000000..397078b91d --- /dev/null +++ b/tensorflow/python/debug/examples/examples_test.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Bash unit tests for TensorFlow Debugger (tfdbg) Python examples that do not +# involve downloading data. 
+ +set -e + + +DEBUG_FIBONACCI_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_fibonacci" + +# Override the default ui_type=curses to allow the test to pass in a tty-less +# test environment. +cat << EOF | "${DEBUG_FIBONACCI_BIN}" --ui_type=readline +run +exit +EOF + + +DEBUG_ERRORS_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_errors" + +cat << EOF | "${DEBUG_ERRORS_BIN}" --error=no_error --ui_type=readline +run +exit +EOF + + +DEBUG_MNIST_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_mnist" + +# Use a large enough "run -t" number to let the process end properly. +cat << EOF | "${DEBUG_MNIST_BIN}" --debug --fake_data --ui_type=readline +run -f has_inf_or_nan +run -t 1000 +EOF + + +DEBUG_TFLEARN_IRIS_BIN="$TEST_SRCDIR/org_tensorflow/tensorflow/python/debug/debug_tflearn_iris" + +cat << EOF | "${DEBUG_TFLEARN_IRIS_BIN}" --debug --fake_data --train_steps=2 --ui_type=readline +run -f has_inf_or_nan +EOF diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py index cda2becc6e..30f0e117e6 100644 --- a/tensorflow/python/debug/wrappers/hooks.py +++ b/tensorflow/python/debug/wrappers/hooks.py @@ -44,6 +44,28 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook, self._ui_type = ui_type self._wrapper_initialized = False + self._pending_tensor_filters = {} + + def add_tensor_filter(self, filter_name, tensor_filter): + """Add a tensor filter. + + See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` for details. + Override default behavior to accommodate the possibility of this method being + called prior to the initialization of the underlying + `LocalCLIDebugWrapperSession` object. + + Args: + filter_name: See doc of `LocalCLIDebugWrapperSession.add_tensor_filter()` + for details. + tensor_filter: See doc of + `LocalCLIDebugWrapperSession.add_tensor_filter()` for details.
+ """ + + if self._wrapper_initialized: + local_cli_wrapper.LocalCLIDebugWrapperSession.add_tensor_filter( + self, filter_name, tensor_filter) + else: + self._pending_tensor_filters[filter_name] = tensor_filter def begin(self): pass @@ -52,6 +74,13 @@ class LocalCLIDebugHook(session_run_hook.SessionRunHook, if not self._wrapper_initialized: local_cli_wrapper.LocalCLIDebugWrapperSession.__init__( self, run_context.session, ui_type=self._ui_type) + + # Actually register tensor filters registered prior to the construction + # of the underlying LocalCLIDebugWrapperSession object. + for filter_name in self._pending_tensor_filters: + local_cli_wrapper.LocalCLIDebugWrapperSession.add_tensor_filter( + self, filter_name, self._pending_tensor_filters[filter_name]) + self._wrapper_initialized = True # Increment run call counter. diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 49406eedf3..cc33c20f87 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -476,7 +476,8 @@ def import_scoped_meta_graph(meta_graph_or_file, sorted(input_map)): raise ValueError("Graph contains unbound inputs: %s. Must " "provide these inputs through input_map." % - ",".join([compat.as_str(v) for v in field.value])) + ",".join([compat.as_str(v) for v in field.value + if not input_map or v not in input_map])) break # Sets graph to default graph if it's not passed in. 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index f4c3dcf99f..13b6923c3c 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -342,6 +342,18 @@ tf_py_test( ) tf_py_test( + name = "record_input_test", + size = "small", + srcs = ["record_input_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:data_flow_ops", + "//tensorflow/python:io_ops", + "//tensorflow/python:util", + ], +) + +tf_py_test( name = "io_ops_test", size = "small", srcs = ["io_ops_test.py"], diff --git a/tensorflow/python/kernel_tests/argmax_op_test.py b/tensorflow/python/kernel_tests/argmax_op_test.py index ac9a78d0fa..a5352561aa 100644 --- a/tensorflow/python/kernel_tests/argmax_op_test.py +++ b/tensorflow/python/kernel_tests/argmax_op_test.py @@ -90,6 +90,12 @@ class ArgMaxTest(test.TestCase): r"Reduction axis 0 is empty in shape \[0\]"): op([], 0).eval() + def testDefaultAxis(self): + with self.test_session(): + for op in math_ops.argmin, math_ops.argmax: + ans = op([1]).eval() + self.assertAllEqual(ans, 0) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/confusion_matrix_test.py b/tensorflow/python/kernel_tests/confusion_matrix_test.py index cf88209148..2d116df2ff 100644 --- a/tensorflow/python/kernel_tests/confusion_matrix_test.py +++ b/tensorflow/python/kernel_tests/confusion_matrix_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.ops import array_ops from tensorflow.python.ops import confusion_matrix from tensorflow.python.ops import math_ops @@ -215,5 +216,239 @@ class ConfusionMatrixTest(test.TestCase): self.assertEqual(tf_cm.dtype, np.int64) +class RemoveSqueezableDimensionsTest(test.TestCase): + + def testBothScalarShape(self): + label_values = 1.0 + 
prediction_values = 0.0 + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.float32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.float32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + with self.test_session(): + self.assertAllEqual(label_values, static_labels.eval()) + self.assertAllEqual(prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSameShape(self): + label_values = np.ones(shape=(2, 3, 1)) + prediction_values = np.zeros_like(label_values) + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + with self.test_session(): + self.assertAllEqual(label_values, static_labels.eval()) + self.assertAllEqual(prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSameShapeExpectedRankDiff0(self): + label_values = np.ones(shape=(2, 3, 1)) + prediction_values = np.zeros_like(label_values) + static_labels, static_predictions = ( + 
confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values, expected_rank_diff=0)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder, expected_rank_diff=0)) + + with self.test_session(): + self.assertAllEqual(label_values, static_labels.eval()) + self.assertAllEqual(prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSqueezableLabels(self): + label_values = np.ones(shape=(2, 3, 1)) + prediction_values = np.zeros(shape=(2, 3)) + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + expected_label_values = np.reshape(label_values, newshape=(2, 3)) + with self.test_session(): + self.assertAllEqual(expected_label_values, static_labels.eval()) + self.assertAllEqual(prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + expected_label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSqueezableLabelsExpectedRankDiffPlus1(self): + label_values = np.ones(shape=(2, 3, 1)) + prediction_values = 
np.zeros(shape=(2, 3, 5)) + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values, expected_rank_diff=1)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder, expected_rank_diff=1)) + + expected_label_values = np.reshape(label_values, newshape=(2, 3)) + with self.test_session(): + self.assertAllEqual(expected_label_values, static_labels.eval()) + self.assertAllEqual(prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + expected_label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSqueezablePredictions(self): + label_values = np.ones(shape=(2, 3)) + prediction_values = np.zeros(shape=(2, 3, 1)) + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3)) + with self.test_session(): + self.assertAllEqual(label_values, static_labels.eval()) + self.assertAllEqual(expected_prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + expected_prediction_values, + 
dynamic_predictions.eval(feed_dict=feed_dict)) + + def testSqueezablePredictionsExpectedRankDiffMinus1(self): + label_values = np.ones(shape=(2, 3, 5)) + prediction_values = np.zeros(shape=(2, 3, 1)) + static_labels, static_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values, expected_rank_diff=-1)) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder, expected_rank_diff=-1)) + + expected_prediction_values = np.reshape(prediction_values, newshape=(2, 3)) + with self.test_session(): + self.assertAllEqual(label_values, static_labels.eval()) + self.assertAllEqual(expected_prediction_values, static_predictions.eval()) + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + self.assertAllEqual( + expected_prediction_values, + dynamic_predictions.eval(feed_dict=feed_dict)) + + def testUnsqueezableLabels(self): + label_values = np.ones(shape=(2, 3, 2)) + prediction_values = np.zeros(shape=(2, 3)) + with self.assertRaisesRegexp(ValueError, r"Can not squeeze dim\[2\]"): + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + with self.test_session(): + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Tried to explicitly squeeze dimension 2"): + 
dynamic_labels.eval(feed_dict=feed_dict) + self.assertAllEqual( + prediction_values, dynamic_predictions.eval(feed_dict=feed_dict)) + + def testUnsqueezablePredictions(self): + label_values = np.ones(shape=(2, 3)) + prediction_values = np.zeros(shape=(2, 3, 2)) + with self.assertRaisesRegexp(ValueError, r"Can not squeeze dim\[2\]"): + confusion_matrix.remove_squeezable_dimensions( + label_values, prediction_values) + + labels_placeholder = array_ops.placeholder(dtype=dtypes.int32) + predictions_placeholder = array_ops.placeholder(dtype=dtypes.int32) + dynamic_labels, dynamic_predictions = ( + confusion_matrix.remove_squeezable_dimensions( + labels_placeholder, predictions_placeholder)) + + with self.test_session(): + feed_dict = { + labels_placeholder: label_values, + predictions_placeholder: prediction_values + } + self.assertAllEqual( + label_values, dynamic_labels.eval(feed_dict=feed_dict)) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Tried to explicitly squeeze dimension 2"): + dynamic_predictions.eval(feed_dict=feed_dict) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py index 125d353df3..f3ae092b6f 100644 --- a/tensorflow/python/kernel_tests/losses_test.py +++ b/tensorflow/python/kernel_tests/losses_test.py @@ -72,7 +72,7 @@ class AbsoluteDifferenceLossTest(test.TestCase): self.assertAlmostEqual(5.5 * weights, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeights(self): - weights = constant_op.constant([1.2, 0.0], shape=[2,]) + weights = constant_op.constant((1.2, 0.0), shape=(2, 1)) loss = losses.absolute_difference(self._labels, self._predictions, weights) with self.test_session(): self.assertAlmostEqual(5.6, loss.eval(), 3) @@ -154,7 +154,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[0, 0, 1], 
[1, 0, 0], [0, 1, 0]]) - weights = constant_op.constant([1.2, 3.4, 5.6], shape=[3]) + weights = constant_op.constant((1.2, 3.4, 5.6)) with self.test_session(): loss = losses.softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3) @@ -296,8 +296,6 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): constant_op.constant(weights)) self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dim 0. def testNonZeroLossWith1DTensorWeight(self): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) @@ -305,25 +303,25 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): weights = 2.3 with self.test_session(): loss = losses.sparse_softmax_cross_entropy( - labels, logits, constant_op.constant(weights, shape=(1,))) - self.assertAlmostEqual(weights * 3.0 * 10.0, loss.eval(), 2) + labels, logits, constant_op.constant((weights,))) + self.assertAlmostEqual(weights * 10.0, loss.eval(), 3) def testNonZeroLossWithPlaceholderForWeights(self): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) - weights = array_ops.placeholder(dtypes.float32, shape=(None,)) + weights = array_ops.placeholder(dtypes.float32) with self.test_session() as sess: loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) loss_val = sess.run(loss, - feed_dict={weights: [1.2, 3.4, 5.6]}) + feed_dict={weights: ((1.2,), (3.4,), (5.6,))}) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss_val, 3) def testNonZeroLossWithPlaceholderForLogitsLabelsAndWeights(self): logits = array_ops.placeholder(dtypes.float32, shape=(None, 3)) labels = array_ops.placeholder(dtypes.int32, shape=(None, 1)) - weights = array_ops.placeholder(dtypes.float32, shape=(None,)) + weights = array_ops.placeholder(dtypes.float32) with 
self.test_session() as sess: loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) loss_val = sess.run(loss, @@ -332,7 +330,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]], labels: [[2], [0], [1]], - weights: [1.2, 3.4, 5.6], + weights: ((1.2,), (3.4,), (5.6,)), }) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss_val, 3) @@ -340,7 +338,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) - weights = constant_op.constant([1.2, 3.4, 5.6], shape=[3]) + weights = constant_op.constant([1.2, 3.4, 5.6], shape=(3, 1)) with self.test_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual((1.2 + 3.4 + 5.6) * 10.0 / 3.0, loss.eval(), 3) @@ -358,7 +356,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) - weights = constant_op.constant([0, 0, 0], shape=[3]) + weights = constant_op.constant([0, 0, 0], shape=(3, 1)) with self.test_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(0.0, loss.eval(), 3) @@ -367,7 +365,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0], [0.0, 0.0, 10.0]]) labels = constant_op.constant([[2], [0], [1]]) - weights = constant_op.constant([1.2, 0, 0], shape=[3]) + weights = constant_op.constant([1.2, 0, 0], shape=(3, 1)) with self.test_session(): loss = losses.sparse_softmax_cross_entropy(labels, logits, weights) self.assertAlmostEqual(12.0, loss.eval(), 3) @@ -432,9 +430,9 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase): [-100.0, -100.0, 100.0, -100.0], [-100.0, -100.0, -100.0, 100.0]]) labels = constant_op.constant([[0, 
1], [2, 3]]) - weights = constant_op.constant([1.2, 3.4, 5.6, 7.8]) + weights = constant_op.constant(1.2) - with self.assertRaises(errors_impl.InvalidArgumentError): + with self.assertRaisesRegexp(ValueError, 'dimension'): losses.sparse_softmax_cross_entropy( labels, logits, weights=weights).eval() @@ -629,7 +627,7 @@ class LogLossTest(test.TestCase): loss, 3) def testNonZeroLossWithOneDimBatchSpecificWeights(self): - weights = constant_op.constant([1.2, 3.4], shape=[2]) + weights = constant_op.constant((1.2, 3.4), shape=(2, 1)) expected_losses = np.multiply( self._expected_losses, np.asarray([1.2, 1.2, 1.2, 3.4, 3.4, 3.4]).reshape((2, 3))) @@ -638,7 +636,7 @@ class LogLossTest(test.TestCase): self.assertAlmostEqual(-np.sum(expected_losses) / 6.0, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeightsSomeZero(self): - weights = constant_op.constant([1.2, 0], shape=[2]) + weights = constant_op.constant((1.2, 0), shape=(2, 1)) expected_losses = np.multiply(self._expected_losses, np.asarray([1.2, 1.2, 1.2, 0, 0, 0]).reshape( (2, 3))) @@ -797,7 +795,7 @@ class MeanSquaredErrorTest(test.TestCase): self.assertAlmostEqual(49.5 * weights, loss.eval(), 3) def testNonZeroLossWithOneDimBatchSpecificWeights(self): - weights = constant_op.constant([1.2, 3.4], shape=[2,]) + weights = constant_op.constant([1.2, 3.4], shape=(2, 1)) loss = losses.mean_squared_error(self._labels, self._predictions, weights) with self.test_session(): self.assertAlmostEqual(767.8 / 6.0, loss.eval(), 3) @@ -855,7 +853,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): labels=constant_op.constant(self._labels), weights=None) - def _test_mean_pairwise_squared_error( + def _test_valid_weights( self, labels, predictions, expected_loss, weights=1.0): with self.test_session(): static_inputs_op = losses.mean_pairwise_squared_error( @@ -881,11 +879,11 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): expected_loss, dynamic_inputs_op.eval(feed_dict=feed_dict), places=3) def 
testAllCorrectNoLossWeight(self): - self._test_mean_pairwise_squared_error( + self._test_valid_weights( self._labels, self._labels, expected_loss=0.0) def testNonZeroLoss(self): - self._test_mean_pairwise_squared_error( + self._test_valid_weights( self._labels, self._predictions, expected_loss=np.sum(self._expected_losses)) @@ -916,7 +914,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): def testNonZeroLossWithPythonScalarWeight(self): weight = 2.3 - self._test_mean_pairwise_squared_error( + self._test_valid_weights( self._labels, self._predictions, expected_loss=weight * np.sum(self._expected_losses), weights=weight) @@ -932,16 +930,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): loss.eval(), 3) def testNonZeroLossWithScalarZeroWeight(self): - self._test_mean_pairwise_squared_error( + self._test_valid_weights( self._labels, self._predictions, expected_loss=0.0, weights=0.0) - def testNonZeroLossWithOneDimBatchSpecificWeights(self): - weights = np.asarray((1.2, 3.4)) - self._test_mean_pairwise_squared_error( - self._labels, self._predictions, - expected_loss=np.sum(np.multiply(weights, self._expected_losses)), - weights=weights) - def test3d(self): labels = np.array([ [[1, 9, 2], [12, 11, 10], [9, 8, 7]], @@ -951,7 +942,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): [[4, 8, 12], [1, 2, 3], [4, 5, 6]], [[8, 1, 3], [7, 8, 9], [10, 11, 12]], ]) - self._test_mean_pairwise_squared_error( + self._test_valid_weights( labels, predictions, expected_loss=122.22222) def test3dWeightedScalar(self): @@ -964,11 +955,36 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): [[8, 1, 3], [7, 8, 9], [10, 11, 12]], ]) weight = 3.0 - self._test_mean_pairwise_squared_error( + self._test_valid_weights( labels, predictions, expected_loss=weight * 122.22222, weights=weight) - def test3dWeighted2x0(self): + def _test_invalid_weights( + self, labels, predictions, weights=1.0): + expected_error_msg = 'weights can not be broadcast to values' + + # Static check. 
+ with self.assertRaisesRegexp(ValueError, expected_error_msg): + losses.mean_pairwise_squared_error( + predictions=predictions, labels=labels, weights=weights) + + # Dynamic check. + predictions_placeholder = array_ops.placeholder(dtypes.float32) + labels_placeholder = array_ops.placeholder(dtypes.int32) + weights_placeholder = array_ops.placeholder(dtypes.float32) + dynamic_inputs_op = losses.mean_pairwise_squared_error( + predictions=predictions_placeholder, + labels=labels_placeholder, + weights=weights_placeholder) + with self.test_session(): + with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): + dynamic_inputs_op.eval(feed_dict={ + predictions_placeholder: predictions, + labels_placeholder: labels, + weights_placeholder: weights, + }) + + def testInvalid3dWeighted2x0(self): labels = np.array([ [[1, 9, 2], [12, 11, 10], [9, 8, 7]], [[-5, -5, 7], [6, 5, 4], [3, 2, 1]], @@ -977,11 +993,9 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): [[4, 8, 12], [1, 2, 3], [4, 5, 6]], [[8, 1, 3], [7, 8, 9], [10, 11, 12]], ]) - self._test_mean_pairwise_squared_error( - labels, predictions, expected_loss=253.24445, - weights=np.asarray((1.2, 3.4))) + self._test_invalid_weights( + labels, predictions, weights=np.asarray((1.2, 3.4))) - # TODO(ptucker): According to the pydoc, this should work. 
def test3dWeighted2x3x3(self): labels = np.array([ [[1, 9, 2], [12, 11, 10], [9, 8, 7]], @@ -991,19 +1005,13 @@ class MeanPairwiseSquaredErrorTest(test.TestCase): [[4, 8, 12], [1, 2, 3], [4, 5, 6]], [[8, 1, 3], [7, 8, 9], [10, 11, 12]], ]) - with self.assertRaisesRegexp( - ValueError, 'Dimensions must be equal, but are 2 and 3'): - losses.mean_pairwise_squared_error( - predictions=predictions, labels=labels, - weights=np.ones((2, 3, 3))) - - def testZeroLossWithOneDimBatchZeroWeights(self): - self._test_mean_pairwise_squared_error( - self._labels, self._predictions, expected_loss=0.0, - weights=np.zeros((2,))) + self._test_valid_weights( + # TODO(ptucker): This doesn't look right. + labels, predictions, expected_loss=9 * 122.22222, + weights=np.ones((2, 3, 3))) def testLossWithAllZeroBatchSpecificWeights(self): - self._test_mean_pairwise_squared_error( + self._test_valid_weights( self._labels, self._predictions, expected_loss=0.0, weights=np.zeros((2, 1))) @@ -1071,7 +1079,7 @@ class CosineDistanceLossTest(test.TestCase): predictions=constant_op.constant(self._predictions), labels=constant_op.constant(self._labels), dim=2, - weights=constant_op.constant([1, 0, 0])) + weights=np.asarray((1, 0, 0)).reshape((3, 1, 1))) with self.test_session(): self.assertEqual(1.0, loss.eval()) @@ -1081,21 +1089,10 @@ class CosineDistanceLossTest(test.TestCase): labels=constant_op.constant(self._labels), dim=2, weights=constant_op.constant( - [1, 0, 0, 1, 1, 1], shape=(3, 2))) + [1, 0, 0, 1, 1, 1], shape=(3, 2, 1))) with self.test_session(): self.assertEqual(3.0 / 4.0, loss.eval()) - def testValueErrorThrownWithShapelessPlaceholder(self): - tf_predictions = array_ops.placeholder(dtypes.float32) - with self.test_session(): - with self.assertRaises(ValueError): - losses.cosine_distance( - predictions=tf_predictions, - labels=constant_op.constant(self._labels), - dim=2, - weights=constant_op.constant( - [1, 0, 0, 1, 1, 1], shape=(3, 2))) - def 
testMeasurementSpecificWeightsWithPlaceholderWithShape(self): tf_predictions = array_ops.placeholder( dtypes.float32, shape=self._labels.shape) @@ -1104,7 +1101,7 @@ class CosineDistanceLossTest(test.TestCase): labels=constant_op.constant(self._labels), dim=2, weights=constant_op.constant( - [1, 0, 0, 1, 1, 1], shape=(3, 2))) + [1, 0, 0, 1, 1, 1], shape=(3, 2, 1))) with self.test_session() as sess: loss = sess.run(loss, feed_dict={tf_predictions: self._predictions}) self.assertEqual(3.0 / 4.0, loss) @@ -1114,7 +1111,7 @@ class CosineDistanceLossTest(test.TestCase): predictions=constant_op.constant(self._predictions), labels=constant_op.constant(self._labels), dim=2, - weights=array_ops.zeros((3,))) + weights=array_ops.zeros((3, 1, 1))) with self.test_session(): self.assertEqual(0, loss.eval()) @@ -1123,7 +1120,7 @@ class CosineDistanceLossTest(test.TestCase): predictions=constant_op.constant(self._predictions), labels=constant_op.constant(self._labels), dim=2, - weights=array_ops.zeros((3, 2))) + weights=array_ops.zeros((3, 2, 1))) with self.test_session(): self.assertEqual(0, loss.eval()) @@ -1161,17 +1158,18 @@ class ComputeWeightedLossTest(test.TestCase): with ops.Graph().as_default(): self.assertEqual(0, len(util.get_losses())) raw_losses = self._raw_losses - shape = self._shape - unweighted_losses = (losses.compute_weighted_loss(raw_losses), - losses.compute_weighted_loss( - raw_losses, weights=1.0), - losses.compute_weighted_loss( - raw_losses, weights=np.ones(shape=shape[0:1])), - losses.compute_weighted_loss( - raw_losses, weights=np.ones(shape=shape[0:2])), - losses.compute_weighted_loss( - raw_losses, weights=np.ones(shape=shape))) - self.assertEqual(5, len(util.get_losses())) + unweighted_losses = ( + losses.compute_weighted_loss(raw_losses), + losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 1, 1))), + losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 1, 4))), + losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 2, 1))), 
+ losses.compute_weighted_loss(raw_losses, weights=np.ones((1, 2, 4))), + losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 1, 1))), + losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 1, 4))), + losses.compute_weighted_loss(raw_losses, weights=np.ones((3, 2, 1))), + losses.compute_weighted_loss(raw_losses, weights=np.ones(self._shape)) + ) + self.assertEqual(9, len(util.get_losses())) with self.test_session(): for unweighted_loss in unweighted_losses: self.assertAllClose(self._unweighted_loss, unweighted_loss.eval()) @@ -1187,215 +1185,114 @@ class ComputeWeightedLossTest(test.TestCase): self.assertAllClose( np.mean(weight * self._raw_losses), weighted_loss.eval()) - # TODO(b/33556118): Bug: `loss1` should be the same as `testUnweighted`, and - # `loss17` should be the same as `testScalarWeight`. - def testScalar1DWeight(self): + def _test_invalid_weights(self, weights): with ops.Graph().as_default(): self.assertEqual(0, len(util.get_losses())) - loss1 = losses.compute_weighted_loss(self._raw_losses, weights=(1.0,)) + expected_error_msg = 'weights can not be broadcast to values' + + # Static check. + with self.assertRaisesRegexp(ValueError, expected_error_msg): + losses.compute_weighted_loss(self._raw_losses, weights=weights) + + # Dynamic check. 
+ weights_placeholder = array_ops.placeholder(dtypes.float32) + weighted_loss = losses.compute_weighted_loss( + self._raw_losses, weights=weights_placeholder) self.assertEqual(1, len(util.get_losses())) - weight = 17.0 - loss17 = losses.compute_weighted_loss(self._raw_losses, weights=(weight,)) - self.assertEqual(2, len(util.get_losses())) with self.test_session(): - self.assertAllClose(self._unweighted_loss * self._shape[0], - loss1.eval()) - self.assertAllClose( - np.mean(weight * self._raw_losses) * self._shape[0], loss17.eval()) + with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): + weighted_loss.eval(feed_dict={weights_placeholder: weights}) - def testInvalid1DWeight(self): - with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, 'Dimensions must be equal'): - losses.compute_weighted_loss(self._raw_losses, weights=(17.0, 31.0)) - - def testInvalid4DWeight(self): - with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, 'Invalid weights shape'): - losses.compute_weighted_loss( - self._raw_losses, weights=np.zeros(shape=(2, 2, 2, 2))) + def testInvalidWeightTooManyDims(self): + self._test_invalid_weights(np.zeros(shape=(2, 2, 2, 2))) - def testInvalid4DWeight2(self): + def testInvalidWeightMismatchedDim(self): with ops.Graph().as_default(): raw_losses = array_ops.reshape(self._raw_losses, shape=(3, 2, 4, 1)) weights = np.ones(shape=(3, 2, 4, 2)) - with self.assertRaisesRegexp(ValueError, 'Invalid weights shape'): + expected_error_msg = 'weights can not be broadcast to values' + self.assertEqual(0, len(util.get_losses())) + + # Static check. + with self.assertRaisesRegexp(ValueError, expected_error_msg): losses.compute_weighted_loss(raw_losses, weights=weights) - def test3Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3 = (17.0, 5.0, 2.0) + # Dynamic check. 
+ weights_placeholder = array_ops.placeholder(dtypes.float32) weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3) + raw_losses, weights=weights_placeholder) self.assertEqual(1, len(util.get_losses())) with self.test_session(): - weights3x1x1 = np.reshape(weights3, (3, 1, 1)) - self.assertAllClose( - np.mean(weights3x1x1 * self._raw_losses), weighted_loss.eval()) + with self.assertRaisesRegexp(errors_impl.OpError, expected_error_msg): + weighted_loss.eval(feed_dict={weights_placeholder: weights}) - def test3x1Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x1 = ( - (17.0,), - (5.0,), - (2.0,),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x1) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - weights3x1x1 = np.reshape(weights3x1, (3, 1, 1)) - self.assertAllClose( - np.mean(weights3x1x1 * self._raw_losses), weighted_loss.eval()) + def testInvalid3Weight(self): + self._test_invalid_weights((17.0, 5.0, 2.0)) - # TODO(ptucker): Bug: this should be the same as `test3x1Weight`. 
- def test3x1x1Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x1x1 = ( - ((17.0,),), - ((5.0,),), - ((2.0,),),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x1x1) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights3x1x1 * self._raw_losses) * self._shape[1], - weighted_loss.eval()) + def testInvalid3x1Weight(self): + self._test_invalid_weights(((17.0,), (5.0,), (2.0,),)) - def test3x2Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x2 = ( - (17.0, 3.0), - (5.0, 31.0), - (2.0, 7.0),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x2) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - weights3x2x1 = np.reshape(weights3x2, (3, 2, 1)) - self.assertAllClose( - np.mean(weights3x2x1 * self._raw_losses), weighted_loss.eval()) + def testInvalid3x2Weight(self): + self._test_invalid_weights(( + (17.0, 3.0), + (5.0, 31.0), + (2.0, 7.0),)) + + def testInvalid1x2Weight(self): + self._test_invalid_weights((17.0, 3.0,),) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dim 0. 
- def test1x2Weight(self): + def testInvalidScalar1DWeight(self): + self._test_invalid_weights((17.0,),) + + def _test_valid_weights(self, weights): with ops.Graph().as_default(): self.assertEqual(0, len(util.get_losses())) - weights1x2 = (( - 17.0, - 3.0,),) weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights1x2) + self._raw_losses, weights=weights) self.assertEqual(1, len(util.get_losses())) with self.test_session(): - weights1x2x1 = np.reshape(weights1x2, (1, 2, 1)) self.assertAllClose( - np.mean(weights1x2x1 * self._raw_losses) * self._shape[0], + np.mean(weights * self._raw_losses), weighted_loss.eval()) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dim 0. + def test1x1x1Weight(self): + self._test_valid_weights((((17.0,),),)) + def test1x2x1Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights1x2x1 = (( - (17.0,), - (3.0,),),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights1x2x1) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights1x2x1 * self._raw_losses) * self._shape[0], - weighted_loss.eval()) + self._test_valid_weights((((17.0,), (3.0,),),)) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dims 0 & 1. 
def test1x1x4Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights1x1x4 = (((17.0, 13.0, 2.0, 5.0),),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights1x1x4) - self.assertEqual(1, len(util.get_losses())) - shape = self._shape - with self.test_session(): - self.assertAllClose( - np.mean(weights1x1x4 * self._raw_losses) * shape[0] * shape[1], - weighted_loss.eval()) + self._test_valid_weights((((17.0, 13.0, 2.0, 5.0),),)) + + def test3x1x1Weight(self): + self._test_valid_weights((((17.0,),), ((5.0,),), ((2.0,),),)) def test3x2x1Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x2x1 = ( - ((17.0,), (3.0,)), - ((5.0,), (31.0,)), - ((2.0,), (7.0,)), - ) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x2x1) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights3x2x1 * self._raw_losses), - weighted_loss.eval()) + self._test_valid_weights(( + ((17.0,), (3.0,)), + ((5.0,), (31.0,)), + ((2.0,), (7.0,)), + )) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dim 1. def test3x1x4Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x1x4 = ( - ((17.0, 13.0, 2.0, 5.0),), - ((5.0, 31.0, 17.0, 5.0),), - ((7.0, 3.0, 11.0, 5.0),), - ) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x1x4) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights3x1x4 * self._raw_losses) * self._shape[1], - weighted_loss.eval()) + self._test_valid_weights(( + ((17.0, 13.0, 2.0, 5.0),), + ((5.0, 31.0, 17.0, 5.0),), + ((7.0, 3.0, 11.0, 5.0),), + )) - # TODO(b/33556118): Bug: this should be averaged across all dimensions, not - # summed across dim 0. 
def test1x2x4Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights1x2x4 = (( - (17.0, 13.0, 2.0, 5.0), - (3.0, 13.0, 11.0, 2.0),),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights1x2x4) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights1x2x4 * self._raw_losses) * self._shape[0], - weighted_loss.eval()) + self._test_valid_weights((( + (17.0, 13.0, 2.0, 5.0), + (3.0, 13.0, 11.0, 2.0), + ),)) def test3x2x4Weight(self): - with ops.Graph().as_default(): - self.assertEqual(0, len(util.get_losses())) - weights3x2x4 = ( - ( - (17.0, 13.0, 2.0, 5.0), - (3.0, 13.0, 11.0, 2.0),), - ( - (5.0, 31.0, 17.0, 5.0), - (13.0, 3.0, 1.0, 11.0),), - ( - (7.0, 3.0, 11.0, 5.0), - (13.0, 11.0, 1.0, 7.0),),) - weighted_loss = losses.compute_weighted_loss( - self._raw_losses, weights=weights3x2x4) - self.assertEqual(1, len(util.get_losses())) - with self.test_session(): - self.assertAllClose( - np.mean(weights3x2x4 * self._raw_losses), weighted_loss.eval()) + self._test_valid_weights(( + ((17.0, 13.0, 2.0, 5.0), (3.0, 13.0, 11.0, 2.0),), + ((5.0, 31.0, 17.0, 5.0), (13.0, 3.0, 1.0, 11.0),), + ((7.0, 3.0, 11.0, 5.0), (13.0, 11.0, 1.0, 7.0),), + )) if __name__ == '__main__': diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index fc021c897a..4fbde86aec 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -721,15 +722,18 @@ class 
PrecisionTest(test.TestCase): self.assertAlmostEqual(1, sess.run(update_op)) self.assertAlmostEqual(1, precision.eval()) - def testSomeCorrect(self): - predictions = constant_op.constant([1, 0, 1, 0], shape=(1, 4)) - labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) - precision, update_op = metrics.precision(labels, predictions) + def testSomeCorrect_multipleInputDtypes(self): + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast( + constant_op.constant([1, 0, 1, 0], shape=(1, 4)), dtype=dtype) + labels = math_ops.cast( + constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype) + precision, update_op = metrics.precision(labels, predictions) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.5, update_op.eval()) - self.assertAlmostEqual(0.5, precision.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(0.5, update_op.eval()) + self.assertAlmostEqual(0.5, precision.eval()) def testWeighted1d(self): predictions = constant_op.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) @@ -885,15 +889,18 @@ class RecallTest(test.TestCase): sess.run(update_op) self.assertEqual(1, recall.eval()) - def testSomeCorrect(self): - predictions = constant_op.constant([1, 0, 1, 0], shape=(1, 4)) - labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) - recall, update_op = metrics.recall(labels, predictions) + def testSomeCorrect_multipleInputDtypes(self): + for dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = math_ops.cast( + constant_op.constant([1, 0, 1, 0], shape=(1, 4)), dtype=dtype) + labels = math_ops.cast( + constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=dtype) + recall, update_op = metrics.recall(labels, predictions) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.5, update_op.eval()) - 
self.assertAlmostEqual(0.5, recall.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(0.5, update_op.eval()) + self.assertAlmostEqual(0.5, recall.eval()) def testWeighted1d(self): predictions = constant_op.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) @@ -1008,17 +1015,20 @@ class AUCTest(test.TestCase): self.assertEqual(1, auc.eval()) - def testSomeCorrect(self): + def testSomeCorrect_multipleLabelDtypes(self): with self.test_session() as sess: - predictions = constant_op.constant( - [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) - labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) - auc, update_op = metrics.auc(labels, predictions) + for label_dtype in ( + dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = constant_op.constant( + [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) + labels = math_ops.cast( + constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=label_dtype) + auc, update_op = metrics.auc(labels, predictions) - sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.5, sess.run(update_op)) + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(0.5, sess.run(update_op)) - self.assertAlmostEqual(0.5, auc.eval()) + self.assertAlmostEqual(0.5, auc.eval()) def testWeighted1d(self): with self.test_session() as sess: @@ -1297,23 +1307,24 @@ class SpecificityAtSensitivityTest(test.TestCase): self.assertAlmostEqual(0.6, sess.run(update_op)) self.assertAlmostEqual(0.6, specificity.eval()) - def testWeighted1d(self): - predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26] - labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - weights_values = [3] + def testWeighted1d_multipleLabelDtypes(self): + for label_dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + 
weights_values = [3] - predictions = constant_op.constant( - predictions_values, dtype=dtypes_lib.float32) - labels = constant_op.constant(labels_values) - weights = constant_op.constant(weights_values) - specificity, update_op = metrics.specificity_at_sensitivity( - labels, predictions, weights=weights, sensitivity=0.4) + predictions = constant_op.constant( + predictions_values, dtype=dtypes_lib.float32) + labels = math_ops.cast(labels_values, dtype=label_dtype) + weights = constant_op.constant(weights_values) + specificity, update_op = metrics.specificity_at_sensitivity( + labels, predictions, weights=weights, sensitivity=0.4) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.6, sess.run(update_op)) - self.assertAlmostEqual(0.6, specificity.eval()) + self.assertAlmostEqual(0.6, sess.run(update_op)) + self.assertAlmostEqual(0.6, specificity.eval()) def testWeighted2d(self): predictions_values = [0.1, 0.2, 0.4, 0.3, 0.0, 0.1, 0.2, 0.2, 0.26, 0.26] @@ -1432,22 +1443,24 @@ class SensitivityAtSpecificityTest(test.TestCase): self.assertAlmostEqual(0.6, sess.run(update_op)) self.assertAlmostEqual(0.6, specificity.eval()) - def testWeighted(self): - predictions_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] - labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] - weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + def testWeighted_multipleLabelDtypes(self): + for label_dtype in (dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions_values = [ + 0.0, 0.1, 0.2, 0.3, 0.4, 0.01, 0.02, 0.25, 0.26, 0.26] + labels_values = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1] + weights_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - predictions = constant_op.constant( - predictions_values, dtype=dtypes_lib.float32) - labels = constant_op.constant(labels_values) - weights = constant_op.constant(weights_values) - specificity, 
update_op = metrics.sensitivity_at_specificity( - labels, predictions, weights=weights, specificity=0.4) + predictions = constant_op.constant( + predictions_values, dtype=dtypes_lib.float32) + labels = math_ops.cast(labels_values, dtype=label_dtype) + weights = constant_op.constant(weights_values) + specificity, update_op = metrics.sensitivity_at_specificity( + labels, predictions, weights=weights, specificity=0.4) - with self.test_session() as sess: - sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.675, sess.run(update_op)) - self.assertAlmostEqual(0.675, specificity.eval()) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + self.assertAlmostEqual(0.675, sess.run(update_op)) + self.assertAlmostEqual(0.675, specificity.eval()) # TODO(nsilberman): Break this up into two sets of tests. @@ -1536,22 +1549,25 @@ class PrecisionRecallThresholdsTest(test.TestCase): self.assertEqual(1, prec.eval()) self.assertEqual(1, rec.eval()) - def testSomeCorrect(self): + def testSomeCorrect_multipleLabelDtypes(self): with self.test_session() as sess: - predictions = constant_op.constant( - [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) - labels = constant_op.constant([0, 1, 1, 0], shape=(1, 4)) - thresholds = [0.5] - prec, prec_op = metrics.precision_at_thresholds(labels, predictions, - thresholds) - rec, rec_op = metrics.recall_at_thresholds(labels, predictions, - thresholds) + for label_dtype in ( + dtypes_lib.bool, dtypes_lib.int32, dtypes_lib.float32): + predictions = constant_op.constant( + [1, 0, 1, 0], shape=(1, 4), dtype=dtypes_lib.float32) + labels = math_ops.cast( + constant_op.constant([0, 1, 1, 0], shape=(1, 4)), dtype=label_dtype) + thresholds = [0.5] + prec, prec_op = metrics.precision_at_thresholds(labels, predictions, + thresholds) + rec, rec_op = metrics.recall_at_thresholds(labels, predictions, + thresholds) - sess.run(variables.local_variables_initializer()) - sess.run([prec_op, rec_op]) + 
sess.run(variables.local_variables_initializer()) + sess.run([prec_op, rec_op]) - self.assertAlmostEqual(0.5, prec.eval()) - self.assertAlmostEqual(0.5, rec.eval()) + self.assertAlmostEqual(0.5, prec.eval()) + self.assertAlmostEqual(0.5, rec.eval()) def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) diff --git a/tensorflow/python/kernel_tests/record_input_test.py b/tensorflow/python/kernel_tests/record_input_test.py new file mode 100644 index 0000000000..9b5de4fcdb --- /dev/null +++ b/tensorflow/python/kernel_tests/record_input_test.py @@ -0,0 +1,80 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for record_input_op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.python.lib.io import tf_record +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.platform import test + + +class RecordInputOpTest(test.TestCase): + + def generateTestData(self, prefix, n, m): + for i in range(n): + f = os.path.join(self.get_temp_dir(), prefix + "." 
+ str(i)) + w = tf_record.TFRecordWriter(f) + + for j in range(m): + w.write("{0:0{width}}".format(i * m + j, width=10).encode("utf-8")) + + w.close() + + def testRecordInputSimple(self): + with self.test_session() as sess: + self.generateTestData("basic", 1, 1) + + yield_op = data_flow_ops.RecordInput( + file_pattern=os.path.join(self.get_temp_dir(), "basic.*"), + parallelism=1, + buffer_size=1, + batch_size=1, + name="record_input").get_yield_op() + + self.assertEqual(sess.run(yield_op), b"0000000000") + + def testRecordInputEpochs(self): + files = 100 + records_per_file = 100 + with self.test_session() as sess: + self.generateTestData("basic", files, records_per_file) + + records = data_flow_ops.RecordInput( + file_pattern=os.path.join(self.get_temp_dir(), "basic.*"), + parallelism=2, + buffer_size=2000, + batch_size=1, + shift_ratio=0.33, + seed=10, + name="record_input") + + yield_op = records.get_yield_op() + + # cycle over 3 epochs and make sure we never duplicate + for _ in range(3): + epoch_set = set() + for _ in range(files * records_per_file): + r = sess.run(yield_op) + self.assertTrue(r[0] not in epoch_set) + epoch_set.add(r[0]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 74a6052ff6..853b08b2a5 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -121,6 +121,14 @@ class _Layer(object): return self._non_trainable_variables if self.trainable else self.variables @property + def trainable_weights(self): + return self.trainable_variables + + @property + def non_trainable_weights(self): + return self.non_trainable_variables + + @property def variables(self): """Returns the list of all layer variables/weights. 
diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index a476b0f72a..3b96d4362f 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -268,7 +268,7 @@ def conv1d(inputs, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for 1D convolution layer (e.g. temporal convolution). This layer creates a convolution kernel that is convolved @@ -435,7 +435,7 @@ def conv2d(inputs, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for the 2D convolution layer. This layer creates a convolution kernel that is convolved @@ -608,7 +608,7 @@ def conv3d(inputs, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for the 3D convolution layer. This layer creates a convolution kernel that is convolved @@ -867,7 +867,7 @@ def separable_conv2d(inputs, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for the depthwise separable 2D convolution layer. This layer performs a depthwise convolution that acts separately on @@ -1128,7 +1128,7 @@ def conv2d_transpose(inputs, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Transposed convolution layer (sometimes called Deconvolution). 
The need for transposed convolutions generally arises diff --git a/tensorflow/python/layers/convolutional_test.py b/tensorflow/python/layers/convolutional_test.py index c47e92c582..1a5fe5c9b7 100644 --- a/tensorflow/python/layers/convolutional_test.py +++ b/tensorflow/python/layers/convolutional_test.py @@ -18,11 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import ops from tensorflow.python.layers import convolutional as conv_layers +from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -191,21 +196,45 @@ class ConvTest(test.TestCase): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d(images, 32, [3, 3], name='conv1') - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) conv_layers.conv2d(images, 32, [3, 3], name='conv1', reuse=True) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) + + def testFunctionalConv2DReuseFromScope(self): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name='conv1') + self.assertEqual(len(variables.trainable_variables()), 2) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.conv2d(images, 32, [3, 3], name='conv1') + self.assertEqual(len(variables.trainable_variables()), 2) + + def testFunctionalConv2DInitializerFromScope(self): + with self.test_session() as sess: + 
with variable_scope.variable_scope( + 'scope', initializer=init_ops.ones_initializer()): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d(images, 32, [3, 3], name='conv1') + weights = variables.trainable_variables() + # Check the names of weights in order. + self.assertTrue('kernel' in weights[0].name) + self.assertTrue('bias' in weights[1].name) + sess.run(variables.global_variables_initializer()) + weights = sess.run(weights) + # Check that the kernel weights got initialized to ones (from scope) + self.assertAllClose(weights[0], np.ones((3, 3, 3, 32))) + # Check that the bias still got initialized to zeros. + self.assertAllClose(weights[1], np.zeros((32))) def testFunctionalConv2DNoReuse(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) conv_layers.conv2d(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 4) + self.assertEqual(len(variables.trainable_variables()), 4) class SeparableConv2DTest(test.TestCase): @@ -323,22 +352,48 @@ class SeparableConv2DTest(test.TestCase): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3) + self.assertEqual(len(variables.trainable_variables()), 3) conv_layers.separable_conv2d( images, 32, [3, 3], name='sepconv1', reuse=True) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3) + self.assertEqual(len(variables.trainable_variables()), 3) + + def testFunctionalConv2DReuseFromScope(self): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = 
random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + self.assertEqual(len(variables.trainable_variables()), 3) + + def testFunctionalConv2DInitializerFromScope(self): + with self.test_session() as sess: + with variable_scope.variable_scope( + 'scope', initializer=init_ops.ones_initializer()): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.separable_conv2d(images, 32, [3, 3], name='sepconv1') + weights = variables.trainable_variables() + # Check the names of weights in order. + self.assertTrue('depthwise_kernel' in weights[0].name) + self.assertTrue('pointwise_kernel' in weights[1].name) + self.assertTrue('bias' in weights[2].name) + sess.run(variables.global_variables_initializer()) + weights = sess.run(weights) + # Check that the kernel weights got initialized to ones (from scope) + self.assertAllClose(weights[0], np.ones((3, 3, 3, 1))) + self.assertAllClose(weights[1], np.ones((1, 1, 3, 32))) + # Check that the bias still got initialized to zeros. 
+ self.assertAllClose(weights[2], np.zeros((32))) def testFunctionalConv2DNoReuse(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 3) + self.assertEqual(len(variables.trainable_variables()), 3) conv_layers.separable_conv2d(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 6) + self.assertEqual(len(variables.trainable_variables()), 6) def testSeparableConv2DDepthwiseRegularizer(self): height, width = 7, 9 @@ -511,21 +566,45 @@ class Conv2DTransposeTest(test.TestCase): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1', reuse=True) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) + + def testFunctionalConv2DTransposeReuseFromScope(self): + with variable_scope.variable_scope('scope'): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + with variable_scope.variable_scope('scope', reuse=True): + conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + self.assertEqual(len(variables.trainable_variables()), 2) + + def testFunctionalConv2DTransposeInitializerFromScope(self): + with self.test_session() as sess: + with variable_scope.variable_scope( + 'scope', initializer=init_ops.ones_initializer()): + height, width = 7, 9 + images = random_ops.random_uniform((5, height, width, 
3), seed=1) + conv_layers.conv2d_transpose(images, 32, [3, 3], name='deconv1') + weights = variables.trainable_variables() + # Check the names of weights in order. + self.assertTrue('kernel' in weights[0].name) + self.assertTrue('bias' in weights[1].name) + sess.run(variables.global_variables_initializer()) + weights = sess.run(weights) + # Check that the kernel weights got initialized to ones (from scope) + self.assertAllClose(weights[0], np.ones((3, 3, 32, 3))) + # Check that the bias still got initialized to zeros. + self.assertAllClose(weights[1], np.zeros((32))) def testFunctionalConv2DTransposeNoReuse(self): height, width = 7, 9 images = random_ops.random_uniform((5, height, width, 3), seed=1) conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) + self.assertEqual(len(variables.trainable_variables()), 2) conv_layers.conv2d_transpose(images, 32, [3, 3]) - self.assertEqual( - len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 4) + self.assertEqual(len(variables.trainable_variables()), 4) if __name__ == '__main__': diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index c662478ccc..92894e1447 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -41,10 +41,12 @@ from tensorflow.python.layers import utils class Dense(base._Layer): # pylint: disable=protected-access """Densely-connected layer class. - This layer implements the operation `outputs = activation(inputs.w + b)` + This layer implements the operation: + `outputs = activation(inputs.kernel + bias)` Where `activation` is the activation function passed as the `activation` - argument (if not `None`), `w` is a weights matrix created by the layer, - and `b` is a bias vector created by the layer (only if `use_bias` is `True`). 
+ argument (if not `None`), `kernel` is a weights matrix created by the layer, + and `bias` is a bias vector created by the layer + (only if `use_bias` is `True`). Note: if the input to the layer has a rank greater than 2, then it is flattened prior to the initial matrix multiply by `w`. @@ -54,9 +56,9 @@ class Dense(base._Layer): # pylint: disable=protected-access activation: Activation function (callable). Set it to None to maintain a linear activation. use_bias: Boolean, whether the layer uses a bias. - weights_initializer: Initializer function for the weight matrix. + kernel_initializer: Initializer function for the weight matrix. bias_initializer: Initializer function for the bias. - weights_regularizer: Regularizer function for the weight matrix. + kernel_regularizer: Regularizer function for the weight matrix. bias_regularizer: Regularizer function for the bias. activity_regularizer: Regularizer function for the output. trainable: Boolean, if `True` also add variables to the graph collection @@ -70,21 +72,21 @@ class Dense(base._Layer): # pylint: disable=protected-access units: Python integer, dimensionality of the output space. activation: Activation function (callable). use_bias: Boolean, whether the layer uses a bias. - weights_initializer: Initializer instance (or name) for the weight matrix. + kernel_initializer: Initializer instance (or name) for the weight matrix. bias_initializer: Initializer instance (or name) for the bias. - weights_regularizer: Regularizer instance for the weight matrix (callable) + kernel_regularizer: Regularizer instance for the weight matrix (callable) bias_regularizer: Regularizer instance for the bias (callable). activity_regularizer: Regularizer instance for the output (callable) - weights: Weight matrix (TensorFlow variable or tensor). + kernel: Weight matrix (TensorFlow variable or tensor). bias: Bias vector, if applicable (TensorFlow variable or tensor). 
""" def __init__(self, units, activation=None, use_bias=True, - weights_initializer=None, + kernel_initializer=None, bias_initializer=init_ops.zeros_initializer(), - weights_regularizer=None, + kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, trainable=True, @@ -94,9 +96,9 @@ class Dense(base._Layer): # pylint: disable=protected-access self.units = units self.activation = activation self.use_bias = use_bias - self.weights_initializer = weights_initializer + self.kernel_initializer = kernel_initializer self.bias_initializer = bias_initializer - self.weights_regularizer = weights_regularizer + self.kernel_regularizer = kernel_regularizer self.bias_regularizer = bias_regularizer self.activity_regularizer = activity_regularizer @@ -113,12 +115,12 @@ class Dense(base._Layer): # pylint: disable=protected-access # weight of the layer. If the layer is not trainable # (self.trainable = False), the variable will not be added to # tf.trainable_variables(), and self.trainable_weights will be empty. 
- self.w = vs.get_variable('weights', - shape=[input_shape[-1].value, self.units], - initializer=self.weights_initializer, - regularizer=self.weights_regularizer, - dtype=self.dtype, - trainable=True) + self.kernel = vs.get_variable('kernel', + shape=[input_shape[-1].value, self.units], + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + dtype=self.dtype, + trainable=True) if self.use_bias: self.bias = vs.get_variable('bias', shape=[self.units,], @@ -140,7 +142,7 @@ class Dense(base._Layer): # pylint: disable=protected-access output_shape_tensor = array_ops.stack(output_shape_tensors) inputs = array_ops.reshape(inputs, [-1, input_dim]) - outputs = standard_ops.matmul(inputs, self.w) + outputs = standard_ops.matmul(inputs, self.kernel) if self.use_bias: outputs = nn.bias_add(outputs, self.bias) @@ -158,20 +160,22 @@ def dense( inputs, units, activation=None, use_bias=True, - weights_initializer=None, + kernel_initializer=None, bias_initializer=init_ops.zeros_initializer(), - weights_regularizer=None, + kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for the densely-connected layer. - This layer implements the operation `outputs = activation(inputs.w + b)` + This layer implements the operation: + `outputs = activation(inputs.kernel + bias)` Where `activation` is the activation function passed as the `activation` - argument (if not `None`), `w` is a weights matrix created by the layer, - and `b` is a bias vector created by the layer (only if `use_bias` is `True`). + argument (if not `None`), `kernel` is a weights matrix created by the layer, + and `bias` is a bias vector created by the layer + (only if `use_bias` is `True`). Note: if the `inputs` tensor has a rank greater than 2, then it is flattened prior to the initial matrix multiply by `w`. @@ -182,9 +186,9 @@ def dense( activation: Activation function (callable). 
Set it to None to maintain a linear activation. use_bias: Boolean, whether the layer uses a bias. - weights_initializer: Initializer function for the weight matrix. + kernel_initializer: Initializer function for the weight matrix. bias_initializer: Initializer function for the bias. - weights_regularizer: Regularizer function for the weight matrix. + kernel_regularizer: Regularizer function for the weight matrix. bias_regularizer: Regularizer function for the bias. activity_regularizer: Regularizer function for the output. trainable: Boolean, if `True` also add variables to the graph collection @@ -199,9 +203,9 @@ def dense( layer = Dense(units, activation=activation, use_bias=use_bias, - weights_initializer=weights_initializer, + kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, - weights_regularizer=weights_regularizer, + kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, trainable=trainable, diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index c1fbe957df..cfcee7b788 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -39,7 +39,7 @@ class DenseTest(test.TestCase): dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense') self.assertEqual(dense.units, 2) self.assertEqual(dense.activation, nn_ops.relu) - self.assertEqual(dense.weights_regularizer, None) + self.assertEqual(dense.kernel_regularizer, None) self.assertEqual(dense.bias_regularizer, None) self.assertEqual(dense.activity_regularizer, None) self.assertEqual(dense.use_bias, True) @@ -55,36 +55,37 @@ class DenseTest(test.TestCase): dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense') inputs = random_ops.random_uniform((5, 2), seed=1) _ = dense(inputs) - self.assertListEqual(dense.variables, [dense.w, dense.bias]) - self.assertListEqual(dense.trainable_variables, [dense.w, dense.bias]) + 
self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) + self.assertListEqual(dense.trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.non_trainable_variables, []) - self.assertListEqual(dense._trainable_variables, [dense.w, dense.bias]) + self.assertListEqual(dense._trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense._non_trainable_variables, []) self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) - self.assertEqual(dense.w.name, 'my_dense/weights:0') + self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias.name, 'my_dense/bias:0') def testNoBias(self): dense = core_layers.Dense(2, use_bias=False, name='my_dense') inputs = random_ops.random_uniform((5, 2), seed=1) _ = dense(inputs) - self.assertListEqual(dense.variables, [dense.w]) - self.assertListEqual(dense.trainable_variables, [dense.w]) + self.assertListEqual(dense.variables, [dense.kernel]) + self.assertListEqual(dense.trainable_variables, [dense.kernel]) self.assertListEqual(dense.non_trainable_variables, []) self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1) - self.assertEqual(dense.w.name, 'my_dense/weights:0') + self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias, None) def testNonTrainable(self): dense = core_layers.Dense(2, trainable=False, name='my_dense') inputs = random_ops.random_uniform((5, 2), seed=1) _ = dense(inputs) - self.assertListEqual(dense.variables, [dense.w, dense.bias]) - self.assertListEqual(dense.non_trainable_variables, [dense.w, dense.bias]) + self.assertListEqual(dense.variables, [dense.kernel, dense.bias]) + self.assertListEqual(dense.non_trainable_variables, + [dense.kernel, dense.bias]) self.assertListEqual(dense.trainable_variables, []) - self.assertListEqual(dense._trainable_variables, [dense.w, dense.bias]) + self.assertListEqual(dense._trainable_variables, [dense.kernel, dense.bias]) 
self.assertListEqual(dense._non_trainable_variables, []) self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 0) @@ -149,25 +150,25 @@ class DenseTest(test.TestCase): self.assertEqual(len(loss_keys), 1) self.assertListEqual(dense.losses, loss_keys) - def testWeightsRegularizer(self): + def testKernelRegularizer(self): regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3 dense = core_layers.Dense( - 2, name='my_dense', weights_regularizer=regularizer) + 2, name='my_dense', kernel_regularizer=regularizer) inputs = random_ops.random_uniform((5, 3), seed=1) _ = dense(inputs) loss_keys = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) self.assertEqual(len(loss_keys), 1) self.assertListEqual(dense.losses, loss_keys) - def testWeightsRegularizerWithReuse(self): + def testKernelRegularizerWithReuse(self): regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3 inputs = random_ops.random_uniform((5, 3), seed=1) _ = core_layers.dense( - inputs, 2, name='my_dense', weights_regularizer=regularizer) + inputs, 2, name='my_dense', kernel_regularizer=regularizer) self.assertEqual( len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1) _ = core_layers.dense( - inputs, 2, name='my_dense', weights_regularizer=regularizer, reuse=True) + inputs, 2, name='my_dense', kernel_regularizer=regularizer, reuse=True) self.assertEqual( len(ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)), 1) @@ -206,6 +207,16 @@ class DenseTest(test.TestCase): vars2 = variables.trainable_variables() self.assertEqual(vars1, vars2) + def testFunctionalDenseTwiceReuseFromScope(self): + with variable_scope.variable_scope('scope'): + inputs = random_ops.random_uniform((5, 3), seed=1) + core_layers.dense(inputs, 2, name='my_dense') + vars1 = variables.trainable_variables() + with variable_scope.variable_scope('scope', reuse=True): + core_layers.dense(inputs, 2, name='my_dense') + vars2 = variables.trainable_variables() + self.assertEqual(vars1, vars2) + def 
testFunctionalDenseInitializerFromScope(self): with self.test_session() as sess: with variable_scope.variable_scope( @@ -237,17 +248,17 @@ class DenseTest(test.TestCase): inputs = random_ops.random_uniform((5, 3), seed=1) core_layers.dense(inputs, 2, name='my_dense') var = variables.trainable_variables()[0] - self.assertEqual(var.name, 'test/my_dense/weights:0') + self.assertEqual(var.name, 'test/my_dense/kernel:0') with variable_scope.variable_scope('test1') as scope: inputs = random_ops.random_uniform((5, 3), seed=1) core_layers.dense(inputs, 2, name=scope) var = variables.trainable_variables()[2] - self.assertEqual(var.name, 'test1/weights:0') + self.assertEqual(var.name, 'test1/kernel:0') with variable_scope.variable_scope('test2'): inputs = random_ops.random_uniform((5, 3), seed=1) core_layers.dense(inputs, 2) var = variables.trainable_variables()[4] - self.assertEqual(var.name, 'test2/dense/weights:0') + self.assertEqual(var.name, 'test2/dense/kernel:0') class DropoutTest(test.TestCase): diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index fcbc69f2c5..4a59d77948 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -257,7 +257,7 @@ def batch_normalization(inputs, training=False, trainable=True, name=None, - reuse=False): + reuse=None): """Functional interface for the batch normalization layer. 
Reference: http://arxiv.org/abs/1502.03167 diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py index 93efc09ca0..91b7cb6f48 100644 --- a/tensorflow/python/layers/normalization_test.py +++ b/tensorflow/python/layers/normalization_test.py @@ -26,6 +26,7 @@ from tensorflow.python.layers import normalization as normalization_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -454,6 +455,20 @@ class BNTest(test.TestCase): self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2) self.assertAlmostEqual(np.std(normed_np_output), 1., places=1) + def testFunctionalReuseFromScope(self): + inputs = variables.Variable( + np.random.random((5, 4, 3, 6)), dtype=dtypes.float32) + epsilon = 1e-3 + training = array_ops.placeholder(dtype='bool') + with variable_scope.variable_scope('scope'): + _ = normalization_layers.batch_norm( + inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training) + self.assertEqual(len(variables.global_variables()), 5) + with variable_scope.variable_scope('scope', reuse=True): + _ = normalization_layers.batch_norm( + inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training) + self.assertEqual(len(variables.global_variables()), 5) + def testNoCenter(self): bn = normalization_layers.BatchNormalization(axis=1, center=False) inputs = random_ops.random_uniform((5, 4, 3), seed=1) diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py index 628853545e..95247ea125 100644 --- a/tensorflow/python/ops/confusion_matrix.py +++ b/tensorflow/python/ops/confusion_matrix.py @@ -32,8 +32,19 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops -def remove_squeezable_dimensions(labels, 
predictions, name=None): - """Squeeze last dim if ranks of `predictions` and `labels` differ by 1. +def remove_squeezable_dimensions( + labels, predictions, expected_rank_diff=0, name=None): + """Squeeze last dim if ranks differ from expected by exactly 1. + + In the common case where we expect shapes to match, `expected_rank_diff` + defaults to 0, and we squeeze the last dimension of the larger rank if they + differ by 1. + + But, for example, if `labels` contains class IDs and `predictions` contains 1 + probability per class, we expect `predictions` to have 1 more dimension than + `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze + `labels` if `rank(predictions) - rank(labels) == 0`, and + `predictions` if `rank(predictions) - rank(labels) == 2`. This will use static shape if available. Otherwise, it will add graph operations, which could result in a performance hit. @@ -41,6 +52,7 @@ def remove_squeezable_dimensions(labels, predictions, name=None): Args: labels: Label values, a `Tensor` whose dimensions match `predictions`. predictions: Predicted values, a `Tensor` of arbitrary dimensions. + expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`. name: Name of the op. Returns: @@ -57,10 +69,10 @@ def remove_squeezable_dimensions(labels, predictions, name=None): if (labels_rank is not None) and (predictions_rank is not None): # Use static rank. rank_diff = predictions_rank - labels_rank - if rank_diff == -1: - labels = array_ops.squeeze(labels, [-1]) - elif rank_diff == 1: + if rank_diff == expected_rank_diff + 1: predictions = array_ops.squeeze(predictions, [-1]) + elif rank_diff == expected_rank_diff - 1: + labels = array_ops.squeeze(labels, [-1]) return labels, predictions # Use dynamic rank. 
@@ -68,13 +80,13 @@ def remove_squeezable_dimensions(labels, predictions, name=None): if (predictions_rank is None) or ( predictions_shape.dims[-1].is_compatible_with(1)): predictions = control_flow_ops.cond( - math_ops.equal(1, rank_diff), + math_ops.equal(expected_rank_diff + 1, rank_diff), lambda: array_ops.squeeze(predictions, [-1]), lambda: predictions) if (labels_rank is None) or ( labels_shape.dims[-1].is_compatible_with(1)): labels = control_flow_ops.cond( - math_ops.equal(-1, rank_diff), + math_ops.equal(expected_rank_diff - 1, rank_diff), lambda: array_ops.squeeze(labels, [-1]), lambda: labels) return labels, predictions diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 72f0454e30..037c3a8187 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1613,3 +1613,65 @@ class StagingArea(object): output.set_shape(shape) return self._get_return_value(ret) + + +class RecordInput(object): + """RecordInput asynchronously reads and randomly yields TFRecords. + + A RecordInput Op will continuously read a batch of records asynchronously + into a buffer of some fixed capacity. It can also asynchronously yield + random records from this buffer. + + It will not start yielding until at least `buffer_size / 2` elements have been + placed into the buffer so that sufficient randomization can take place. + + The order the files are read will be shifted each epoch by `shift_amount` so + that the data is presented in a different order every epoch. + """ + + def __init__(self, + file_pattern, + batch_size=1, + buffer_size=1, + parallelism=1, + shift_ratio=0, + seed=0, + name=None): + """Constructs a RecordInput Op. + + Args: + file_pattern: File path to the dataset, possibly containing wildcards. + All matching files will be iterated over each epoch. + batch_size: How many records to return at a time. + buffer_size: The maximum number of records the buffer will contain. 
This + _must_ be smaller than the total number of records in an epoch or + deadlock can occur. + parallelism: How many reader threads to use for reading from files. + shift_ratio: What percentage of the total number files to move the start + file forward by each epoch. + seed: Specify the random number seed used by generator that randomizes + records. + name: Optional name for the operation. + + Raises: + ValueError: If one of the arguments is invalid. + """ + + self._batch_size = batch_size + self._file_pattern = file_pattern + self._buffer_size = buffer_size + self._parallelism = parallelism + self._shift_ratio = shift_ratio + self._seed = seed + self._name = name + + def get_yield_op(self): + """Add a node that yields a minibatch every time it is executed.""" + return gen_data_flow_ops.record_input( + file_pattern=self._file_pattern, + file_buffer_size=self._buffer_size, + file_parallelism=self._parallelism, + file_shuffle_shift_ratio=self._shift_ratio, + batch_size=self._batch_size, + file_random_seed=self._seed, + name=self._name) diff --git a/tensorflow/python/ops/hidden_ops.txt b/tensorflow/python/ops/hidden_ops.txt index 4b1b9815ca..16068e57d8 100644 --- a/tensorflow/python/ops/hidden_ops.txt +++ b/tensorflow/python/ops/hidden_ops.txt @@ -237,6 +237,7 @@ Max Mean Min Mul +Neg Pow Prod Range diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index b6da60770d..c231ca56bb 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -449,6 +449,42 @@ class AdjustSaturationBenchmark(test.Benchmark): self._benchmarkAdjustSaturation(test.gpu_device_name(), None) +class ResizeBilinearBenchmark(test.Benchmark): + + def _benchmarkResize(self, image_size): + # 4D float tensor (10 images per batch, 3 channels per image) + img = variables.Variable( + random_ops.random_normal([10, image_size[0], image_size[1], 3]), + name='img') + + deps = [] + for _ in xrange(100): + with 
ops.control_dependencies(deps): + resize_op = image_ops.resize_bilinear( + img, [299, 299], align_corners=False) + deps = [resize_op] + benchmark_op = control_flow_ops.group(*deps) + + with session.Session() as sess: + sess.run(variables.global_variables_initializer()) + print('Variables initalized for resize_bilinear image size: %s.' % + (image_size,)) + benchmark_values = self.run_op_benchmark( + sess, + benchmark_op, + name=('bilinear_%s_%s' % image_size),) + print('Benchmark values:\n%s' % benchmark_values) + + def benchmarkSimilar(self): + self._benchmarkResize((183, 229)) + + def benchmarkScaleUp(self): + self._benchmarkResize((141, 186)) + + def benchmarkScaleDown(self): + self._benchmarkResize((749, 603)) + + class ResizeBicubicBenchmark(test.Benchmark): def _benchmarkResize(self, image_size): diff --git a/tensorflow/python/ops/losses/BUILD b/tensorflow/python/ops/losses/BUILD index 47d4d594d6..c4ce11ce0f 100644 --- a/tensorflow/python/ops/losses/BUILD +++ b/tensorflow/python/ops/losses/BUILD @@ -22,12 +22,15 @@ py_library( srcs_version = "PY2AND3", deps = [ "//tensorflow/python:array_ops", + "//tensorflow/python:confusion_matrix", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", "//tensorflow/python:nn", "//tensorflow/python:nn_ops", "//tensorflow/python:platform", "//tensorflow/python:util", + "//tensorflow/python:weights_broadcast_ops", ], ) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 486e25afc7..89daa9594a 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -20,11 +20,13 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import confusion_matrix +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from 
tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import weights_broadcast_ops from tensorflow.python.ops.losses import util -from tensorflow.python.platform import tf_logging as logging def _scale_losses(losses, weights): @@ -46,13 +48,8 @@ def _scale_losses(losses, weights): A scalar tf.float32 `Tensor` whose value represents the sum of the scaled `losses`. """ - # First, compute the sum of the losses over all elements: - start_index = max(0, weights.get_shape().ndims) - reduction_indices = list(range(start_index, losses.get_shape().ndims)) - reduced_losses = math_ops.reduce_sum(losses, - reduction_indices=reduction_indices) - reduced_losses = math_ops.multiply(reduced_losses, weights) - return math_ops.reduce_sum(reduced_losses) + weighted_losses = math_ops.multiply(losses, weights) + return math_ops.reduce_sum(weighted_losses) def _safe_div(numerator, denominator, name="value"): @@ -117,51 +114,29 @@ def _num_present(losses, weights, per_batch=False): `per_batch` is `True`, the value is returned as a tensor of size `[batch_size]`. Otherwise, a single scalar tensor is returned. """ - # If weights is a scalar, its easy to compute: - if weights.get_shape().ndims == 0: - if losses.get_shape().ndims == 0: - batch_size = 1 - else: - batch_size = array_ops.reshape(array_ops.slice(array_ops.shape(losses), - [0], [1]), []) - num_per_batch = math_ops.div(math_ops.to_float(array_ops.size(losses)), - math_ops.to_float(batch_size)) - num_per_batch = array_ops.where(math_ops.equal(weights, 0), - 0.0, num_per_batch) - num_per_batch = math_ops.multiply(array_ops.ones( - array_ops.reshape(batch_size, [1])), num_per_batch) - return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch) - - # First, count the number of nonzero weights. 
- if weights.get_shape().ndims >= 1: - reduction_indices = list(range(1, weights.get_shape().ndims)) - num_nonzero_per_batch = math_ops.reduce_sum( - math_ops.to_float(math_ops.not_equal(weights, 0)), - reduction_indices=reduction_indices) - - # Next, determine the number of elements that weight would broadcast to: - broadcast_dims = array_ops.slice(array_ops.shape(losses), - [weights.get_shape().ndims], [-1]) - num_to_broadcast = math_ops.to_float(math_ops.reduce_prod(broadcast_dims)) - - num_per_batch = math_ops.multiply(num_nonzero_per_batch, num_to_broadcast) - return num_per_batch if per_batch else math_ops.reduce_sum(num_per_batch) + with ops.name_scope(None, "num_present", (losses, weights)) as scope: + weights = math_ops.to_float(weights) + present = array_ops.where( + math_ops.equal(weights, 0.0), + array_ops.zeros_like(weights), + array_ops.ones_like(weights)) + present = weights_broadcast_ops.broadcast_weights(present, losses) + if per_batch: + return math_ops.reduce_sum( + present, axis=math_ops.range(1, array_ops.rank(present)), + keep_dims=True, name=scope) + return math_ops.reduce_sum(present, name=scope) def compute_weighted_loss( losses, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES): """Computes the weighted loss. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: losses: `Tensor` of shape `[batch_size, d1, ... dN]`. - weights: `Tensor` of shape `[]`, `[batch_size]` or - `[batch_size, d1, ... dK]`, where K < N. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `losses`, and must be broadcastable to `losses` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). 
scope: the scope for the operations performed in computing the loss. loss_collection: the loss will be added to these collections. @@ -173,52 +148,20 @@ def compute_weighted_loss( `losses`, or if the number of dimensions (rank) of either `losses` or `weights` is missing. """ - with ops.name_scope(scope, "weighted_loss", [losses, weights]): - losses = ops.convert_to_tensor(losses) - input_dtype = losses.dtype - losses = math_ops.to_float(losses) - weights = math_ops.to_float(ops.convert_to_tensor(weights)) - - losses_shape = losses.get_shape() - if losses_shape.ndims is None: - raise ValueError("losses.get_shape().ndims cannot be None") - weights_shape = weights.get_shape() - if weights_shape.ndims is None: - raise ValueError("weight.get_shape().ndims cannot be None") - - # TODO(b/33556118): Remove `ndims > 1` check so shapes [] and [1] behave the - # same. - if weights_shape.ndims > 1 and weights_shape.dims[-1].is_compatible_with(1): - weights = array_ops.squeeze(weights, [-1]) - - # TODO(b/33556118): Remove this when we require weights shape be either - # scalar or the same as losses. - weights_dims = weights_shape.as_list() - losses_dims = losses_shape.as_list() - if len(weights_dims) > len(losses_dims): - raise ValueError( - "Invalid weights shape %s can not be broadcast to losses %s." % ( - weights_shape, losses_shape)) - for i in range(len(weights_dims)): - if ((losses_dims[i] is not None) and (losses_dims[i] == 1) and - (weights_dims[i] is not None) and (weights_dims[i] != 1)): - raise ValueError( - "Invalid weights shape %s can not be broadcast to losses %s." 
% ( - weights_shape, losses_shape)) - for i in range(len(weights_dims)): - if ((losses_dims[i] is not None) and (losses_dims[i] != 1) and - (weights_dims[i] is not None) and (weights_dims[i] == 1)): - logging.warn( - "WARNING: Weights %s with dimension 1 will result in a sum" - ", not average, across dimension %d.", weights_shape, i) - - total_loss = _scale_losses(losses, weights) - num_present = _num_present(losses, weights) - mean_loss = _safe_mean(total_loss, num_present) - # Convert the result back to the input type. - mean_loss = math_ops.cast(mean_loss, input_dtype) - util.add_loss(mean_loss, loss_collection) - return mean_loss + with ops.name_scope(scope, "weighted_loss", (losses, weights)): + with ops.control_dependencies(( + weights_broadcast_ops.assert_broadcastable(weights, losses),)): + losses = ops.convert_to_tensor(losses) + input_dtype = losses.dtype + losses = math_ops.to_float(losses) + weights = math_ops.to_float(weights) + total_loss = _scale_losses(losses, weights) + num_present = _num_present(losses, weights) + mean_loss = _safe_mean(total_loss, num_present) + # Convert the result back to the input type. + mean_loss = math_ops.cast(mean_loss, input_dtype) + util.add_loss(mean_loss, loss_collection) + return mean_loss def absolute_difference( @@ -234,17 +177,12 @@ def absolute_difference( measurable element of `predictions` is scaled by the corresponding value of `weights`. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: labels: The ground truth output tensor, same dimensions as 'predictions'. predictions: The predicted outputs. - weights: Coefficients for the loss a scalar, a tensor of shape - `[batch_size]` or a tensor whose shape matches `predictions`. 
+ weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which this loss will be added. @@ -272,18 +210,13 @@ def cosine_distance( Note that the function assumes that `predictions` and `labels` are already unit-normalized. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: labels: `Tensor` whose shape matches 'predictions' predictions: An arbitrary matrix. dim: The dimension along which the cosine distance is computed. - weights: Coefficients for the loss a scalar, a tensor of shape - `[batch_size]` or a tensor whose shape matches `predictions`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which this loss will be added. 
@@ -303,7 +236,7 @@ def cosine_distance( predictions.get_shape().assert_is_compatible_with(labels.get_shape()) radial_diffs = math_ops.multiply(predictions, labels) - losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,]) + losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(dim,), keep_dims=True) return compute_weighted_loss(losses, weights, scope, loss_collection) @@ -311,18 +244,13 @@ def hinge_loss(labels, logits, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES): """Adds a hinge loss to the training procedure. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: labels: The ground truth output tensor. Its shape should match the shape of logits. The values of the tensor are expected to be 0.0 or 1.0. logits: The logits, a float tensor. - weights: Coefficients for the loss a scalar, a tensor of shape - `[batch_size]` or a tensor whose shape matches `predictions`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. @@ -356,17 +284,12 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None, measurable element of `predictions` is scaled by the corresponding value of `weights`. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. 
This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: labels: The ground truth output tensor, same dimensions as 'predictions'. predictions: The predicted outputs. - weights: Coefficients for the loss a scalar, a tensor of shape - `[batch_size]` or a tensor whose shape matches `predictions`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). epsilon: A small increment to add to avoid taking a log of zero. scope: The scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. @@ -434,41 +357,39 @@ def mean_pairwise_squared_error(labels, predictions, weights=1.0, scope=None, """ with ops.name_scope(scope, "mean_pairwise_squared_error", (predictions, labels, weights)) as scope: - predictions = math_ops.to_float(predictions) + weights = math_ops.to_float(weights) labels = math_ops.to_float(labels) - predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - weights = math_ops.to_float(ops.convert_to_tensor(weights)) - - diffs = math_ops.subtract(predictions, labels) + with ops.control_dependencies(( + weights_broadcast_ops.assert_broadcastable(weights, labels),)): + predictions = math_ops.to_float(predictions) + predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - # Need to verify here since the function doesn't use compute_weighted_loss - if diffs.get_shape().ndims is None: - raise ValueError("diffs.get_shape().ndims cannot be None") - if weights.get_shape().ndims is None: - raise ValueError("weights.get_shape().ndims cannot be None") + diffs = math_ops.subtract(predictions, labels) - reduction_indices = list(range(1, diffs.get_shape().ndims)) + reduction_indices = math_ops.range(1, array_ops.rank(diffs)) - 
sum_squares_diff_per_batch = math_ops.reduce_sum( - math_ops.square(diffs), - reduction_indices=reduction_indices) - num_present_per_batch = _num_present(diffs, weights, per_batch=True) + sum_squares_diff_per_batch = math_ops.reduce_sum( + math_ops.square(diffs), reduction_indices=reduction_indices, + keep_dims=True) + num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, - num_present_per_batch) + term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, + num_present_per_batch) - sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) - term2 = 2.0 * _safe_div(math_ops.square(sum_diff), - math_ops.square(num_present_per_batch)) + sum_diff = math_ops.reduce_sum( + diffs, reduction_indices=reduction_indices, keep_dims=True) + term2 = 2.0 * _safe_div(math_ops.square(sum_diff), + math_ops.square(num_present_per_batch)) - loss = _scale_losses(term1 - term2, weights) + loss = _scale_losses(term1 - term2, weights) - mean_loss = array_ops.where(math_ops.reduce_sum(num_present_per_batch) > 0, - loss, - array_ops.zeros_like(loss), - name="value") - util.add_loss(mean_loss, loss_collection) - return mean_loss + mean_loss = array_ops.where( + math_ops.reduce_sum(num_present_per_batch) > 0, + loss, + array_ops.zeros_like(loss), + name="value") + util.add_loss(mean_loss, loss_collection) + return mean_loss def mean_squared_error(labels, predictions, weights=1.0, scope=None, @@ -483,17 +404,12 @@ def mean_squared_error(labels, predictions, weights=1.0, scope=None, measurable element of `predictions` is scaled by the corresponding value of `weights`. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. 
- Args: labels: The ground truth output tensor, same dimensions as 'predictions'. predictions: The predicted outputs. - weights: Coefficients for the loss a scalar, a tensor of shape - `[batch_size]` or a tensor whose shape matches `predictions`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). scope: The scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. @@ -523,12 +439,6 @@ def sigmoid_cross_entropy( tensor of shape `[batch_size]`, then the loss weights apply to each corresponding sample. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - If `label_smoothing` is nonzero, smooth the labels towards 1/2: new_multiclass_labels = multiclass_labels * (1 - label_smoothing) @@ -538,8 +448,9 @@ def sigmoid_cross_entropy( multi_class_labels: `[batch_size, num_classes]` target integer labels in `(0, 1)`. logits: `[batch_size, num_classes]` logits outputs of the network. - weights: Coefficients for the loss. This must be of shape `[]`, - `[batch_size]` or `[batch_size, num_classes]`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`, and must be broadcastable to `labels` (i.e., all dimensions must + be either `1`, or the same as the corresponding `losses` dimension). label_smoothing: If greater than `0` then smooth the labels. scope: The scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. 
@@ -578,12 +489,6 @@ def softmax_cross_entropy( tensor of shape `[batch_size]`, then the loss weights apply to each corresponding sample. - WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - If `label_smoothing` is nonzero, smooth the labels towards 1/num_classes: new_onehot_labels = onehot_labels * (1 - label_smoothing) + label_smoothing / num_classes @@ -591,8 +496,10 @@ def softmax_cross_entropy( Args: onehot_labels: `[batch_size, num_classes]` target one-hot-encoded labels. logits: [batch_size, num_classes] logits outputs of the network . - weights: Coefficients for the loss. This must be of shape `[]`, - `[batch_size]` or `[batch_size, num_classes]`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `onehot_labels`, and must be broadcastable to `onehot_labels` (i.e., all + dimensions must be either `1`, or the same as the corresponding `losses` + dimension). label_smoothing: If greater than 0 then smooth the labels. scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. @@ -623,6 +530,57 @@ def softmax_cross_entropy( return compute_weighted_loss(losses, weights, scope, loss_collection) +# TODO(ptucker): Merge this with similar method in metrics_impl. +def _remove_squeezable_dimensions( + labels, predictions, weights=None, expected_rank_diff=0): + """Internal version of _remove_squeezable_dimensions which handles weights. + + Squeezes `predictions` and `labels` if their ranks differ from expected by + exactly 1. + Squeezes `weights` if its rank is 1 more than the new rank of `predictions` + + This will use static shape if available. 
Otherwise, it will add graph + operations, which could result in a performance hit. + + Args: + labels: Label values, a `Tensor` whose dimensions match `predictions`. + predictions: Predicted values, a `Tensor` of arbitrary dimensions. + weights: Optional weight `Tensor`. It will be squeezed if it's not scalar, + and its rank is 1 more than the new rank of `labels`. + expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`. + + Returns: + Tuple of `predictions`, `labels` and `weights`, possibly with the last + dimension squeezed. + """ + labels, predictions = confusion_matrix.remove_squeezable_dimensions( + labels, predictions, expected_rank_diff=expected_rank_diff) + + if weights is not None: + weights = ops.convert_to_tensor(weights) + labels_rank = labels.get_shape().ndims + weights_shape = weights.get_shape() + weights_rank = weights_shape.ndims + + if (labels_rank is not None) and (weights_rank is not None): + # Use static rank. + rank_diff = weights_rank - labels_rank + if rank_diff == 1: + weights = array_ops.squeeze(weights, [-1]) + return labels, predictions, weights + + # Use dynamic rank. + rank_diff = array_ops.rank(weights) - array_ops.rank(labels) + if (weights_rank is None) or ( + weights_shape.dims[-1].is_compatible_with(1)): + weights = control_flow_ops.cond( + math_ops.equal(1, rank_diff), + lambda: array_ops.squeeze(weights, [-1]), + lambda: weights) + + return labels, predictions, weights + + def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES): """Cross-entropy loss using `tf.nn.sparse_softmax_cross_entropy_with_logits`. @@ -632,18 +590,16 @@ def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None, tensor of shape [`batch_size`], then the loss weights apply to each corresponding sample. 
- WARNING: `weights` also supports dimensions of 1, but the broadcasting does - not work as advertised, you'll wind up with weighted sum instead of weighted - mean for any but the last dimension. This will be cleaned up soon, so please - do not rely on the current behavior for anything but the shapes documented for - `weights` below. - Args: - labels: [batch_size, 1] or [batch_size] target labels of dtype `int32` or - `int64` in the range `[0, num_classes)`. - logits: [batch_size, num_classes] logits outputs of the network . - weights: Coefficients for the loss. This must be of shape `[batch_size]` or - `[batch_size, 1]`. + labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of + `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` + must be an index in `[0, num_classes)`. Other values will raise an + exception when this op is run on CPU, and return `NaN` for corresponding + loss and gradient rows on GPU. + logits: Unscaled log probabilities of shape + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. + weights: Coefficients for the loss. This must be scalar or of same rank as + `labels` scope: the scope for the operations performed in computing the loss. loss_collection: collection to which the loss will be added. @@ -655,12 +611,13 @@ def sparse_softmax_cross_entropy(labels, logits, weights=1.0, scope=None, if `weights` is None. """ with ops.name_scope(scope, "sparse_softmax_cross_entropy_loss", - [logits, labels, weights]) as scope: - labels = array_ops.reshape(labels, shape=[array_ops.shape(labels)[0]]) - + (logits, labels, weights)) as scope: + # As documented above in Args, labels contain class IDs and logits contains + # 1 probability per class ID, so we expect rank(logits) - rank(labels) == 1; + # therefore, expected_rank_diff=1. 
+ labels, logits, weights = _remove_squeezable_dimensions( + labels, logits, weights, expected_rank_diff=1) losses = nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits, name="xentropy") - # Reshape losses to [batch_size, 1] to be consistent with weights. - losses = array_ops.reshape(losses, shape=[array_ops.shape(losses)[0], 1]) return compute_weighted_loss(losses, weights, scope, loss_collection) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index c9ad0936a5..11e7d8382f 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -260,6 +260,8 @@ def argmax(input, axis=None, name=None, dimension=None): if axis is not None: raise ValueError("Cannot specify both 'axis' and 'dimension'") axis = dimension + elif axis is None: + axis = 0 return gen_math_ops.arg_max(input, axis, name) @@ -273,6 +275,8 @@ def argmin(input, axis=None, name=None, dimension=None): if axis is not None: raise ValueError("Cannot specify both 'axis' and 'dimension'") axis = dimension + elif axis is None: + axis = 0 return gen_math_ops.arg_min(input, axis, name) @@ -399,11 +403,11 @@ def negative(x, name=None): """ with ops.name_scope(name, "Neg", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): - x_neg = gen_math_ops.neg(x.values, name=name) + x_neg = gen_math_ops._neg(x.values, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_neg, dense_shape=x.dense_shape) else: - return gen_math_ops.neg(x, name=name) + return gen_math_ops._neg(x, name=name) # pylint: enable=g-docstring-has-escape @@ -857,7 +861,7 @@ def to_bfloat16(x, name="ToBFloat16"): return cast(x, dtypes.bfloat16, name=name) -ops.Tensor._override_operator("__neg__", gen_math_ops.neg) +ops.Tensor._override_operator("__neg__", gen_math_ops._neg) ops.Tensor._override_operator("__abs__", abs) # __invert__ corresponds to the ~ operator. Here we follow the numpy convention # ~ marks an elementwise bit-wise inverse. 
This is only implemented for boolean diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index a00625d083..0a109eb99b 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -87,7 +87,7 @@ def _remove_squeezable_dimensions(labels, predictions, weights): weights = array_ops.squeeze(weights, [-1]) elif (weights_rank is None) or ( weights_shape.dims[-1].is_compatible_with(1)): - # Use dynamic rank + # Use dynamic rank. weights = control_flow_ops.cond( math_ops.equal(array_ops.rank(weights), math_ops.add(array_ops.rank(predictions), 1)), @@ -354,8 +354,8 @@ def _confusion_matrix_at_thresholds( If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: A `Tensor` whose shape matches `predictions`. `labels` will be cast - to `bool`. + labels: A `Tensor` whose shape matches `predictions`. Will be cast to + `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A python list or tuple of float thresholds in `[0, 1]`. @@ -384,6 +384,8 @@ def _confusion_matrix_at_thresholds( if include not in all_includes: raise ValueError('Invaild key: %s.' % include) + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.to_float(predictions) labels, predictions, weights = _remove_squeezable_dimensions( labels, predictions, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) @@ -503,7 +505,8 @@ def auc(labels, predictions, weights=None, num_thresholds=200, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: A `bool` `Tensor` whose shape matches `predictions`. + labels: A `Tensor` whose shape matches `predictions`. Will be cast to + `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. 
weights: Optional `Tensor` whose rank is either 0, or the same rank as @@ -1101,10 +1104,10 @@ def true_positives(labels, predictions, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). @@ -1127,11 +1130,11 @@ def true_positives(labels, predictions, weights=None, with variable_scope.variable_scope( name, 'true_positives', (predictions, labels, weights)): - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_true_positive = math_ops.logical_and(math_ops.equal(labels, 1), - math_ops.equal(predictions, 1)) + is_true_positive = math_ops.logical_and(math_ops.equal(labels, True), + math_ops.equal(predictions, True)) return _count_condition(is_true_positive, weights, metrics_collections, updates_collections) @@ -1145,10 +1148,10 @@ def false_positives(labels, predictions, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. 
+ labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). @@ -1171,11 +1174,11 @@ def false_positives(labels, predictions, weights=None, with variable_scope.variable_scope( name, 'false_positives', (predictions, labels, weights)): - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_false_positive = math_ops.logical_and(math_ops.equal(labels, 0), - math_ops.equal(predictions, 1)) + is_false_positive = math_ops.logical_and(math_ops.equal(labels, False), + math_ops.equal(predictions, True)) return _count_condition(is_false_positive, weights, metrics_collections, updates_collections) @@ -1199,9 +1202,10 @@ def precision(labels, predictions, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). 
@@ -1227,6 +1231,8 @@ def precision(labels, predictions, weights=None, with variable_scope.variable_scope( name, 'precision', (predictions, labels, weights)): + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) labels, predictions, weights = _remove_squeezable_dimensions( labels, predictions, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) @@ -1279,7 +1285,8 @@ def precision_at_thresholds(labels, predictions, thresholds, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: A `bool` `Tensor` whose shape matches `predictions`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A python list or tuple of float thresholds in `[0, 1]`. @@ -1336,10 +1343,10 @@ def false_negatives(labels, predictions, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - predictions: The predicted values, a `bool` `Tensor` of arbitrary - dimensions. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). 
@@ -1361,11 +1368,11 @@ def false_negatives(labels, predictions, weights=None, with variable_scope.variable_scope( name, 'false_negatives', (predictions, labels, weights)): - predictions = ops.convert_to_tensor(predictions) - labels = ops.convert_to_tensor(labels) + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - is_false_negative = math_ops.logical_and(math_ops.equal(labels, 1), - math_ops.equal(predictions, 0)) + is_false_negative = math_ops.logical_and(math_ops.equal(labels, True), + math_ops.equal(predictions, False)) return _count_condition(is_false_negative, weights, metrics_collections, updates_collections) @@ -1387,9 +1394,10 @@ def recall(labels, predictions, weights=None, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. + predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will + be cast to `bool`. weights: Optional `Tensor` whose rank is either 0, or the same rank as `labels`, and must be broadcastable to `labels` (i.e., all dimensions must be either `1`, or the same as the corresponding `labels` dimension). 
@@ -1414,6 +1422,8 @@ def recall(labels, predictions, weights=None, """ with variable_scope.variable_scope( name, 'recall', (predictions, labels, weights)): + labels = math_ops.cast(labels, dtype=dtypes.bool) + predictions = math_ops.cast(predictions, dtype=dtypes.bool) labels, predictions, weights = _remove_squeezable_dimensions( labels, predictions, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) @@ -1817,7 +1827,8 @@ def recall_at_thresholds(labels, predictions, thresholds, If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. Args: - labels: A `bool` `Tensor` whose shape matches `predictions`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. thresholds: A python list or tuple of float thresholds in `[0, 1]`. @@ -1952,7 +1963,8 @@ def sensitivity_at_specificity( following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity Args: - labels: A `bool` `Tensor` whose shape matches `predictions`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. specificity: A scalar value in range `[0, 1]`. @@ -2515,7 +2527,8 @@ def specificity_at_sensitivity( following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity Args: - labels: A `bool` `Tensor` whose shape matches `predictions`. + labels: The ground truth values, a `Tensor` whose dimensions must match + `predictions`. Will be cast to `bool`. predictions: A floating point `Tensor` of arbitrary shape and whose values are in the range `[0, 1]`. sensitivity: A scalar value in range `[0, 1]`. 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 9ad2bf998b..344a592106 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1663,13 +1663,13 @@ def sparse_softmax_cross_entropy_with_logits(_sentinel=None, # pylint: disable= Args: _sentinel: Used to prevent positional parameters. Internal, do not use. - labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or - `int64`. Each entry in `labels` must be an index in `[0, num_classes)`. - Other values will raise an exception when this op is run on CPU, and - return `NaN` for corresponding corresponding loss and gradient rows - on GPU. - logits: Unscaled log probabilities of rank `r` and shape - `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`. + labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-1}]` (where `r` is rank of + `labels` and result) and dtype `int32` or `int64`. Each entry in `labels` + must be an index in `[0, num_classes)`. Other values will raise an + exception when this op is run on CPU, and return `NaN` for corresponding + loss and gradient rows on GPU. + logits: Unscaled log probabilities of shape + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. name: A name for the operation (optional). 
Returns: diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index bdd59eeb6b..0c266770ab 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -44,6 +44,7 @@ from google.protobuf import text_format from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python import pywrap_tensorflow from tensorflow.python.client import session from tensorflow.python.framework import graph_util from tensorflow.python.framework import importer @@ -67,6 +68,8 @@ def freeze_graph(input_graph, variable_names_blacklist=""): """Converts all variables in a graph and checkpoint into constants.""" + del restore_op_name, filename_tensor_name # Unused by updated loading code. + if not gfile.Exists(input_graph): print("Input graph file '" + input_graph + "' does not exist!") return -1 @@ -96,6 +99,7 @@ def freeze_graph(input_graph, if clear_devices: for node in input_graph_def.node: node.device = "" + _ = importer.import_graph_def(input_graph_def, name="") with session.Session() as sess: @@ -109,7 +113,19 @@ def freeze_graph(input_graph, saver = saver_lib.Saver(saver_def=saver_def) saver.restore(sess, input_checkpoint) else: - sess.run([restore_op_name], {filename_tensor_name: input_checkpoint}) + var_list = {} + reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint) + var_to_shape_map = reader.get_variable_to_shape_map() + for key in var_to_shape_map: + try: + tensor = sess.graph.get_tensor_by_name(key + ":0") + except KeyError: + # This tensor doesn't exist in the graph (for example it's + # 'global_step' or a similar housekeeping element) so skip it. 
+ continue + var_list[key] = tensor + saver = saver_lib.Saver(var_list=var_list) + saver.restore(sess, input_checkpoint) if initializer_nodes: sess.run(initializer_nodes) diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index b7dde8aa69..cda4fedec7 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -122,7 +122,8 @@ class LoggingTensorHook(session_run_hook.SessionRunHook): The tensors will be printed to the log, with `INFO` severity. """ - def __init__(self, tensors, every_n_iter=None, every_n_secs=None): + def __init__(self, tensors, every_n_iter=None, every_n_secs=None, + formatter=None): """Initializes a LoggingHook monitor. Args: @@ -133,6 +134,8 @@ class LoggingTensorHook(session_run_hook.SessionRunHook): every_n_secs: `int` or `float`, print the values of `tensors` once every N seconds. Exactly one of `every_n_iter` and `every_n_secs` should be provided. + formatter: function, takes dict of `tag`->`Tensor` and returns a string. + If `None` uses default printing all tensors. Raises: ValueError: if `every_n_iter` is non-positive. @@ -143,8 +146,12 @@ class LoggingTensorHook(session_run_hook.SessionRunHook): if every_n_iter is not None and every_n_iter <= 0: raise ValueError("invalid every_n_iter=%s." 
% every_n_iter) if not isinstance(tensors, dict): + self._tag_order = tensors tensors = {item: item for item in tensors} + else: + self._tag_order = tensors.keys() self._tensors = tensors + self._formatter = formatter self._timer = SecondOrStepTimer(every_secs=every_n_secs, every_steps=every_n_iter) @@ -164,11 +171,17 @@ class LoggingTensorHook(session_run_hook.SessionRunHook): def after_run(self, run_context, run_values): _ = run_context if self._should_trigger: - stats = [] - for tag in self._current_tensors.keys(): - stats.append("%s = %s" % (tag, run_values.results[tag])) - logging.info("%s", ", ".join(stats)) - self._timer.update_last_triggered_step(self._iter_count) + original = np.get_printoptions() + np.set_printoptions(suppress=True) + elapsed_secs, _ = self._timer.update_last_triggered_step(self._iter_count) + if self._formatter: + logging.info(self._formatter(run_values.results)) + else: + stats = [] + for tag in self._tag_order: + stats.append("%s = %s" % (tag, run_values.results[tag])) + logging.info("%s (%.3f sec)", ", ".join(stats), elapsed_secs) + np.set_printoptions(**original) self._iter_count += 1 @@ -647,6 +660,22 @@ class FinalOpsHook(session_run_hook.SessionRunHook): feed_dict=self._final_ops_feed_dict) +class FeedFnHook(session_run_hook.SessionRunHook): + """Runs `feed_fn` and sets the `feed_dict` accordingly.""" + + def __init__(self, feed_fn): + """Constructs the FeedFnHook with given `feed_fn`. + + Args: + feed_fn: function, no arguments and returns `dict` to feed. 
+ """ + self.feed_fn = feed_fn + + def before_run(self, run_context): # pylint: disable=unused-argument + return session_run_hook.SessionRunArgs( + fetches=None, feed_dict=self.feed_fn()) + + def _as_graph_element(obj): """Retrieves Graph element.""" graph = ops.get_default_graph() diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index babc651e6c..6c2945396f 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -251,6 +251,19 @@ class LoggingTensorHookTest(test.TestCase): mon_sess.run(train_op) self.assertRegexpMatches(str(self.logged_message), t.name) + def test_print_formatter(self): + with ops.Graph().as_default(), session_lib.Session() as sess: + t = constant_op.constant(42.0, name='foo') + train_op = constant_op.constant(3) + hook = basic_session_run_hooks.LoggingTensorHook( + tensors=[t.name], every_n_iter=10, + formatter=lambda items: 'qqq=%s' % items[t.name]) + hook.begin() + mon_sess = monitored_session._HookedSession(sess, [hook]) + sess.run(variables_lib.global_variables_initializer()) + mon_sess.run(train_op) + self.assertEqual(self.logged_message[0], 'qqq=42.0') + class CheckpointSaverHookTest(test.TestCase): @@ -820,5 +833,18 @@ class FinalOpsHookTest(test.TestCase): hook.final_ops_values.tolist()) +class FeedFnHookTest(test.TestCase): + + def test_feeding_placeholder(self): + with ops.Graph().as_default(), session_lib.Session() as sess: + x = array_ops.placeholder(dtype=dtypes.float32) + y = x + 1 + hook = basic_session_run_hooks.FeedFnHook( + feed_fn=lambda: {x: 1.0}) + hook.begin() + mon_sess = monitored_session._HookedSession(sess, [hook]) + self.assertEqual(mon_sess.run(y), 2) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 30b9ccf922..26e52464cb 100644 --- 
a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -248,6 +248,7 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name chief_only_hooks=None, save_checkpoint_secs=600, save_summaries_steps=100, + save_summaries_secs=None, config=None): """Creates a `MonitoredSession` for training. @@ -273,8 +274,12 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name using a default checkpoint saver. If `save_checkpoint_secs` is set to `None`, then the default checkpoint saver isn't used. save_summaries_steps: The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If - `save_summaries_steps` is set to `None`, then the default summary saver + summaries are written to disk using a default summary saver. If both + `save_summaries_steps` and `save_summaries_secs` are set to `None`, then + the default summary saver isn't used. + save_summaries_secs: The frequency, in secs, that the summaries are written + to disk using a default summary saver. If both `save_summaries_steps` and + `save_summaries_secs` are set to `None`, then the default summary saver isn't used. config: an instance of `tf.ConfigProto` proto used to configure the session. It's the `config` argument of constructor of `tf.Session`. 
@@ -301,10 +306,12 @@ def MonitoredTrainingSession(master='', # pylint: disable=invalid-name all_hooks.append( basic_session_run_hooks.StepCounterHook(output_dir=checkpoint_dir)) - if save_summaries_steps and save_summaries_steps > 0: + if (save_summaries_steps and save_summaries_steps > 0) or ( + save_summaries_secs and save_summaries_secs > 0): all_hooks.append(basic_session_run_hooks.SummarySaverHook( scaffold=scaffold, save_steps=save_summaries_steps, + save_secs=save_summaries_secs, output_dir=checkpoint_dir)) if save_checkpoint_secs and save_checkpoint_secs > 0: all_hooks.append(basic_session_run_hooks.CheckpointSaverHook( diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 3b16073166..444ee68cb8 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -215,15 +215,37 @@ class MonitoredTrainingSessionTest(test.TestCase): is_chief=True, checkpoint_dir=logdir) as session: self.assertEqual(2, session.run(gstep)) - def test_summaries(self): - logdir = _test_dir(self.get_temp_dir(), 'test_summaries') + def test_summaries_steps(self): + logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps') with ops.Graph().as_default(): gstep = variables_lib.get_or_create_global_step() new_gstep = state_ops.assign_add(gstep, 1) summary.scalar('my_summary_tag', new_gstep * 2) with monitored_session.MonitoredTrainingSession( - is_chief=True, checkpoint_dir=logdir) as session: - for _ in range(101): # 100 is default summary writing steps + is_chief=True, + checkpoint_dir=logdir, + save_summaries_steps=100) as session: + for _ in range(101): + session.run(new_gstep) + summaries = util_test.latest_summaries(logdir) + tags = [s.summary.value[0].tag for s in summaries] + self.assertIn('my_summary_tag', tags) + self.assertIn('global_step/sec', tags) + + def test_summaries_secs(self): + logdir = _test_dir(self.get_temp_dir(), 
'test_summaries_secs') + with ops.Graph().as_default(): + gstep = variables_lib.get_or_create_global_step() + new_gstep = state_ops.assign_add(gstep, 1) + summary.scalar('my_summary_tag', new_gstep * 2) + with monitored_session.MonitoredTrainingSession( + is_chief=True, + checkpoint_dir=logdir, + save_summaries_steps=None, + save_summaries_secs=0.1) as session: + session.run(new_gstep) + time.sleep(0.2) + for _ in range(101): session.run(new_gstep) summaries = util_test.latest_summaries(logdir) tags = [s.summary.value[0].tag for s in summaries] diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index 3a2415629a..9f59d270e4 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -88,8 +88,10 @@ See [Threading and Queues](../../how_tos/threading_and_queues/index.md) for how to use threads and queues. For documentation on the Queue API, see [Queues](../../api_docs/python/io_ops.md#queues). + @@Coordinator @@QueueRunner +@@LooperThread @@add_queue_runner @@start_queue_runners @@ -119,14 +121,15 @@ overview of summaries, event files, and visualization in TensorBoard. @@summary_iterator -## Training Utilities +## Training Hooks + +Hooks are tools that run in the process of training/evaluation of the model. -@@global_step -@@basic_train_loop -@@get_global_step -@@assert_global_step -@@write_graph @@SessionRunHook +@@SessionRunArgs +@@SessionRunContext +@@SessionRunValues + @@LoggingTensorHook @@StopAtStepHook @@CheckpointSaverHook @@ -136,10 +139,16 @@ overview of summaries, event files, and visualization in TensorBoard. 
@@NanTensorHook @@SummarySaverHook @@GlobalStepWaiterHook -@@SessionRunArgs -@@SessionRunContext -@@SessionRunValues -@@LooperThread +@@FinalOpsHook +@@FeedFnHook + +## Training Utilities + +@@global_step +@@basic_train_loop +@@get_global_step +@@assert_global_step +@@write_graph """ # pylint: enable=line-too-long @@ -190,6 +199,8 @@ from tensorflow.python.training.basic_session_run_hooks import NanLossDuringTrai from tensorflow.python.training.basic_session_run_hooks import NanTensorHook from tensorflow.python.training.basic_session_run_hooks import SummarySaverHook from tensorflow.python.training.basic_session_run_hooks import GlobalStepWaiterHook +from tensorflow.python.training.basic_session_run_hooks import FinalOpsHook +from tensorflow.python.training.basic_session_run_hooks import FeedFnHook from tensorflow.python.training.basic_loops import basic_train_loop from tensorflow.python.training.device_setter import replica_device_setter from tensorflow.python.training.monitored_session import Scaffold diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index ac0f15b687..93c312ecfc 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -1212,57 +1212,56 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) { return false; } -/* static */ bool CUDADriver::SynchronousMemcpyD2H(CudaContext* context, - void *host_dst, - CUdeviceptr gpu_src, - uint64 size) { +/* static */ port::Status CUDADriver::SynchronousMemcpyD2H(CudaContext *context, + void *host_dst, + CUdeviceptr gpu_src, + uint64 size) { ScopedActivateContext activation{context}; CUresult res = dynload::cuMemcpyDtoH_v2(host_dst, gpu_src, size); if (res != CUDA_SUCCESS) { - LOG(ERROR) << port::Printf( - "failed to synchronous memcpy from device to host: %s; " - "host dst: %p; GPU src: %p; size: %llu=0x%llx", - ToString(res).c_str(), host_dst, port::bit_cast<void *>(gpu_src), size, size); 
- return false; + return port::InternalError( + port::Printf("failed to synchronous memcpy from device to host: %s; " + "host dst: %p; GPU src: %p; size: %llu=0x%llx", + ToString(res).c_str(), host_dst, + port::bit_cast<void *>(gpu_src), size, size)); } VLOG(2) << "successfully sync memcpy'd d2h of " << size << " bytes to " << host_dst; - return true; + return port::Status::OK(); } -/* static */ bool CUDADriver::SynchronousMemcpyH2D(CudaContext* context, - CUdeviceptr gpu_dst, - const void *host_src, - uint64 size) { +/* static */ port::Status CUDADriver::SynchronousMemcpyH2D(CudaContext *context, + CUdeviceptr gpu_dst, + const void *host_src, + uint64 size) { ScopedActivateContext activation{context}; CUresult res = dynload::cuMemcpyHtoD_v2(gpu_dst, host_src, size); if (res != CUDA_SUCCESS) { - LOG(ERROR) << port::Printf( + return port::InternalError(port::Printf( "failed to synchronous memcpy from host to device: %s; GPU dst: %p;" " host src: %p; size: %llu=0x%llx", - ToString(res).c_str(), port::bit_cast<void *>(gpu_dst), host_src, size, size); - return false; + ToString(res).c_str(), port::bit_cast<void *>(gpu_dst), host_src, size, + size)); } VLOG(2) << "successfully enqueued sync memcpy h2d of " << size << " bytes"; - return true; + return port::Status::OK(); } -/* static */ bool CUDADriver::SynchronousMemcpyD2D(CudaContext* context, - CUdeviceptr gpu_dst, - CUdeviceptr gpu_src, - uint64 size) { +/* static */ port::Status CUDADriver::SynchronousMemcpyD2D(CudaContext *context, + CUdeviceptr gpu_dst, + CUdeviceptr gpu_src, + uint64 size) { ScopedActivateContext activation{context}; CUresult res = dynload::cuMemcpyDtoD_v2(gpu_dst, gpu_src, size); if (res != CUDA_SUCCESS) { - LOG(ERROR) << port::Printf( + return port::InternalError(port::Printf( "failed to synchronous memcpy from host to device: %s; GPU dst: %p; " "GPU src: %p; size: %llu=0x%llx", ToString(res).c_str(), port::bit_cast<void *>(gpu_dst), - port::bit_cast<void *>(gpu_src), size, size); - return 
false; + port::bit_cast<void *>(gpu_src), size, size)); } VLOG(2) << "successfully sync memcpy'd d2d of " << size << " bytes"; - return true; + return port::Status::OK(); } /* static */ bool CUDADriver::AsynchronousMemcpyD2H(CudaContext* context, diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h index ab118e5d40..c5d7d8b32f 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.h +++ b/tensorflow/stream_executor/cuda/cuda_driver.h @@ -251,12 +251,14 @@ class CUDADriver { // -- Synchronous memcopies. // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g4d32266788c440b0220b1a9ba5795169 - static bool SynchronousMemcpyD2H(CudaContext* context, void *host_dst, - CUdeviceptr gpu_src, uint64 size); - static bool SynchronousMemcpyH2D(CudaContext* context, CUdeviceptr gpu_dst, - const void *host_src, uint64 size); - static bool SynchronousMemcpyD2D(CudaContext* context, CUdeviceptr gpu_dst, - CUdeviceptr gpu_src, uint64 size); + static port::Status SynchronousMemcpyD2H(CudaContext* context, void* host_dst, + CUdeviceptr gpu_src, uint64 size); + static port::Status SynchronousMemcpyH2D(CudaContext* context, + CUdeviceptr gpu_dst, + const void* host_src, uint64 size); + static port::Status SynchronousMemcpyD2D(CudaContext* context, + CUdeviceptr gpu_dst, + CUdeviceptr gpu_src, uint64 size); // -- Asynchronous memcopies. 
// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1g56f30236c7c5247f8e061b59d3268362 diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index b2da109bf0..ae1bf991a1 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -508,20 +508,21 @@ bool CUDAExecutor::SynchronousMemSet(DeviceMemoryBase *location, int value, value, size); } -bool CUDAExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, - const void *host_src, uint64 size) { +port::Status CUDAExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, + const void *host_src, + uint64 size) { return CUDADriver::SynchronousMemcpyH2D(context_, AsCudaDevicePtr(gpu_dst), host_src, size); } -bool CUDAExecutor::SynchronousMemcpy(void *host_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) { +port::Status CUDAExecutor::SynchronousMemcpy(void *host_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) { return CUDADriver::SynchronousMemcpyD2H(context_, host_dst, AsCudaDevicePtr(gpu_src), size); } -bool CUDAExecutor::SynchronousMemcpyDeviceToDevice( +port::Status CUDAExecutor::SynchronousMemcpyDeviceToDevice( DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, uint64 size) { return CUDADriver::SynchronousMemcpyD2D(context_, AsCudaDevicePtr(gpu_dst), AsCudaDevicePtr(gpu_src), size); diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index 3959d04439..a9917cc89f 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -108,15 +108,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface { bool SynchronousMemSet(DeviceMemoryBase *location, int value, uint64 size) override; - bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src, - uint64 size) override; + port::Status 
SynchronousMemcpy(DeviceMemoryBase *gpu_dst, + const void *host_src, uint64 size) override; - bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, - uint64 size) override; + port::Status SynchronousMemcpy(void *host_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) override; - bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) override; + port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) override; bool MemZero(Stream *stream, DeviceMemoryBase *location, uint64 size) override; diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index d83d3042d5..5db86cefc3 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -38,6 +38,7 @@ limitations under the License. namespace perftools { namespace gputools { +class HostBuffer; class Stream; class ScratchAllocator; @@ -125,6 +126,15 @@ enum class RnnDirectionMode { kRnnBidirectional = 1, }; +// Relevant to DepthToSpace and SpaceToDepth. This is the write layout when +// performing depth to space and the read layout when performing space to depth. +// It's specified with most-major dimension first and most-minor dimension last. +// In DepthToSpace, the D*M² values are read in and then, for DepthHeightWidth, +// written out to the output patch, by varying first width, then height, then +// depth. In C array format, it looks like [depth][height][width]. See +// DepthToSpace comment for more information. +enum class DepthToSpaceLayout { DepthHeightWidth }; + // Specifies the descriptor for a RNN model. // // An example use case: @@ -530,6 +540,13 @@ enum class PoolingMode : int64 { kAverage, }; +// Specify the dimension in which to concatenate inputs in space. +// Specify int64 so there's no padding in SpaceConcatenateMode. 
+enum class SpaceConcatenateMode : int64 { + XDirection, + YDirection, +}; + // Returns a short name for the pooling mode, e.g. "Avg". string ShortPoolingModeString(PoolingMode mode); @@ -1319,6 +1336,129 @@ class DnnSupport { port::ArraySlice<const DeviceMemory<float>*> input_data, DeviceMemory<float>* output_data) = 0; + // Concatenates several layers into one, by concatenating each in the + // x-dimension or y-dimension, based on a user-specified flag. + // For x-concatenation, layers are aligned at matching y and depth + // coordinates, and for y-concatenation, they are aligned at matching x and + // depth coordinates. The inputs must all have the same depth and batch size. + // For x-concatenation, the inputs must have the same height (y-size), and the + // output will have the same depth and height as the inputs and its width (x- + // size) will be the sum of the input widths. For y-concatenation, the inputs + // must have the same width, and the output will have the same depth and width + // as the inputs, and its height will be the sum of the input heights. + // + // Arguments: + // stream: borrowed pointer to the stream that the 'space concatenate' + // operation should be enqueued onto. + // input_dimensions: the dimensions of each input. + // input_data: un-owned device memory region which contains the input data + // for each input layer. + // output_data: un-owned device memory region in which to place the space + // concatenate result. + // concat_direction: either dnn:SpaceConcatenateMode::XDirection or + // dnn::SpaceConcatenateMode::YDirection. 
+ virtual bool DoSpaceConcatenate( + Stream* stream, port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float>*> input_data, + DeviceMemory<float>* output_data, + dnn::SpaceConcatenateMode concat_direction) { + return false; + } + + // Change the layout of the data by shrinking one dimension (or set of + // dimensions) and growing another dimension (or set of dimensions), while + // keeping the total number of data elements constant, and maintaining the + // current data ordering. + // + // Currently, the only supported operation is depth into space by a power of + // 2. E.g. (y, x, z) -> (y*2, x*2, z/4) + // + // Note that Reshape may not be a no-op, depending on the platform and which + // dimensions are being changed. + // + // Example: forgetting about batch for the moment, let's take a tensor that's + // 2x1x8 (y by x by z) and reshape to a tensor that's 4x2x2. The memory layout + // is row-major order: y,x,z. I.e. z changes the fastest, then x, then y. The + // elements of the tensor range from 0 to 15. The x,y,z indices are below each + // element. + // + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + // y0 y0 y0 y0 y0 y0 y0 y0 y1 y1 y1 y1 y1 y1 y1 y1 + // x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 x0 + // z0 z1 z2 z3 z4 z5 z6 z7 z0 z1 z2 z3 z4 z5 z6 z7 + // + // reshape to 4x2x2 + // + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + // y0 y0 y0 y0 y1 y1 y1 y1 y2 y2 y2 y2 y3 y3 y3 y3 + // x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1 x0 x0 x1 x1 + // z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 z0 z1 + virtual bool DoReshape(Stream* stream, + const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<float>& input_data, + const dnn::BatchDescriptor& output_dimensions, + DeviceMemory<float>* output_data) { + return false; + } + + // Depth to space takes an X by Y image with depth D*M² and changes it to an + // MX x MY image with depth D. 
Each input location (x,y) with depth D*M² in + // the input image is changed to an MxM contiguous area in the output image, + // with the values being laid out in the raster order by DepthToSpaceLayout, + // and will have a new depth of D. + // + // Example. + // M=2, Din =8, Xin=2, Yin=2. Xout=4, Yout=4, Dout=2 + // DepthHeightWidth layout + // Values within a 'cell' are at different depths and same x & y. + // Input: + // abcdefgh ijklmnop + // qrstuvwx yz012345 + // Output: + // ae bf im jn + // cg dh ko lp + // qu rv y2 z3 + // sw tx 04 15 + // + // sqrt_depth_reduction: 'M' in the comment above + virtual bool DoDepthToSpace(Stream* stream, + const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<float>& input_data, + const DepthToSpaceLayout& depth_to_space_layout, + const int& sqrt_depth_reduction, + DeviceMemory<float>* output_data) { + return false; + } + + // Space to depth is the inverse of depth to space. Space to depth takes each + // non-overlapping M by M patch (in the X and Y dimensions) with depth D of + // the input, and transforms it to a 1 by 1 patch with depth D*M². If the + // input has size (MX, MY, D), the output has size (X, Y, D*M²). The number of + // data elements is not changed. + // + // Example. + // M=2, Din =2, Xin=4, Yin=4, Dout=8 + // DepthHeightWidth layout + // Values within a 'cell' are at different depths and same x & y. + // Input: + // ae bf im jn + // cg dh ko lp + // qu rv y2 z3 + // sw tx 04 15 + // Output: + // abcdefgh ijklmnop + // qrstuvwx yz012345 + // + // sqrt_depth_increase: 'M' in the comment above + virtual bool DoSpaceToDepth(Stream* stream, + const dnn::BatchDescriptor& input_dimensions, + const DeviceMemory<float>& input_data, + const DepthToSpaceLayout& space_to_depth_layout, + const int& sqrt_depth_increase, + DeviceMemory<float>* output_data) { + return false; + } + // Computes the specified operation (e.g. 
addition or multiplication) // between corresponding elements in the inputs and stores the result in the // output element. @@ -1342,6 +1482,37 @@ class DnnSupport { const dnn::BatchDescriptor& output_dimensions, DeviceMemory<float>* output_data) = 0; + // Computes the specified operation (e.g. addition or multiplication) + // between corresponding elements in the inputs and stores the result in the + // output element. Each input is multiplied by a scalar constant and the + // result is divided by a scalar constant. + // e.g. To perform Z = 0.9*X + 1.1*Y, set the input multiplicands to 9 and 11 + // and the output divisor to 10. + // The inputs and output must all have the same dimensions, but may have + // different quantization parameters (min_value and max_value). + // + // Arguments (all borrowed): + // stream: borrowed pointer to the stream that the 'elementwise operation' + // should be enqueued onto. + // operation: The operation to perform. + // input_multiplicands: Amount to scale each input. + // output_divisor: Amount to divide the output. + // input_dimensions: The dimensions of each input. + // input_data: un-owned device memory region which contains the + // input data for each input layer. + // output_dimensions: The dimensions of the output. + // output_data: un-owned device memory region in which to place the + // operation result. + virtual bool DoElementwiseOperateScaledQuantized( + Stream* stream, ElementwiseOperation operation, + port::ArraySlice<int> input_multiplicands, int output_divisor, + port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float>*> input_data, + const dnn::BatchDescriptor& output_dimensions, + DeviceMemory<float>* output_data) { + return false; + } + // Pads the input with zeros in the X and Y dimensions. The feature_map // dimension is unchanged. 
// @@ -1382,6 +1553,43 @@ class DnnSupport { int64 left_trim, int64 right_trim, int64 top_trim, int64 bottom_trim, DeviceMemory<float> *output_data) = 0; + // Grows the input tensor by replicating the X and Y dimensions. The batch and + // depth/feature_map dimensions are unchanged. Currently, the input tensor is + // limited to X=1 and Y=1. + // + // For example, the input has dimensions x=2, y=3, and replicate_x=3, + // replicate_y=2. The diagonal elements of the output would be: [x0y0, x1y1, + // x0y2, x1y0, x0y1, x1y2]. + // Here is the example as a picture. input: + // AB + // CD + // EF + // broadcast result: + // ABABAB + // CDCDCD + // EFEFEF + // ABABAB + // CDCDCD + // EFEFEF + // + // Arguments (all borrowed): + // stream: borrowed pointer to the stream that the 'elementwise operation' + // should be enqueued onto. + // dimensions: The dimensions of the input. + // input_data: un-owned device memory region which contains the + // input data for the input layer. + // replicate_x: Amount to replicate the input's X dimension. + // replicate_y: Amount to replicate the input's Y dimension. + // output_data: un-owned device memory region in which to place the + // padded result. + virtual bool DoXYBroadcast(Stream* stream, + const dnn::BatchDescriptor& dimensions, + const DeviceMemory<float>& input_data, + int64 replicate_x, int64 replicate_y, + DeviceMemory<float>* output_data) { + return false; + } + // Enqueues an asynchronous memcpy of the *quantized* output of a layer (that // is, bytes instead of scaled floats) into 'host_dst' if they are available // for the underlying DNN implementation. If this quantized output is not @@ -1425,6 +1633,21 @@ class DnnSupport { QuantizedActivationMode mode, DeviceMemory<float>* gpu_unquantized_dst) = 0; + // Enqueues an asynchronous copy of the contents of buffer_src to + // gpu_unquantized_dst. 
+ virtual bool DoCopyHostBuffer2Device( + Stream* stream, HostBuffer* buffer_src, + DeviceMemory<float>* gpu_unquantized_dst) { + return false; + } + + // Enqueues an asynchronous copy of the contents of gpu_unquantized_src to + // buffer_dst. + virtual bool DoCopyDevice2HostBuffer( + Stream* stream, const DeviceMemory<float>& gpu_unquantized_src, + HostBuffer* buffer_dst) { + return false; + } // Create an RNN descriptor based on model shapes and configurations. // The caller retains the ownership of the descriptor. diff --git a/tensorflow/stream_executor/host/host_gpu_executor.cc b/tensorflow/stream_executor/host/host_gpu_executor.cc index ff07432bb7..830bc9a681 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.cc +++ b/tensorflow/stream_executor/host/host_gpu_executor.cc @@ -129,23 +129,24 @@ bool HostExecutor::Memset32(Stream *stream, DeviceMemoryBase *location, return true; } -bool HostExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, - const void *host_src, uint64 size) { +port::Status HostExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, + const void *host_src, + uint64 size) { memcpy(gpu_dst->opaque(), host_src, size); - return true; + return port::Status::OK(); } -bool HostExecutor::SynchronousMemcpy(void *host_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) { +port::Status HostExecutor::SynchronousMemcpy(void *host_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) { memcpy(host_dst, gpu_src.opaque(), size); - return true; + return port::Status::OK(); } -bool HostExecutor::SynchronousMemcpyDeviceToDevice( +port::Status HostExecutor::SynchronousMemcpyDeviceToDevice( DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, uint64 size) { memcpy(gpu_dst->opaque(), gpu_src.opaque(), size); - return true; + return port::Status::OK(); } bool HostExecutor::HostCallback(Stream *stream, diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h index 
f217f7947f..77b07e4a57 100644 --- a/tensorflow/stream_executor/host/host_gpu_executor.h +++ b/tensorflow/stream_executor/host/host_gpu_executor.h @@ -95,13 +95,14 @@ class HostExecutor : public internal::StreamExecutorInterface { bool SynchronousMemSet(DeviceMemoryBase *location, int value, uint64 size) override; - bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src, - uint64 size) override; - bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, - uint64 size) override; - bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) override; + port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst, + const void *host_src, uint64 size) override; + port::Status SynchronousMemcpy(void *host_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) override; + port::Status SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) override; bool HostCallback(Stream *stream, std::function<void()> callback) override; diff --git a/tensorflow/stream_executor/host_buffer.h b/tensorflow/stream_executor/host_buffer.h new file mode 100644 index 0000000000..8fa542e9ff --- /dev/null +++ b/tensorflow/stream_executor/host_buffer.h @@ -0,0 +1,48 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_ + +#include "tensorflow/stream_executor/dnn.h" + +namespace perftools { +namespace gputools { + +// A HostBuffer is a block of memory in host memory containing the data for a +// dnn::BatchDescriptor using a device-dependent memory layout. +// Derived classes provide methods to construct a HostBuffer for a specific +// device, and to copy data in and out of the buffer. +class HostBuffer { + public: + const dnn::BatchDescriptor& descriptor() const { return descriptor_; } + + // Returns a string describing the HostBuffer. + virtual string AsString() const = 0; + + protected: + // Construct a HostBuffer from the supplied dnn::BatchDescriptor. + explicit HostBuffer(const dnn::BatchDescriptor& descriptor) + : descriptor_(descriptor) {} + virtual ~HostBuffer() {} + + private: + const dnn::BatchDescriptor descriptor_; +}; + +} // namespace gputools +} // namespace perftools + +#endif // TENSORFLOW_STREAM_EXECUTOR_HOST_BUFFER_H_ diff --git a/tensorflow/stream_executor/lib/status.h b/tensorflow/stream_executor/lib/status.h index 493fc656e1..0aec2917dc 100644 --- a/tensorflow/stream_executor/lib/status.h +++ b/tensorflow/stream_executor/lib/status.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/stream_executor/lib/error.h" // IWYU pragma: export +#include "tensorflow/stream_executor/lib/stringpiece.h" #include "tensorflow/stream_executor/platform/logging.h" namespace perftools { @@ -33,6 +34,17 @@ using Status = tensorflow::Status; #define SE_ASSERT_OK(val) \ ASSERT_EQ(::perftools::gputools::port::Status::OK(), (val)) +// Define some canonical error helpers. 
+inline Status UnimplementedError(StringPiece message) { + return Status(error::UNIMPLEMENTED, message); +} +inline Status InternalError(StringPiece message) { + return Status(error::INTERNAL, message); +} +inline Status FailedPreconditionError(StringPiece message) { + return Status(error::FAILED_PRECONDITION, message); +} + } // namespace port } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 512e882cad..980d544b01 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/blas.h" +#include "tensorflow/stream_executor/host_buffer.h" #include "tensorflow/stream_executor/lib/stacktrace.h" #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/platform.h" @@ -85,6 +86,8 @@ string ToVlogString(const void *ptr) { return out.str(); } +string ToVlogString(const HostBuffer &buffer) { return buffer.AsString(); } + template <class T> string ToVlogString(const std::complex<T> &c) { // StrCat does not convert std::complex to text. @@ -149,6 +152,13 @@ string ToVlogString(port::MutableArraySlice<T> elements) { return ToVlogString(port::ArraySlice<T>(elements)); } +string ToVlogString(dnn::DepthToSpaceLayout depth_to_space_layout) { + switch (depth_to_space_layout) { + case dnn::DepthToSpaceLayout::DepthHeightWidth: + return "DepthToSpaceLayout::DepthHeightWidth"; + } +} + // Used together with PARAM to VLOG calls made to the stream. 
Intended // to be used like this: // @@ -299,10 +309,7 @@ Stream &Stream::ThenBatchNormalizationForward( saved_inv_var, is_training, std::move(var_to_inv_var), std::move(inv_var_to_var))); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -324,10 +331,7 @@ Stream &Stream::ThenBatchNormalizationBackward( this, y_backprop, x, scale, mean, variance, x_desc, scale_offset_desc, epsilon, x_backprop, scale_backprop, offset_backprop)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -355,10 +359,7 @@ Stream &Stream::ThenConvolveWithScratch( /*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -385,10 +386,7 @@ Stream &Stream::ThenConvolveWithScratch( /*scratch_allocator=*/scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -419,10 +417,7 @@ Stream &Stream::ThenConvolveWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -453,10 +448,7 @@ Stream &Stream::ThenConvolveWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -497,10 +489,7 @@ Stream &Stream::ThenSeparableConvolve( depth_multiplier, first_weights, second_weights, 
convolution_descriptor, output_descriptor, output)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -528,10 +517,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch( backward_input_data, scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -564,10 +550,7 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -600,10 +583,7 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -631,10 +611,7 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch( backward_input_data, scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -676,10 +653,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch( backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -712,10 +686,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + 
SetErrorAndLogNoDnnSupport(); } } return *this; @@ -743,10 +714,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch( backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(), nullptr)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -779,10 +747,7 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( SetError(); } } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -817,10 +782,7 @@ Stream &Stream::ThenConvolveBackwardBiasImpl( bias_descriptor, backward_bias_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -866,10 +828,7 @@ Stream &Stream::ThenMatMul(const DeviceMemory<float> &input_data, CheckError(dnn->DoMatMul(this, input_data, weights, input_dimensions, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -891,10 +850,7 @@ Stream &Stream::ThenMatMulQuantized( weight_scales, input_dimensions, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -916,10 +872,7 @@ Stream &Stream::ThenMatMulQuantized( weight_scales, input_dimensions, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -937,10 +890,7 @@ Stream &Stream::ThenBiasAdd(const DeviceMemory<float> &input_data, 
CheckError( dnn->DoBiasAdd(this, input_data, biases, dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -961,10 +911,7 @@ Stream &Stream::ThenPoolForward( input_data, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -985,10 +932,7 @@ Stream &Stream::ThenPoolForward( input_data, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1012,10 +956,7 @@ Stream &Stream::ThenPoolBackward( input_data, output_dimensions, output_data, input_diff_data, output_diff_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1039,10 +980,7 @@ Stream &Stream::ThenPoolBackward( input_data, output_dimensions, output_data, input_diff_data, output_diff_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1058,10 +996,7 @@ Stream &Stream::ThenNormalize( CheckError(dnn->DoNormalize(this, normalize_descriptor, input_data, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1079,10 +1014,7 @@ Stream &Stream::ThenNormalizeWithDimensions( CheckError(dnn->DoNormalizeWithDimensions( this, normalize_descriptor, dimensions, input_data, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to 
perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1104,10 +1036,7 @@ Stream &Stream::ThenNormalizeBackwardWithDimensions( this, normalize_descriptor, dimensions, raw_data, normalized_data, normalized_variable_gradient, raw_variable_gradient)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1125,10 +1054,7 @@ Stream &Stream::ThenActivate(dnn::ActivationMode activation_mode, CheckError(dnn->DoActivate(this, activation_mode, dimensions, input_data, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1158,10 +1084,114 @@ Stream &Stream::ThenDepthConcatenate( CheckError(dnn->DoDepthConcatenate(this, input_dimensions, input_data, output_data)); } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenSpaceConcatenate( + port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float> *> input_data, + DeviceMemory<float> *output_data, + dnn::SpaceConcatenateMode concat_direction) { + VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), PARAM(output_data)); + + // Check that the input dimensions of all the other batches match those of the + // first batch. 
+ for (size_t i = 1; i < input_dimensions.size(); ++i) { + if ((concat_direction == dnn::SpaceConcatenateMode::XDirection) && + (input_dimensions[i].count() != input_dimensions[0].count() || + input_dimensions[i].height() != input_dimensions[0].height() || + input_dimensions[i].feature_map_count() != + input_dimensions[0].feature_map_count())) { SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + LOG(ERROR) << "Incompatible dimensions for X concatenation.\n" + << "input_dimensions[0]: " << input_dimensions[0].ToString() + << "input_dimensions[" << i + << "]: " << input_dimensions[i].ToString(); + return *this; + } + + if ((concat_direction == dnn::SpaceConcatenateMode::YDirection) && + (input_dimensions[i].count() != input_dimensions[0].count() || + input_dimensions[i].width() != input_dimensions[0].width() || + input_dimensions[i].feature_map_count() != + input_dimensions[0].feature_map_count())) { + SetError(); + LOG(ERROR) << "Incompatible dimensions for Y concatenation.\n" + << "input_dimensions[0]: " << input_dimensions[0].ToString() + << "input_dimensions[" << i + << "]: " << input_dimensions[i].ToString(); + return *this; + } + } + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSpaceConcatenate(this, input_dimensions, input_data, + output_data, concat_direction)); + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenReshape(const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<float> *output_data) { + VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), + PARAM(output_dimensions), PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoReshape(this, input_dimensions, input_data, + output_dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return 
*this; +} + +Stream &Stream::ThenDepthToSpace( + const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::DepthToSpaceLayout &depth_to_space_layout, + const int sqrt_depth_reduction, DeviceMemory<float> *output_data) { + VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), + PARAM(depth_to_space_layout), PARAM(sqrt_depth_reduction), + PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoDepthToSpace(this, input_dimensions, input_data, + depth_to_space_layout, + sqrt_depth_reduction, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenSpaceToDepth( + const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::DepthToSpaceLayout &space_to_depth_layout, + const int sqrt_depth_increase, DeviceMemory<float> *output_data) { + VLOG_CALL(PARAM(input_dimensions), PARAM(input_data), + PARAM(space_to_depth_layout), PARAM(sqrt_depth_increase), + PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoSpaceToDepth(this, input_dimensions, input_data, + space_to_depth_layout, sqrt_depth_increase, + output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1182,10 +1212,30 @@ Stream &Stream::ThenElementwiseOperate( input_data, output_dimensions, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenElementwiseOperateScaledQuantized( + dnn::ElementwiseOperation operation, + port::ArraySlice<int> input_multiplicands, int output_divisor, + port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float> *> input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<float> *output_data) { + 
VLOG_CALL(PARAM(operation), PARAM(input_multiplicands), PARAM(output_divisor), + PARAM(input_dimensions), PARAM(input_data), + PARAM(output_dimensions), PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoElementwiseOperateScaledQuantized( + this, operation, input_multiplicands, output_divisor, + input_dimensions, input_data, output_dimensions, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1204,10 +1254,7 @@ Stream &Stream::ThenXYPad(const dnn::BatchDescriptor &dimensions, CheckError(dnn->DoXYPad(this, dimensions, input_data, left_pad, right_pad, top_pad, bottom_pad, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1228,10 +1275,25 @@ Stream &Stream::ThenXYSlice(const dnn::BatchDescriptor &dimensions, right_trim, top_trim, bottom_trim, output_data)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenXYBroadcast(const dnn::BatchDescriptor &dimensions, + const DeviceMemory<float> &input_data, + int64 replicate_x, int64 replicate_y, + DeviceMemory<float> *output_data) { + VLOG_CALL(PARAM(dimensions), PARAM(input_data), PARAM(replicate_x), + PARAM(replicate_y), PARAM(output_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError(dnn->DoXYBroadcast(this, dimensions, input_data, replicate_x, + replicate_y, output_data)); + } else { + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1248,10 +1310,7 @@ Stream &Stream::ThenMemcpyD2HQuantized( CheckError(dnn->DoMemcpyD2HQuantized(this, gpu_unquantized_src, mode, host_dst, size)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN 
support"; + SetErrorAndLogNoDnnSupport(); } } return *this; @@ -1268,10 +1327,37 @@ Stream &Stream::ThenMemcpyH2DQuantized( CheckError(dnn->DoMemcpyH2DQuantized(this, host_src, size, mode, gpu_unquantized_dst)); } else { - SetError(); - LOG(WARNING) - << "attempting to perform DNN operation using StreamExecutor " - "without DNN support"; + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenCopyHostBuffer2Device( + HostBuffer *buffer_src, DeviceMemory<float> *gpu_unquantized_dst) { + VLOG_CALL(PARAM(*buffer_src), PARAM(gpu_unquantized_dst)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError( + dnn->DoCopyHostBuffer2Device(this, buffer_src, gpu_unquantized_dst)); + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + +Stream &Stream::ThenCopyDevice2HostBuffer( + const DeviceMemory<float> &gpu_unquantized_src, HostBuffer *buffer_dst) { + VLOG_CALL(PARAM(gpu_unquantized_src), PARAM(*buffer_dst)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + CheckError( + dnn->DoCopyDevice2HostBuffer(this, gpu_unquantized_src, buffer_dst)); + } else { + SetErrorAndLogNoDnnSupport(); } } return *this; diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index 0d16495a1d..711eb3079a 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -499,6 +499,44 @@ class Stream { port::ArraySlice<const DeviceMemory<float> *> input_data, DeviceMemory<float> *output_data); + Stream &ThenSpaceConcatenate( + port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float> *> input_data, + DeviceMemory<float> *output_data, + dnn::SpaceConcatenateMode concat_direction); + + // Change the layout of the data by shrinking one dimension (or set of + // dimensions) and growing another dimension (or set of dimensions), while + // keeping the total number of data elements constant, and maintaining the + // 
current data ordering. + Stream &ThenReshape(const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<float> *output_data); + + // Depth to space takes an X by Y image with depth D*M² and changes it to an + // MX x MY image with depth D. Each input location (x,y) with depth D*M² in + // the input image is changed to an MxM contiguous area in the output image, + // with the values being laid out in raster order specified by + // DepthToSpaceLayout, and will have a new depth of D. + // See the DoDepthToSpace comment for more information. + Stream &ThenDepthToSpace(const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::DepthToSpaceLayout &depth_to_space_layout, + const int sqrt_depth_reduction, + DeviceMemory<float> *output_data); + + // Space to depth is the inverse of depth to space. Space to depth takes each + // non-overlapping M by M patch (in the X and Y dimensions) with depth D of + // the input, and transforms it to a 1 by 1 patch with depth D*M². If the + // input has size (MX, MY, D), the output has size (X, Y, D*M²). The number of + // data elements is not changed. 
+ Stream &ThenSpaceToDepth(const dnn::BatchDescriptor &input_dimensions, + const DeviceMemory<float> &input_data, + const dnn::DepthToSpaceLayout &space_to_depth_layout, + const int sqrt_depth_increase, + DeviceMemory<float> *output_data); + Stream &ThenElementwiseOperate( dnn::ElementwiseOperation operation, port::ArraySlice<dnn::BatchDescriptor> input_dimensions, @@ -506,6 +544,14 @@ class Stream { const dnn::BatchDescriptor &output_dimensions, DeviceMemory<float> *output_data); + Stream &ThenElementwiseOperateScaledQuantized( + dnn::ElementwiseOperation operation, + port::ArraySlice<int> input_multiplicands, int output_divisor, + port::ArraySlice<dnn::BatchDescriptor> input_dimensions, + port::ArraySlice<const DeviceMemory<float> *> input_data, + const dnn::BatchDescriptor &output_dimensions, + DeviceMemory<float> *output_data); + Stream &ThenXYPad(const dnn::BatchDescriptor &dimensions, const DeviceMemory<float> &input_data, int64 left_pad, int64 right_pad, int64 top_pad, int64 bottom_pad, @@ -516,6 +562,14 @@ class Stream { int64 right_trim, int64 top_trim, int64 bottom_trim, DeviceMemory<float> *output_data); + // Grows the input tensor by replicating the X and Y dimensions. The batch and + // depth/feature_map dimensions are unchanged. Currently, the input tensor is + // limited to X=1 and Y=1. + Stream &ThenXYBroadcast(const dnn::BatchDescriptor &dimensions, + const DeviceMemory<float> &input_data, + int64 replicate_x, int64 replicate_y, + DeviceMemory<float> *output_data); + // See DnnSupport::DoMemcpyD2HQuantized. Stream &ThenMemcpyD2HQuantized(const DeviceMemory<float> &gpu_unquantized_src, dnn::QuantizedActivationMode mode, @@ -549,6 +603,14 @@ class Stream { Quantization<ElementType>::kModeId, gpu_unquantized_dst); } + // See DnnSupport::DoCopyHostBuffer2Device. + Stream &ThenCopyHostBuffer2Device(HostBuffer *buffer_src, + DeviceMemory<float> *gpu_unquantized_dst); + + // See DnnSupport::DoCopyDevice2HostBuffer. 
+ Stream &ThenCopyDevice2HostBuffer( + const DeviceMemory<float> &gpu_unquantized_src, HostBuffer *buffer_dst); + ///////////////// // BLAS support @@ -1527,6 +1589,12 @@ class Stream { void SetError() { CheckError(false /* = operation_retcode */); } + void SetErrorAndLogNoDnnSupport() { + SetError(); + LOG(WARNING) << "attempting to perform DNN operation using StreamExecutor " + "without DNN support"; + } + // The StreamExecutor that supports the operation of this stream. StreamExecutor *parent_; diff --git a/tensorflow/stream_executor/stream_executor_internal.h b/tensorflow/stream_executor/stream_executor_internal.h index 57db7775a6..d6d55fd623 100644 --- a/tensorflow/stream_executor/stream_executor_internal.h +++ b/tensorflow/stream_executor/stream_executor_internal.h @@ -199,14 +199,14 @@ class StreamExecutorInterface { virtual bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) = 0; virtual bool SynchronousMemSet(DeviceMemoryBase *location, int value, uint64 size) = 0; - virtual bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, - const void *host_src, uint64 size) = 0; - virtual bool SynchronousMemcpy(void *host_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) = 0; - virtual bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, - const DeviceMemoryBase &gpu_src, - uint64 size) = 0; + virtual port::Status SynchronousMemcpy(DeviceMemoryBase *gpu_dst, + const void *host_src, uint64 size) = 0; + virtual port::Status SynchronousMemcpy(void *host_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) = 0; + virtual port::Status SynchronousMemcpyDeviceToDevice( + DeviceMemoryBase *gpu_dst, const DeviceMemoryBase &gpu_src, + uint64 size) = 0; virtual bool MemZero(Stream *stream, DeviceMemoryBase *location, uint64 size) = 0; virtual bool Memset(Stream *stream, DeviceMemoryBase *location, diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc index 7739d31662..71a5a45b67 100644 
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc +++ b/tensorflow/stream_executor/stream_executor_pimpl.cc @@ -491,7 +491,12 @@ bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, // Tracing overloaded methods is very difficult due to issues with type // inference on template args. Since use of these overloaded methods is // discouraged anyway, this isn't a huge deal. - return implementation_->SynchronousMemcpy(gpu_dst, host_src, size); + port::Status status = + implementation_->SynchronousMemcpy(gpu_dst, host_src, size); + if (!status.ok()) { + LOG(ERROR) << "synchronous memcpy: " << status; + } + return status.ok(); } bool StreamExecutor::SynchronousMemcpy(void *host_dst, @@ -501,7 +506,12 @@ bool StreamExecutor::SynchronousMemcpy(void *host_dst, << ", gpu_src=" << gpu_src.opaque() << ", size=" << size << ") D2H" << StackTraceIfVLOG10(); - return implementation_->SynchronousMemcpy(host_dst, gpu_src, size); + port::Status status = + implementation_->SynchronousMemcpy(host_dst, gpu_src, size); + if (!status.ok()) { + LOG(ERROR) << "synchronous memcpy: " << status; + } + return status.ok(); } bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, @@ -511,8 +521,12 @@ bool StreamExecutor::SynchronousMemcpy(DeviceMemoryBase *gpu_dst, << gpu_dst->opaque() << ", gpu_src=" << gpu_src.opaque() << ", size=" << size << ") D2D" << StackTraceIfVLOG10(); - return implementation_->SynchronousMemcpyDeviceToDevice(gpu_dst, gpu_src, - size); + port::Status status = + implementation_->SynchronousMemcpyDeviceToDevice(gpu_dst, gpu_src, size); + if (!status.ok()) { + LOG(ERROR) << "synchronous memcpy: " << status; + } + return status.ok(); } port::Status StreamExecutor::SynchronousMemcpyD2H( @@ -525,13 +539,15 @@ port::Status StreamExecutor::SynchronousMemcpyD2H( SCOPED_TRACE(TraceListener::SynchronousMemcpyD2H, &result, gpu_src, size, host_dst); - if (!implementation_->SynchronousMemcpy(host_dst, gpu_src, size)) { + port::Status status = + 
implementation_->SynchronousMemcpy(host_dst, gpu_src, size); + if (!status.ok()) { return port::Status{ port::error::INTERNAL, port::Printf( "failed to synchronously memcpy device-to-host: GPU %p to host %p " - "size %lld", - gpu_src.opaque(), host_dst, size)}; + "size %lld: %s", + gpu_src.opaque(), host_dst, size, status.ToString().c_str())}; } return result; @@ -548,12 +564,15 @@ port::Status StreamExecutor::SynchronousMemcpyH2D(const void *host_src, SCOPED_TRACE(TraceListener::SynchronousMemcpyH2D, &result, host_src, size, gpu_dst); - if (!implementation_->SynchronousMemcpy(gpu_dst, host_src, size)) { + port::Status status = + implementation_->SynchronousMemcpy(gpu_dst, host_src, size); + if (!status.ok()) { result = port::Status{ port::error::INTERNAL, port::Printf("failed to synchronously memcpy host-to-device: host " - "%p to GPU %p size %lld", - host_src, gpu_dst->opaque(), size)}; + "%p to GPU %p size %lld: %s", + host_src, gpu_dst->opaque(), size, + status.ToString().c_str())}; } return result; diff --git a/tensorflow/tensorboard/BUILD b/tensorflow/tensorboard/BUILD index 21f6519cab..2887fb4362 100644 --- a/tensorflow/tensorboard/BUILD +++ b/tensorflow/tensorboard/BUILD @@ -30,6 +30,7 @@ py_binary( deps = [ "//tensorflow/python:platform", "//tensorflow/tensorboard/backend:server", + "@werkzeug", ], ) diff --git a/tensorflow/tensorboard/tensorboard.py b/tensorflow/tensorboard/tensorboard.py index 9adcee7e36..42d5aedced 100644 --- a/tensorflow/tensorboard/tensorboard.py +++ b/tensorflow/tensorboard/tensorboard.py @@ -23,6 +23,7 @@ from __future__ import print_function import os import socket +from werkzeug.serving import run_simple from tensorflow.python.platform import app from tensorflow.python.platform import flags diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2a9fcae5e5..7fa7e4a91d 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -140,28 +140,27 @@ def tf_gen_op_libs(op_lib_names, deps=None): 
linkstatic=1,) def tf_gen_op_wrapper_cc(name, out_ops_file, pkg="", - op_gen="//tensorflow/cc:cc_op_gen_main"): + op_gen="//tensorflow/cc:cc_op_gen_main", + deps=None, + include_internal_ops=0): # Construct an op generator binary for these ops. tool = out_ops_file + "_gen_cc" + if deps == None: + deps = [pkg + ":" + name + "_op_lib"] native.cc_binary( name = tool, copts = tf_copts(), linkopts = ["-lm"], linkstatic = 1, # Faster to link this one-time-use binary dynamically - deps = ([op_gen, pkg + ":" + name + "_op_lib"]) + deps = [op_gen] + deps ) - # Run the op generator. - if name == "sendrecv_ops" or name == "function_ops": - include_internal = "1" - else: - include_internal = "0" native.genrule( name=name + "_genrule", outs=[out_ops_file + ".h", out_ops_file + ".cc"], tools=[":" + tool], cmd=("$(location :" + tool + ") $(location :" + out_ops_file + ".h) " + - "$(location :" + out_ops_file + ".cc) " + include_internal)) + "$(location :" + out_ops_file + ".cc) " + str(include_internal_ops))) # Given a list of "op_lib_names" (a list of files in the ops directory # without their .cc extensions), generate individual C++ .cc and .h @@ -192,11 +191,14 @@ def tf_gen_op_wrappers_cc(name, "//tensorflow/cc:const_op", ], op_gen="//tensorflow/cc:cc_op_gen_main", + include_internal_ops=0, visibility=None): subsrcs = other_srcs subhdrs = other_hdrs for n in op_lib_names: - tf_gen_op_wrapper_cc(n, "ops/" + n, pkg=pkg, op_gen=op_gen) + tf_gen_op_wrapper_cc( + n, "ops/" + n, pkg=pkg, op_gen=op_gen, + include_internal_ops=include_internal_ops) subsrcs += ["ops/" + n + ".cc"] subhdrs += ["ops/" + n + ".h"] diff --git a/tensorflow/tools/ci_build/builds/libtensorflow.sh b/tensorflow/tools/ci_build/builds/libtensorflow.sh index a9989fe504..683ab9f77b 100755 --- a/tensorflow/tools/ci_build/builds/libtensorflow.sh +++ b/tensorflow/tools/ci_build/builds/libtensorflow.sh @@ -46,20 +46,17 @@ function build_libtensorflow_tarball() { fi bazel clean --expunge yes "" | ./configure - - # 
TODO(ashankar): Once - # https://github.com/tensorflow/tensorflow/commit/1b32b698eddc10c0d85b0b8cf838f42023394de7 - # can be undone, i.e., when bazel supports pkg_tar with python3+ then all of this below - # can be replaced with something like: - # bazel build ${BAZEL_OPTS} //tensorflow/tools/lib_package:libtensorflow.tar.gz - - bazel build ${BAZEL_OPTS} //tensorflow:libtensorflow.so + + # Remove this test call when + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # have been resolved and the "manual" tags on the BUILD targets + # in tensorflow/tools/lib_package/BUILD are removed. + # Till then, must manually run the test. + bazel test ${BAZEL_OPTS} //tensorflow/tools/lib_package/... + + bazel build ${BAZEL_OPTS} //tensorflow/tools/lib_package:libtensorflow.tar.gz DIR=lib_package - rm -rf ${DIR} - mkdir -p ${DIR}/build/lib - mkdir -p ${DIR}/build/include/tensorflow/c - cp bazel-bin/tensorflow/libtensorflow.so ${DIR}/build/lib - cp tensorflow/c/c_api.h ${DIR}/build/include/tensorflow/c - tar -C ${DIR}/build -cvf ${DIR}/libtensorflow${TARBALL_SUFFIX}.tar.gz include/tensorflow/c/c_api.h lib/libtensorflow.so - rm -rf ${DIR}/build + mkdir -p ${DIR} + cp bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz ${DIR}/libtensorflow${TARBALL_SUFFIX}.tar.gz } diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 6f6684dcdf..46f97891d3 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -332,6 +332,11 @@ else EXTRA_ARGS="${TF_BUILD_APPEND_ARGUMENTS} --test_tag_filters=-benchmark-test" fi +# For any "tool" dependencies in genrules, Bazel will build them for host +# instead of the target configuration. We can save some build time by setting +# this flag, and it only affects a few tests. 
+EXTRA_ARGS="${EXTRA_ARGS} --distinct_host_configuration=false" + # Process PIP install-test option if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] || [[ ${TF_BUILD_IS_PIP} == "both" ]]; then diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index 0d890f5684..975a14e7d5 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -292,8 +292,8 @@ do_buildifier(){ } do_external_licenses_check(){ - echo "Running do_external_licenses_check" - echo "" + BUILD_TARGET="$1" + LICENSES_TARGET="$2" EXTERNAL_LICENSES_CHECK_START_TIME=$(date +'%s') @@ -302,8 +302,8 @@ do_external_licenses_check(){ MISSING_LICENSES_FILE="$(mktemp)_missing_licenses.log" EXTRA_LICENSES_FILE="$(mktemp)_extra_licenses.log" - echo "Getting external dependencies for //tensorflow/tools/pip_package:build_pip_package." - bazel query 'attr("licenses", "notice", deps(//tensorflow/tools/pip_package:build_pip_package))' --no_implicit_deps --no_host_deps --keep_going \ + echo "Getting external dependencies for ${BUILD_TARGET}" + bazel query "attr('licenses', 'notice', deps(${BUILD_TARGET}))" --no_implicit_deps --no_host_deps --keep_going \ | egrep -v "^//tensorflow" \ | sed -e 's|:.*||' \ | sort \ @@ -311,8 +311,8 @@ do_external_licenses_check(){ | tee ${EXTERNAL_DEPENDENCIES_FILE} echo - echo "Getting list of external licenses." - bazel query 'deps(//tensorflow/tools/pip_package:licenses)' --no_implicit_deps --no_host_deps --keep_going \ + echo "Getting list of external licenses mentioned in ${LICENSES_TARGET}." + bazel query "deps(${LICENSES_TARGET})" --no_implicit_deps --no_host_deps --keep_going \ | egrep -v "^//tensorflow" \ | sed -e 's|:.*||' \ | sort \ @@ -331,7 +331,7 @@ do_external_licenses_check(){ echo if [[ -s ${MISSING_LICENSES_FILE} ]] || [[ -s ${EXTRA_LICENSES_FILE} ]] ; then - echo "FAIL: pip package external dependencies vs licenses mismatch." 
+ echo "FAIL: mismatch in packaged licenses and external dependencies" if [[ -s ${MISSING_LICENSES_FILE} ]] ; then echo "Missing the licenses for the following external dependencies:" cat ${MISSING_LICENSES_FILE} @@ -355,6 +355,21 @@ do_external_licenses_check(){ fi } +do_pip_package_licenses_check() { + echo "Running do_pip_package_licenses_check" + echo "" + do_external_licenses_check \ + "//tensorflow/tools/pip_package:build_pip_package" \ + "//tensorflow/tools/pip_package:licenses" +} + +do_lib_package_licenses_check() { + echo "Running do_lib_package_licenses_check" + echo "" + do_external_licenses_check \ + "//tensorflow:libtensorflow.so" \ + "//tensorflow/tools/lib_package:clicenses_generate" +} # Run bazel build --nobuild to test the validity of the BUILD files do_bazel_nobuild() { @@ -376,8 +391,8 @@ do_bazel_nobuild() { } # Supply all sanity step commands and descriptions -SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_buildifier" "do_bazel_nobuild" "do_external_licenses_check") -SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "buildifier check" "bazel nobuild" "external dependencies licenses check") +SANITY_STEPS=("do_pylint PYTHON2" "do_pylint PYTHON3" "do_buildifier" "do_bazel_nobuild" "do_pip_package_licenses_check" "do_lib_package_licenses_check") +SANITY_STEPS_DESC=("Python 2 pylint" "Python 3 pylint" "buildifier check" "bazel nobuild" "pip: license check for external dependencies" "C library: license check for external dependencies") INCREMENTAL_FLAG="" diff --git a/tensorflow/tools/ci_build/install/install_deb_packages.sh b/tensorflow/tools/ci_build/install/install_deb_packages.sh index 71e2a6c852..227b83ab9f 100755 --- a/tensorflow/tools/ci_build/install/install_deb_packages.sh +++ b/tensorflow/tools/ci_build/install/install_deb_packages.sh @@ -21,18 +21,11 @@ ubuntu_version=$(cat /etc/issue | grep -i ubuntu | awk '{print $2}' | \ # Install dependencies from ubuntu deb repository. 
apt-get update -set +e -ffmpeg_location=$(which ffmpeg) -if [[ -z "$ffmpeg_location" && "$ubuntu_version" == "14" ]]; then - set -e +if [[ "$ubuntu_version" == "14" ]]; then # specifically for trusty linked from ffmpeg.org add-apt-repository -y ppa:mc3man/trusty-media apt-get update apt-get dist-upgrade -y - apt-get install -y ffmpeg libav-tools -else - set -e - apt-get install -y ffmpeg libav-tools fi apt-get install -y --no-install-recommends \ @@ -41,6 +34,7 @@ apt-get install -y --no-install-recommends \ build-essential \ cmake \ curl \ + ffmpeg \ git \ libcurl4-openssl-dev \ libtool \ diff --git a/tensorflow/tools/graph_transforms/summarize_graph_main.cc b/tensorflow/tools/graph_transforms/summarize_graph_main.cc index 638296b923..55b55e0a15 100644 --- a/tensorflow/tools/graph_transforms/summarize_graph_main.cc +++ b/tensorflow/tools/graph_transforms/summarize_graph_main.cc @@ -65,7 +65,8 @@ Status SummarizeGraph(const GraphDef& graph) { MapNodesToOutputs(graph, &output_map); std::vector<const NodeDef*> outputs; for (const NodeDef& node : graph.node()) { - if (output_map.count(node.name()) == 0) { + if ((output_map.count(node.name()) == 0) && (node.op() != "Const") && + (node.op() != "Assign")) { outputs.push_back(&node); } } diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD new file mode 100644 index 0000000000..41e7221efe --- /dev/null +++ b/tensorflow/tools/lib_package/BUILD @@ -0,0 +1,107 @@ +# Packaging the TensorFlow C API into a small, standalone archive for use with +# language bindings and installations without Python. +# +# TODO(ashankar): Something similar for the JNI library for Java? 
+# TODO(ashankar): Something similar for the C++ API (caveat: ABI compatibility) + +package(default_visibility = ["//visibility:private"]) + +load("@bazel_tools//tools/build_defs/pkg:pkg.bzl", "pkg_tar") + +pkg_tar( + name = "libtensorflow", + extension = "tar.gz", + # Mark as "manual" till + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # are resolved, otherwise these rules break when built + # with Python 3. + tags = ["manual"], + deps = [ + ":cheaders", + ":clib", + ":clicenses", + ], +) + +pkg_tar( + name = "cheaders", + files = ["//tensorflow/c:headers"], + package_dir = "include/tensorflow/c", + # Mark as "manual" till + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # are resolved, otherwise these rules break when built + # with Python 3. + tags = ["manual"], +) + +pkg_tar( + name = "clib", + files = ["//tensorflow:libtensorflow.so"], + package_dir = "lib", + # Mark as "manual" till + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # are resolved, otherwise these rules break when built + # with Python 3. + tags = ["manual"], +) + +pkg_tar( + name = "clicenses", + files = [":include/tensorflow/c/LICENSE"], + package_dir = "include/tensorflow/c", + # Mark as "manual" till + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # are resolved, otherwise these rules break when built + # with Python 3. 
+ tags = ["manual"], +) + +genrule( + name = "clicenses_generate", + srcs = [ + "//third_party/hadoop:LICENSE.txt", + "//third_party/eigen3:LICENSE", + "@boringssl//:LICENSE", + "@com_googlesource_code_re2//:LICENSE", + "@curl//:COPYING", + "@eigen_archive//:COPYING.MPL2", + "@farmhash_archive//:COPYING", + "@gemmlowp//:LICENSE", + "@gif_archive//:COPYING", + "@grpc//:LICENSE", + "@highwayhash//:LICENSE", + "@jemalloc//:COPYING", + "@jpeg//:LICENSE.md", + "@libxsmm_archive//:LICENSE", + "@local_config_sycl//sycl:LICENSE.text", + "@nanopb_git//:LICENSE.txt", + "@png_archive//:LICENSE", + "@protobuf//:LICENSE", + "@zlib_archive//:zlib.h", + ], + outs = ["include/tensorflow/c/LICENSE"], + cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", + tools = [":concat_licenses.sh"], +) + +sh_test( + name = "libtensorflow_test", + size = "small", + srcs = ["libtensorflow_test.sh"], + data = [ + "libtensorflow_test.c", + ":libtensorflow.tar.gz", + ], + # Mark as "manual" till + # https://github.com/bazelbuild/bazel/issues/2352 + # and https://github.com/bazelbuild/bazel/issues/1580 + # are resolved, otherwise these rules break when built + # with Python 3. + # Till then, this test is explicitly executed when building + # the release by tensorflow/tools/ci_build/builds/libtensorflow.sh + tags = ["manual"], +) diff --git a/tensorflow/tools/lib_package/README.md b/tensorflow/tools/lib_package/README.md new file mode 100644 index 0000000000..fbec0a067a --- /dev/null +++ b/tensorflow/tools/lib_package/README.md @@ -0,0 +1,31 @@ +Bazel rules to package the TensorFlow C-library and [header +files](https://www.tensorflow.org/code/tensorflow/c/c_api.h) +into an archive. + +## TensorFlow C library + +The TensorFlow [C +API](https://www.tensorflow.org/code/tensorflow/c/c_api.h) +is typically a requirement of TensorFlow APIs in other languages such as +[Go](https://www.tensorflow.org/code/tensorflow/go) +and [Rust](https://github.com/tensorflow/rust). 
+ +The command: + +```sh +bazel build -c opt //tensorflow/tools/lib_package:libtensorflow +``` + +produces `bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz`, which +can be distributed and installed using something like: + +```sh +tar -C /usr/local -xzf libtensorflow.tar.gz +``` + +## Release + +Scripts to generate archives using these rules for release are in +[tensorflow/tools/ci_build/linux](https://www.tensorflow.org/code/tensorflow/tools/ci_build/linux) +and +[tensorflow/tools/ci_build/osx](https://www.tensorflow.org/code/tensorflow/tools/ci_build/osx) diff --git a/tensorflow/tools/lib_package/concat_licenses.sh b/tensorflow/tools/lib_package/concat_licenses.sh new file mode 100755 index 0000000000..2070f64e9f --- /dev/null +++ b/tensorflow/tools/lib_package/concat_licenses.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script aimed to combining multiple license files into a single one. 
+
+for f in "$@"  # "$@" keeps every path argument as one word, even with spaces
+do
+  echo "--------------------------------------------------------------------------------"
+  echo "BEGIN LICENSE FOR $f"
+  echo "--------------------------------------------------------------------------------"
+  cat "$f"
+  echo "--------------------------------------------------------------------------------"
+  echo "END LICENSE FOR $f"
+  echo "--------------------------------------------------------------------------------"
+done
diff --git a/tensorflow/tools/lib_package/libtensorflow_test.c b/tensorflow/tools/lib_package/libtensorflow_test.c
new file mode 100644
index 0000000000..dff6fb77ec
--- /dev/null
+++ b/tensorflow/tools/lib_package/libtensorflow_test.c
@@ -0,0 +1,28 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Companion source file for libtensorflow_test.sh
+
+#include <tensorflow/c/c_api.h>
+
+int main() {
+  TF_Status* s = TF_NewStatus();
+  TF_SetStatus(s, TF_UNKNOWN, "Some error");
+  if (TF_GetCode(s) != TF_UNKNOWN) {
+    return 1;
+  }
+  TF_DeleteStatus(s);
+  return 0;
+}
diff --git a/tensorflow/tools/lib_package/libtensorflow_test.sh b/tensorflow/tools/lib_package/libtensorflow_test.sh
new file mode 100755
index 0000000000..6463ecea70
--- /dev/null
+++ b/tensorflow/tools/lib_package/libtensorflow_test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+set -ex
+
+# Sanity test for the package C-library archive.
+# - Unarchive
+# - Compile a trivial C file that uses the archive
+# - Run it
+
+# Tools needed: a C compiler and tar; both can be overridden via ${CC} / ${TAR}.
+CC="${CC}"
+TAR="${TAR}"
+
+[ -z "${CC}" ] && CC="/usr/bin/gcc"
+[ -z "${TAR}" ] && TAR="tar"
+
+# bazel tests run with ${PWD} set to the root of the bazel workspace
+TARFILE="${PWD}/tensorflow/tools/lib_package/libtensorflow.tar.gz"
+CFILE="${PWD}/tensorflow/tools/lib_package/libtensorflow_test.c"
+
+cd "${TEST_TMPDIR}"
+
+# Extract the archive into tensorflow/ (path arguments are quoted; the tool names are not)
+mkdir tensorflow
+${TAR} -xzf "${TARFILE}" -Ctensorflow
+
+# Compile the test .c file against the extracted headers and library
+${CC} "${CFILE}" -Itensorflow/include -Ltensorflow/lib -ltensorflow -oa.out
+
+# Execute it, with the shared library available.
+# DYLD_LIBRARY_PATH is used on OS X, LD_LIBRARY_PATH on Linux +export DYLD_LIBRARY_PATH=tensorflow/lib +export LD_LIBRARY_PATH=tensorflow/lib +./a.out diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 62fb9b9176..0ef09835e9 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -95,6 +95,7 @@ filegroup( "@png_archive//:LICENSE", "@protobuf//:LICENSE", "@six_archive//:LICENSE", + "@werkzeug//:LICENSE", "@zlib_archive//:zlib.h", ] + tf_additional_license_deps(), ) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a03e844ea2..1ad739d6cf 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -76,10 +76,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): native.new_http_archive( name = "libxsmm_archive", urls = [ - "https://github.com/hfp/libxsmm/archive/1.6.4.tar.gz", + # "http://bazel-mirror.storage.googleapis.com/github.com/hfp/libxsmm/archive/1.6.1.tar.gz", + "https://github.com/hfp/libxsmm/archive/1.6.5.tar.gz", ], - sha256 = "3788bf1cdb60f119f8a04ed7ed96861322e539ce2d2ea977f00431d6b2b80beb", - strip_prefix = "libxsmm-1.6.4", + sha256 = "5231419a8e13e7a6d286cf25d32a3aa75c443a625e5ea57024d36468bc3d5936", + strip_prefix = "libxsmm-1.6.5", build_file = str(Label("//third_party:libxsmm.BUILD")), ) @@ -191,6 +192,17 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): build_file = str(Label("//third_party:six.BUILD")), ) + native.new_http_archive( + name = "werkzeug", + urls = [ + "http://bazel-mirror.storage.googleapis.com/pypi.python.org/packages/b7/7f/44d3cfe5a12ba002b253f6985a4477edfa66da53787a2a838a40f6415263/Werkzeug-0.11.10.tar.gz", + "https://pypi.python.org/packages/b7/7f/44d3cfe5a12ba002b253f6985a4477edfa66da53787a2a838a40f6415263/Werkzeug-0.11.10.tar.gz", + ], + strip_prefix = "Werkzeug-0.11.10", + sha256 = "cc64dafbacc716cdd42503cf6c44cb5a35576443d82f29f6829e5c49264aeeee", + build_file = 
str(Label("//third_party:werkzeug.BUILD")), + ) + native.bind( name = "six", actual = "@six_archive//:six", @@ -314,7 +326,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. # Switch to an official source of snapshots if/when possible. - native.new_http_archive( + temp_workaround_http_archive( name = "llvm", urls = [ "http://bazel-mirror.storage.googleapis.com/github.com/llvm-mirror/llvm/archive/4e9e4f277ad254e02a0cff33c61cd827e600da62.tar.gz", @@ -323,6 +335,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): sha256 = "ec67c57dfd85c2bb857fd13011c5c2aa3f1dc9f40c0a5bac13e78e76d6b61aa6", strip_prefix = "llvm-4e9e4f277ad254e02a0cff33c61cd827e600da62", build_file = str(Label("//third_party/llvm:llvm.BUILD")), + repository = tf_repo_name, ) native.new_http_archive( @@ -395,7 +408,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): actual = "@junit_jar//jar", ) - native.new_http_archive( + temp_workaround_http_archive( name = "jemalloc", urls = [ "http://bazel-mirror.storage.googleapis.com/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", @@ -404,4 +417,5 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", strip_prefix = "jemalloc-4.4.0", build_file = str(Label("//third_party:jemalloc.BUILD")), + repository = tf_repo_name, ) diff --git a/third_party/jemalloc.BUILD b/third_party/jemalloc.BUILD index 2496d12627..aabff39d7b 100644 --- a/third_party/jemalloc.BUILD +++ b/third_party/jemalloc.BUILD @@ -5,7 +5,7 @@ licenses(["notice"]) # BSD exports_files(["COPYING"]) -load("@//third_party:common.bzl", "template_rule") +load("@%ws%//third_party:common.bzl", "template_rule") cc_library( name = "jemalloc", diff --git a/third_party/llvm/llvm.BUILD b/third_party/llvm/llvm.BUILD index 0f7ef74545..330d8b79ce 100644 --- a/third_party/llvm/llvm.BUILD +++ b/third_party/llvm/llvm.BUILD @@ -7,18 +7,18 @@ 
licenses(["notice"]) exports_files(["LICENSE.TXT"]) load( - "@//third_party/llvm:llvm.bzl", + "@%ws%//third_party/llvm:llvm.bzl", "gentbl", "expand_cmake_vars", "llvm_target_cmake_vars", "cmake_var_string", ) load( - "@//third_party:common.bzl", + "@%ws%//third_party:common.bzl", "template_rule", ) -package(default_visibility = ["@//tensorflow/compiler/xla:internal"]) +package(default_visibility = ["@%ws%//tensorflow/compiler/xla:internal"]) llvm_host_triple = "x86_64-unknown-linux_gnu" @@ -147,7 +147,7 @@ darwin_cmake_vars = { # TODO(phawkins): use a better method to select the right host triple, rather # than hardcoding x86_64. all_cmake_vars = select({ - "@//tensorflow:darwin": cmake_var_string( + "@%ws%//tensorflow:darwin": cmake_var_string( cmake_vars + llvm_target_cmake_vars("X86", "x86_64-apple-darwin") + darwin_cmake_vars, ), diff --git a/third_party/werkzeug.BUILD b/third_party/werkzeug.BUILD new file mode 100644 index 0000000000..aaf1614bb9 --- /dev/null +++ b/third_party/werkzeug.BUILD @@ -0,0 +1,14 @@ +# Description: +# Werkzeug provides utilities for making WSGI applications + +licenses(["notice"]) # BSD 3-Clause + +exports_files(["LICENSE"]) + +# Note: this library includes test code. Consider creating a testonly target. +py_library( + name = "werkzeug", + srcs = glob(["werkzeug/werkzeug/*.py"]), + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], +) |